Commit bb773274 authored by mhby1g21

GDP's changes

parent 385bfe63
{
"git.ignoreLimitWarning": true
}
\ No newline at end of file
import numpy as np
import imageio
import os
from ui import get_res

# Keeps x within [min_val, max_val] to prevent out-of-bounds pixel access into the image
def clamp(x, min_val, max_val):
    return max(min_val, min(x, max_val))

# Truncates the floating-point coordinates to the nearest valid integer pixel and returns that pixel value
def nearest_neighbour_interpolation(img, x, y):
    h, w, _ = img.shape
    x, y = clamp(int(x), 0, w-1), clamp(int(y), 0, h-1)
    return img[y, x]
# def orientation_to_face(x, y, z):
#     abs_x, abs_y, abs_z = abs(x), abs(y), abs(z)
#     if abs_x >= abs_y and abs_x >= abs_z:
#         if x > 0:
#             return 'front', -y / abs_x, -z / abs_x
#         else:
#             return 'back', y / abs_x, -z / abs_x
#     elif abs_y >= abs_x and abs_y >= abs_z:
#         if y > 0:
#             return 'right', -x / abs_y, -z / abs_y
#         else:
#             return 'left', x / abs_y, -z / abs_y
#     else:
#         if z > 0:
#             return 'top', x / abs_z, y / abs_z
#         else:
#             return 'bottom', -x / abs_z, y / abs_z
# Maps a 3D direction vector to a cube face and to normalised 2D coordinates on that face.
# The face whose axis has the largest absolute component is the one the direction hits.
def orientation_to_face(x, y, z):
    abs_x, abs_y, abs_z = abs(x), abs(y), abs(z)
    if abs_x >= abs_y and abs_x >= abs_z:
        if x > 0:
            return 'frontrgb', -y / abs_x, -z / abs_x
        else:
            return 'backrgb', y / abs_x, -z / abs_x
    elif abs_y >= abs_x and abs_y >= abs_z:
        if y > 0:
            return 'rightrgb', -x / abs_y, -z / abs_y
        else:
            return 'leftrgb', x / abs_y, -z / abs_y
    else:
        if z > 0:
            return 'toprgb', x / abs_z, y / abs_z
        else:
            return 'bottomrgb', -x / abs_z, y / abs_z
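# Quick sanity check of the face selection (an illustrative sketch, not part of the commit,
# kept commented out in the same style as the usage snippets elsewhere in this repo):
# face, xf, yf = orientation_to_face(1.0, 0.0, 0.0)
# assert face == 'frontrgb' and xf == 0.0 and yf == 0.0   # a +x direction hits the centre of the front face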
# Converts a set of cube-map faces into an omnidirectional (equirectangular) image
def cubemap_to_omnidirectional(cube_faces, out_width, out_height):
    omnidirectional = np.zeros((out_height, out_width, 3), dtype=np.uint8)
    # Iterate over every output pixel. Each pixel location is converted to spherical
    # coordinates (theta, phi), then to a 3D Cartesian direction, which determines
    # the cube face (and the point on it) that the pixel corresponds to.
    for j in range(out_height):
        theta = j / out_height * np.pi
        for i in range(out_width):
            phi = i / out_width * 2 * np.pi
            x = np.sin(theta) * np.cos(phi)
            y = np.sin(theta) * np.sin(phi)
            z = np.cos(theta)
            face, xf, yf = orientation_to_face(x, y, z)
            face_img = cube_faces[face]  # the cube face image this direction maps onto
            face_size = face_img.shape[0]
            # Convert the normalised face coordinates in [-1, 1] to pixel coordinates on the face image
            u = (xf + 1) * face_size / 2
            v = (yf + 1) * face_size / 2
            # Sample the cube-map face and write the value into the omnidirectional image
            omnidirectional[j, i] = nearest_neighbour_interpolation(face_img, u, v)
    return omnidirectional
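# The per-pixel loops above are easy to follow but slow for large panoramas. The function
# below is a vectorised NumPy sketch of the same resampling (an illustration only, assuming
# square faces; the name cubemap_to_omnidirectional_fast is not part of the commit). It
# reproduces the face selection of orientation_to_face with boolean masks.
def cubemap_to_omnidirectional_fast(cube_faces, out_width, out_height):
    jj, ii = np.meshgrid(np.arange(out_height), np.arange(out_width), indexing='ij')
    theta = jj / out_height * np.pi
    phi = ii / out_width * 2 * np.pi
    x = np.sin(theta) * np.cos(phi)
    y = np.sin(theta) * np.sin(phi)
    z = np.cos(theta)
    abs_x, abs_y, abs_z = np.abs(x), np.abs(y), np.abs(z)
    # Mutually exclusive masks mirroring the if/elif/else order in orientation_to_face
    cond_x = (abs_x >= abs_y) & (abs_x >= abs_z)
    cond_y = ~cond_x & (abs_y >= abs_x) & (abs_y >= abs_z)
    cond_z = ~cond_x & ~cond_y
    # For each face: (mask, numerator of xf, numerator of yf, shared denominator)
    face_defs = {
        'frontrgb':  (cond_x & (x > 0),  -y, -z, abs_x),
        'backrgb':   (cond_x & (x <= 0),  y, -z, abs_x),
        'rightrgb':  (cond_y & (y > 0),  -x, -z, abs_y),
        'leftrgb':   (cond_y & (y <= 0),  x, -z, abs_y),
        'toprgb':    (cond_z & (z > 0),   x,  y, abs_z),
        'bottomrgb': (cond_z & (z <= 0), -x,  y, abs_z),
    }
    out = np.zeros((out_height, out_width, 3), dtype=np.uint8)
    for face, (mask, xf_num, yf_num, denom) in face_defs.items():
        face_img = cube_faces[face]
        face_size = face_img.shape[0]
        xf = xf_num[mask] / denom[mask]
        yf = yf_num[mask] / denom[mask]
        # Same [-1, 1] -> pixel mapping and clamped nearest-neighbour lookup as the loop version
        u = np.clip(((xf + 1) * face_size / 2).astype(int), 0, face_img.shape[1] - 1)
        v = np.clip(((yf + 1) * face_size / 2).astype(int), 0, face_img.shape[0] - 1)
        out[mask] = face_img[v, u]
    return out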
# if __name__ == "__main__":
#     # Load the cubemap images
#     cube_faces_dir = input("Enter the directory containing the cubemap images: ").strip()
#     faces = ["right", "left", "top", "bottom", "front", "back"]
#     cube_faces = {}
#     for face in faces:
#         cube_faces[face] = imageio.imread(os.path.join(cube_faces_dir, f"{face}.jpg"))
if __name__ == "__main__":
    # Load the cubemap images
    #cube_faces_dir = input("Enter the directory containing the cubemap images: ").strip()
    cube_faces_dir = r"C:\Project\AV-VR-Internship\material_recognition\Dynamic-Backward-Attention-Transformer\output\split_output"
    #faces = ["right", "left", "top", "bottom", "front", "back"]
    faces = ["rightrgb", "leftrgb", "toprgb", "bottomrgb", "frontrgb", "backrgb"]
    cube_faces = {}
    for face in faces:
        image_path = os.path.join(cube_faces_dir, f"{face}.png")
        image_data = imageio.imread(image_path)
        # rotate the top and bottom faces by 90 degrees
        # if face in ["top", "bottom"]:
        #     image_data = np.rot90(image_data, 1)
        # # flip the top, bottom, front and back faces horizontally
        # if face not in ["left", "right"]:
        #     image_data = image_data[:, ::-1]
        if face in ["toprgb", "bottomrgb"]:
            image_data = np.rot90(image_data, 1)
        # flip every face except left and right horizontally
        if face not in ["leftrgb", "rightrgb"]:
            image_data = image_data[:, ::-1]
        cube_faces[face] = image_data
    # output_width = int(input("Enter output omnidirectional width: "))
    # output_height = int(input("Enter output omnidirectional height: "))
    # Read the panorama path written by the cube-face splitting script and size the
    # output to match the original panorama
    with open('path.txt', 'r') as file:
        input_path = file.readline()
    print(f'path = {input_path}')
    os.remove('path.txt')
    height, width = get_res(input_path)
    print(height, width)
    output_width = width
    output_height = height
    #print(f"height: {height}, width: {width}")
    omnidirectional_img = cubemap_to_omnidirectional(cube_faces, output_width, output_height)
    output_path = r"C:\Project\AV-VR-Internship\edgenet360\Data\Input\material.png"
    imageio.v2.imsave(output_path, omnidirectional_img)
    print(f"Omnidirectional image saved to {output_path}")
-name: cv
+name: material
 channels:
 - pytorch
 - conda-forge
@@ -7,7 +7,6 @@ dependencies:
 - cudatoolkit=11.1.1
 - numpy=1.22.3
 - pandas=1.4.2
-- pillow=7.1.2
 - python=3.8.13
 - pytorch=1.8.1
 - scipy=1.7.3
@@ -17,4 +16,4 @@ dependencies:
 - pytorch-lightning==1.2.3
 - segmentation-models-pytorch==0.2.0
 - timm==0.4.12
-prefix: /home/usrname/miniconda3/envs/cv
+- pillow==7.1.2
###### This code is adapted from: https://github.com/jaxry/panorama-to-cubemap/blob/gh-pages/convert.js
import numpy as np
import imageio
import os
from ui import select_image

# Keeps x within [min_val, max_val] to prevent out-of-bounds pixel access into the image
def clamp(x, min_val, max_val):
    return max(min_val, min(x, max_val))

# Modulus of x w.r.t. n that is always non-negative (mirrors the JavaScript source;
# for a positive n, Python's % operator already returns a non-negative result)
def mod(x, n):
    return ((x % n) + n) % n

# Truncates the floating-point coordinates to the nearest valid integer pixel and returns that pixel value
def nearest_neigbour_interpolation(img, x, y):
    h, w, _ = img.shape
    x, y = clamp(int(x), 0, w-1), clamp(int(y), 0, h-1)
    return img[y, x]
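# The committed scripts sample with nearest-neighbour only. A bilinear variant (a sketch,
# not part of the commit; the name bilinear_interpolation is hypothetical) would reduce
# blockiness at the cost of a little speed. It reuses clamp from above.
def bilinear_interpolation(img, x, y):
    h, w, _ = img.shape
    fx, fy = x - np.floor(x), y - np.floor(y)                # fractional offsets in [0, 1)
    x0 = clamp(int(np.floor(x)), 0, w - 1)
    y0 = clamp(int(np.floor(y)), 0, h - 1)
    x1 = clamp(x0 + 1, 0, w - 1)
    y1 = clamp(y0 + 1, 0, h - 1)
    top = img[y0, x0] * (1 - fx) + img[y0, x1] * fx          # blend along x on the upper row
    bottom = img[y1, x0] * (1 - fx) + img[y1, x1] * fx       # blend along x on the lower row
    return (top * (1 - fy) + bottom * fy).astype(np.uint8)   # blend along y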
# Gives the 3D direction corresponding to a point (x, y) on the given cube face
def orient_face(face, x, y, out):
    if face == 'front':
        out[0], out[1], out[2] = 1, x, -y
    elif face == 'back':
        out[0], out[1], out[2] = -1, -x, -y
    elif face == 'right':
        out[0], out[1], out[2] = -x, 1, -y
    elif face == 'left':
        out[0], out[1], out[2] = x, -1, -y
    elif face == 'top':
        out[0], out[1], out[2] = -y, -x, 1
    elif face == 'bottom':
        out[0], out[1], out[2] = y, -x, -1
# Renders one cube face from an equirectangular image.
# Each 2D face coordinate is mapped to a 3D direction, the direction is converted to
# spherical coordinates, and those are used to look up the corresponding pixel in the
# equirectangular image.
def face_rendering(img, face, face_size):
    out_face = np.zeros((face_size, face_size, 3), dtype=np.uint8)
    for x in range(face_size):
        for y in range(face_size):
            out = [0, 0, 0]
            orient_face(face, (2 * (x + 0.5) / face_size - 1), (2 * (y + 0.5) / face_size - 1), out)
            r = np.sqrt(out[0]**2 + out[1]**2 + out[2]**2)
            longitude = mod(np.arctan2(out[1], out[0]), 2 * np.pi)
            latitude = np.arccos(out[2] / r)
            s_x, s_y = img.shape[1] * longitude / (2 * np.pi) - 0.5, img.shape[0] * latitude / np.pi - 0.5
            out_face[y, x] = nearest_neigbour_interpolation(img, s_x, s_y)
    return out_face
# Generates the 6 cube faces and saves them as PNGs
def generate_cube_faces(input_path, output_path="cube_faces_output"):
    img = imageio.imread(input_path)
    face_size = 512  # each output face image will be 512x512
    faces = ["right", "left", "top", "bottom", "front", "back"]
    results = {}
    for face in faces:
        results[face] = face_rendering(img, face, face_size)
        face_output_path = os.path.join(output_path, f"{face}.png")
        imageio.imsave(face_output_path, results[face])
        print(f"Saved {face} face to {face_output_path}")
# if __name__ == "__main__":
#     input_path = select_image()
#     height, width = get_res(input_path)
#     print("width: , Height:", width, height)
#     #output_path = input("Enter output directory: ").strip()
#     #if not output_path:
#     output_path = "C:\Project\AV-VR\material_recognition\Dynamic-Backward-Attention-Transformer\split_output"
#     if not os.path.exists(output_path):
#         os.makedirs(output_path)
#     generate_cube_faces(input_path, output_path)
input_path = select_image()
# Record the selected panorama path so the cubemap-to-omnidirectional step can reuse it
with open('path.txt', 'w') as file:
    file.write(input_path)
#output_path = input("Enter output directory: ").strip()
#if not output_path:
output_path = r"C:\Project\AV-VR\material_recognition\Dynamic-Backward-Attention-Transformer\split_output"
if not os.path.exists(output_path):
    os.makedirs(output_path)
generate_cube_faces(input_path, output_path)
###### This code is adapted from: https://github.com/jaxry/panorama-to-cubemap/blob/gh-pages/convert.js
import numpy as np
import imageio
import os
import sys
from ui import select_image

# Keeps x within [min_val, max_val] to prevent out-of-bounds pixel access into the image
def clamp(x, min_val, max_val):
    return max(min_val, min(x, max_val))

# Modulus of x w.r.t. n that is always non-negative (mirrors the JavaScript source;
# for a positive n, Python's % operator already returns a non-negative result)
def mod(x, n):
    return ((x % n) + n) % n

# Truncates the floating-point coordinates to the nearest valid integer pixel and returns that pixel value
def nearest_neigbour_interpolation(img, x, y):
    h, w, _ = img.shape
    x, y = clamp(int(x), 0, w-1), clamp(int(y), 0, h-1)
    return img[y, x]
# Gives the 3D direction corresponding to a point (x, y) on the given cube face
def orient_face(face, x, y, out):
    if face == 'front':
        out[0], out[1], out[2] = 1, x, -y
    elif face == 'back':
        out[0], out[1], out[2] = -1, -x, -y
    elif face == 'right':
        out[0], out[1], out[2] = -x, 1, -y
    elif face == 'left':
        out[0], out[1], out[2] = x, -1, -y
    elif face == 'top':
        out[0], out[1], out[2] = -y, -x, 1
    elif face == 'bottom':
        out[0], out[1], out[2] = y, -x, -1
# Renders one cube face from an omnidirectional (equirectangular) image.
# Each 2D face coordinate is mapped to a 3D direction, the direction is converted to
# spherical coordinates, and those are used to look up the corresponding pixel in the
# omnidirectional image.
def face_rendering(img, face, face_size):
    out_face = np.zeros((face_size, face_size, 3), dtype=np.uint8)
    for x in range(face_size):
        for y in range(face_size):
            out = [0, 0, 0]
            orient_face(face, (2 * (x + 0.5) / face_size - 1), (2 * (y + 0.5) / face_size - 1), out)
            r = np.sqrt(out[0]**2 + out[1]**2 + out[2]**2)
            longitude = mod(np.arctan2(out[1], out[0]), 2 * np.pi)
            latitude = np.arccos(out[2] / r)
            s_x, s_y = img.shape[1] * longitude / (2 * np.pi) - 0.5, img.shape[0] * latitude / np.pi - 0.5
            out_face[y, x] = nearest_neigbour_interpolation(img, s_x, s_y)
    return out_face
# Generates the 6 cube faces and saves them as PNGs
def generate_cube_faces(input_path, output_path="cube_faces_output"):
    img = imageio.imread(input_path)
    face_size = 512  # each output face image will be 512x512
    faces = ["right", "left", "top", "bottom", "front", "back"]
    results = {}
    for face in faces:
        results[face] = face_rendering(img, face, face_size)
        face_output_path = os.path.join(output_path, f"{face}.png")
        imageio.imsave(face_output_path, results[face])
        print(f"Saved {face} face to {face_output_path}")
# if __name__ == "__main__":
#     input_path = select_image()
#     height, width = get_res(input_path)
#     print("width: , Height:", width, height)
#     #output_path = input("Enter output directory: ").strip()
#     #if not output_path:
#     output_path = "C:\Project\AV-VR-Internship\material_recognition\Dynamic-Backward-Attention-Transformer\split_output"
#     if not os.path.exists(output_path):
#         os.makedirs(output_path)
#     generate_cube_faces(input_path, output_path)
#input_path = select_image()
input_path = sys.argv[1]
# Record the input panorama path so the cubemap-to-omnidirectional step can reuse it
with open('path.txt', 'w') as file:
    file.write(input_path)
#output_path = input("Enter output directory: ").strip()
#if not output_path:
output_path = r"C:\Project\AV-VR-Internship\material_recognition\Dynamic-Backward-Attention-Transformer\split_output"
if not os.path.exists(output_path):
    os.makedirs(output_path)
generate_cube_faces(input_path, output_path)
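# Hypothetical command-line usage (the script name is an assumption, not part of the commit):
#   python equirect_to_cubemap.py C:\path\to\panorama.png
# sys.argv[1] above is the path to the input equirectangular panorama; the resulting faces
# are written into the split_output directory configured above.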
@@ -46,7 +46,7 @@ def segment2rgb(output, mask=False, ops=False):
     if ops:
         pass
     else:
-        color_plate = {0: [119, 17, 17], 1: [202, 198, 144], 2: [186, 200, 238], 3: [124, 143, 166], 4: [89, 125, 49],
+        color_plate = {0: [119, 17, 17], 1: [202, 198, 144], 2: [186, 200, 238], 3: [0, 0, 200], 4: [89, 125, 49],
                        5: [16, 68, 16], 6: [187, 129, 156], 7: [208, 206, 72], 8: [98, 39, 69], 9: [102, 102, 102],
                        10: [76, 74, 95], 11: [16, 16, 68], 12: [68, 65, 38], 13: [117, 214, 70], 14: [221, 67, 72],
                        15: [92, 133, 119]}
@@ -59,6 +59,41 @@ def segment2rgb(output, mask=False, ops=False):
     return rgbmask
# from PIL import ImageDraw, ImageFont
# from scipy.ndimage import label
# def segment2rgb(output, mask=False, ops=False):
#     if ops:
#         pass
#     else:
#         color_plate = {0: [119, 17, 17], 1: [202, 198, 144], 2: [186, 200, 238], 3: [0, 0, 200], 4: [89, 125, 49],
#                        5: [16, 68, 16], 6: [187, 129, 156], 7: [208, 206, 72], 8: [98, 39, 69], 9: [102, 102, 102],
#                        10: [76, 74, 95], 11: [16, 16, 68], 12: [68, 65, 38], 13: [117, 214, 70], 14: [221, 67, 72],
#                        15: [92, 133, 119]}
#         material_names = {0: 'Asphalt', 1: 'Ceramic', 2: 'Concrete', 3: 'Fabric', 4: 'Foliage', 5: 'Food',
#                           6: 'Glass', 7: 'Metal', 8: 'Paper', 9: 'Plaster', 10: 'Plastic', 11: 'Rubber',
#                           12: 'Soil', 13: 'Stone', 14: 'Water', 15: 'Wood'}
#     if not mask:
#         output = output.argmax(dim=1)
#     output = output.squeeze().cpu()
#     rgbmask = np.zeros([output.size()[0], output.size()[1], 3], dtype=np.uint8)
#     for i in np.unique(output):
#         rgbmask[output == i] = color_plate[i]
#         # Convert numpy array to PIL Image
#         rgbmask_pil = Image.fromarray(rgbmask)
#         draw = ImageDraw.Draw(rgbmask_pil)
#         # Use a truetype font
#         font = ImageFont.truetype("arial.ttf", 15)
#         # Find the center of the bounding box of the mask
#         where = np.array(np.where(output == i))
#         if where.shape[1] > 4000:
#             print(f"Material: {material_names[i]}")  # Print the material name
#             center = where.mean(axis=1)
#             draw.text(tuple(center), f"{material_names[i]}", font=font)
#     # Convert PIL Image back to numpy array
#     rgbmask = np.array(rgbmask_pil)
#     return rgbmask
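# A tiny self-contained sketch of how the colour plate maps class indices to RGB
# (illustration only, not part of the commit; the 2x2 mask below is made up):
# import numpy as np
# color_plate_demo = {2: [186, 200, 238], 3: [0, 0, 200]}   # subset of the palette above
# class_mask = np.array([[2, 3], [3, 2]])                   # hypothetical per-pixel class ids
# rgb = np.zeros((*class_mask.shape, 3), dtype=np.uint8)
# for i in np.unique(class_mask):
#     rgb[class_mask == i] = color_plate_demo[i]            # class 3 now renders as the new blue [0, 0, 200]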
def resize_img_tensors(merged_tensors, segments_tensor):
    assert len(merged_tensors) == len(segments_tensor), "number of images does not match with segments"
......
ui.py 0 → 100644
import tkinter as tk
from tkinter import filedialog
import cv2

# Opens a file dialog and returns the path of the selected image
def select_image():
    root = tk.Tk()
    root.withdraw()
    # display a dialog box to open an image and get its file path
    filename = filedialog.askopenfilename()
    print(f"Selected file: {filename}")
    return filename

# Retrieves the image resolution and saves an RGB copy to the edgenet360 input folder
def get_res(path):
    img = cv2.imread(path)
    height, width, _ = img.shape
    cv2.imwrite(r'C:\Project\AV-VR-Internship\edgenet360\Data\Input\rgb.png', img)
    return height, width
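# A defensive variant of get_res (a sketch, not part of the commit): cv2.imread returns
# None for unreadable paths, which would otherwise make img.shape raise an AttributeError.
# The name get_res_safe and the optional rgb_copy_path parameter are assumptions.
def get_res_safe(path, rgb_copy_path=None):
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    if rgb_copy_path is not None:
        cv2.imwrite(rgb_copy_path, img)   # optional copy, mirroring get_res above
    height, width = img.shape[:2]
    return height, width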
# selected_image_path = select_image()
# print("Selected Image Path:", selected_image_path)
# print("resolution:", get_res(selected_image_path))
\ No newline at end of file