import os

import imageio
import numpy as np

from ui import get_res

# Hard-coded I/O locations; raw strings so the backslashes are not treated as
# (invalid) escape sequences.  TODO: make these configurable via CLI arguments.
CUBE_FACES_DIR = r"C:\Project\AV-VR-Internship\material_recognition\Dynamic-Backward-Attention-Transformer\output\split_output"
OUTPUT_PATH = r"C:\Project\AV-VR-Internship\edgenet360\Data\Input\material.png"


def clamp(x, min_val, max_val):
    """Clamp *x* into [min_val, max_val] to prevent out-of-bounds image access."""
    return max(min_val, min(x, max_val))


def nearest_neighbour_interpolation(img, x, y):
    """Return the pixel of *img* nearest to the (possibly fractional) (x, y).

    The coordinates are truncated to integers and clamped to the image bounds.
    """
    h, w, _ = img.shape
    x, y = clamp(int(x), 0, w - 1), clamp(int(y), 0, h - 1)
    return img[y, x]


def orientation_to_face(x, y, z):
    """Map a 3-D direction to a cube face and 2-D coordinates on that face.

    Returns ``(face_name, u, v)`` where ``u``/``v`` are normalised [-1, 1]
    coordinates on the dominant-axis face.  Face names carry the ``rgb``
    suffix used by the split_output image files.
    """
    abs_x, abs_y, abs_z = abs(x), abs(y), abs(z)
    if abs_x >= abs_y and abs_x >= abs_z:
        # x-dominant direction: front or back face.
        if x > 0:
            return 'frontrgb', -y / abs_x, -z / abs_x
        return 'backrgb', y / abs_x, -z / abs_x
    if abs_y >= abs_x and abs_y >= abs_z:
        # y-dominant direction: right or left face.
        if y > 0:
            return 'rightrgb', -x / abs_y, -z / abs_y
        return 'leftrgb', x / abs_y, -z / abs_y
    # z-dominant direction: top or bottom face.
    if z > 0:
        return 'toprgb', x / abs_z, y / abs_z
    return 'bottomrgb', -x / abs_z, y / abs_z


def cubemap_to_omnidirectional(cube_faces, out_width, out_height):
    """Assemble an omnidirectional (equirectangular) image from 6 cube faces.

    *cube_faces* maps face names ('frontrgb', ...) to square HxWx3 uint8
    arrays.  For each output pixel the spherical direction is computed,
    mapped onto a cube face and sampled with nearest-neighbour interpolation.
    """
    omnidirectional = np.zeros((out_height, out_width, 3), dtype=np.uint8)
    for j in range(out_height):
        theta = j / out_height * np.pi          # polar angle
        # sin/cos of theta are invariant over the inner loop — hoist them.
        sin_theta, cos_theta = np.sin(theta), np.cos(theta)
        for i in range(out_width):
            phi = i / out_width * 2 * np.pi     # azimuth
            x = sin_theta * np.cos(phi)
            y = sin_theta * np.sin(phi)
            z = cos_theta

            face, xf, yf = orientation_to_face(x, y, z)
            face_img = cube_faces[face]
            face_size = face_img.shape[0]

            # Map normalised [-1, 1] face coordinates to pixel coordinates.
            u = (xf + 1) * face_size / 2
            v = (yf + 1) * face_size / 2

            omnidirectional[j, i] = nearest_neighbour_interpolation(face_img, u, v)

    return omnidirectional


if __name__ == "__main__":
    faces = ["rightrgb", "leftrgb", "toprgb", "bottomrgb", "frontrgb", "backrgb"]
    cube_faces = {}
    for face in faces:
        image_data = imageio.imread(os.path.join(CUBE_FACES_DIR, f"{face}.png"))
        # Re-orient faces so they line up with the equirectangular projection:
        # top/bottom are rotated 90 degrees, all but left/right are mirrored.
        if face in ("toprgb", "bottomrgb"):
            image_data = np.rot90(image_data, 1)
        if face not in ("leftrgb", "rightrgb"):
            image_data = image_data[:, ::-1]  # horizontal flip
        cube_faces[face] = image_data

    # The source panorama path is handed over by split_img.py through a
    # temporary file; strip() guards against a stray trailing newline.
    with open('path.txt', 'r') as file:
        input_path = file.readline().strip()
    print(f'path = {input_path}')
    os.remove('path.txt')

    height, width = get_res(input_path)
    print(height, width)

    omnidirectional_img = cubemap_to_omnidirectional(cube_faces, width, height)
    imageio.v2.imsave(OUTPUT_PATH, omnidirectional_img)
    print(f"Omnidirectional image saved to {OUTPUT_PATH}")
###### This code has been Referenced from: https://github.com/jaxry/panorama-to-cubemap/blob/gh-pages/convert.js

import os

import imageio
import numpy as np

from ui import select_image

# Where the six generated cube-face images are written; raw string so the
# backslashes are not treated as (invalid) escape sequences.
OUTPUT_DIR = r"C:\Project\AV-VR\material_recognition\Dynamic-Backward-Attention-Transformer\split_output"


def clamp(x, min_val, max_val):
    """Clamp *x* into [min_val, max_val] to prevent out-of-bounds image access."""
    return max(min_val, min(x, max_val))


def mod(x, n):
    """Mathematical modulus of *x* w.r.t. *n*; the result is always non-negative."""
    return ((x % n) + n) % n


def nearest_neigbour_interpolation(img, x, y):
    """Return the pixel of *img* nearest to fractional coordinates (x, y).

    NOTE(review): the misspelled name ("neigbour") is kept so existing
    callers keep working.
    """
    h, w, _ = img.shape
    x, y = clamp(int(x), 0, w - 1), clamp(int(y), 0, h - 1)
    return img[y, x]


def orient_face(face, x, y, out):
    """Fill *out* with the 3-D direction for point (x, y) on cube face *face*."""
    if face == 'front':
        out[0], out[1], out[2] = 1, x, -y
    elif face == 'back':
        out[0], out[1], out[2] = -1, -x, -y
    elif face == 'right':
        out[0], out[1], out[2] = -x, 1, -y
    elif face == 'left':
        out[0], out[1], out[2] = x, -1, -y
    elif face == 'top':
        out[0], out[1], out[2] = -y, -x, 1
    elif face == 'bottom':
        out[0], out[1], out[2] = y, -x, -1


def face_rendering(img, face, face_size):
    """Render one *face_size* x *face_size* cube face from equirectangular *img*.

    For every face pixel the 3-D viewing direction is computed, converted to
    spherical coordinates (longitude/latitude) and used to sample the source
    equirectangular image with nearest-neighbour interpolation.
    """
    out_face = np.zeros((face_size, face_size, 3), dtype=np.uint8)
    for x in range(face_size):
        for y in range(face_size):
            out = [0, 0, 0]
            orient_face(face, (2 * (x + 0.5) / face_size - 1), (2 * (y + 0.5) / face_size - 1), out)
            r = np.sqrt(out[0]**2 + out[1]**2 + out[2]**2)
            longitude = mod(np.arctan2(out[1], out[0]), 2 * np.pi)
            latitude = np.arccos(out[2] / r)
            s_x = img.shape[1] * longitude / (2 * np.pi) - 0.5
            s_y = img.shape[0] * latitude / np.pi - 0.5
            out_face[y, x] = nearest_neigbour_interpolation(img, s_x, s_y)
    return out_face


def generate_cube_faces(input_path, output_path="cube_faces_output"):
    """Split the equirectangular image at *input_path* into 6 cube-face PNGs."""
    img = imageio.imread(input_path)

    face_size = 512  # each face output image will be 512x512
    faces = ["right", "left", "top", "bottom", "front", "back"]

    results = {}
    for face in faces:
        results[face] = face_rendering(img, face, face_size)
        face_output_path = os.path.join(output_path, f"{face}.png")
        imageio.imsave(face_output_path, results[face])
        print(f"Saved {face} face to {face_output_path}")


if __name__ == "__main__":
    # Guarded so importing this module no longer pops up a GUI file dialog.
    input_path = select_image()
    # Hand the selected path over to combine_img.py via a temporary file.
    # (with-block closes the file; the redundant explicit close() is gone.)
    with open('path.txt', 'w') as file:
        file.write(input_path)

    # exist_ok avoids the exists()/makedirs() race of the original.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    generate_cube_faces(input_path, OUTPUT_DIR)
###### This code has been Referenced from: https://github.com/jaxry/panorama-to-cubemap/blob/gh-pages/convert.js

import os
import sys

import imageio
import numpy as np

from ui import select_image  # previous interactive input mechanism; kept importable

# Where the six generated cube-face images are written; raw string so the
# backslashes are not treated as (invalid) escape sequences.
OUTPUT_DIR = r"C:\Project\AV-VR-Internship\material_recognition\Dynamic-Backward-Attention-Transformer\split_output"


def clamp(x, min_val, max_val):
    """Clamp *x* into [min_val, max_val] to prevent out-of-bounds image access."""
    return max(min_val, min(x, max_val))


def mod(x, n):
    """Mathematical modulus of *x* w.r.t. *n*; the result is always non-negative."""
    return ((x % n) + n) % n


def nearest_neigbour_interpolation(img, x, y):
    """Return the pixel of *img* nearest to fractional coordinates (x, y).

    NOTE(review): the misspelled name ("neigbour") is kept so existing
    callers keep working.
    """
    h, w, _ = img.shape
    x, y = clamp(int(x), 0, w - 1), clamp(int(y), 0, h - 1)
    return img[y, x]


def orient_face(face, x, y, out):
    """Fill *out* with the 3-D direction for point (x, y) on cube face *face*."""
    if face == 'front':
        out[0], out[1], out[2] = 1, x, -y
    elif face == 'back':
        out[0], out[1], out[2] = -1, -x, -y
    elif face == 'right':
        out[0], out[1], out[2] = -x, 1, -y
    elif face == 'left':
        out[0], out[1], out[2] = x, -1, -y
    elif face == 'top':
        out[0], out[1], out[2] = -y, -x, 1
    elif face == 'bottom':
        out[0], out[1], out[2] = y, -x, -1


def face_rendering(img, face, face_size):
    """Render one *face_size* x *face_size* cube face from omnidirectional *img*.

    For every face pixel the 3-D viewing direction is computed, converted to
    spherical coordinates (longitude/latitude) and used to sample the source
    omnidirectional image with nearest-neighbour interpolation.
    """
    out_face = np.zeros((face_size, face_size, 3), dtype=np.uint8)
    for x in range(face_size):
        for y in range(face_size):
            out = [0, 0, 0]
            orient_face(face, (2 * (x + 0.5) / face_size - 1), (2 * (y + 0.5) / face_size - 1), out)
            r = np.sqrt(out[0]**2 + out[1]**2 + out[2]**2)
            longitude = mod(np.arctan2(out[1], out[0]), 2 * np.pi)
            latitude = np.arccos(out[2] / r)
            s_x = img.shape[1] * longitude / (2 * np.pi) - 0.5
            s_y = img.shape[0] * latitude / np.pi - 0.5
            out_face[y, x] = nearest_neigbour_interpolation(img, s_x, s_y)
    return out_face


def generate_cube_faces(input_path, output_path="cube_faces_output"):
    """Split the omnidirectional image at *input_path* into 6 cube-face PNGs."""
    img = imageio.imread(input_path)

    face_size = 512  # each face output image will be 512x512
    faces = ["right", "left", "top", "bottom", "front", "back"]

    results = {}
    for face in faces:
        results[face] = face_rendering(img, face, face_size)
        face_output_path = os.path.join(output_path, f"{face}.png")
        imageio.imsave(face_output_path, results[face])
        print(f"Saved {face} face to {face_output_path}")


if __name__ == "__main__":
    # Guarded so importing this module has no side effects; also fail with a
    # usage message instead of an IndexError when no argument is given.
    if len(sys.argv) < 2:
        sys.exit("usage: python split_img.py <panorama_image_path>")
    input_path = sys.argv[1]

    # Hand the input path over to combine_img.py via a temporary file.
    # (with-block closes the file; the redundant explicit close() is gone.)
    with open('path.txt', 'w') as file:
        file.write(input_path)

    # exist_ok avoids the exists()/makedirs() race of the original.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    generate_cube_faces(input_path, OUTPUT_DIR)
b/torchtools/experiments/sota_segmenter.py @@ -46,7 +46,7 @@ def segment2rgb(output, mask=False, ops=False): if ops: pass else: - color_plate = {0: [119, 17, 17], 1: [202, 198, 144], 2: [186, 200, 238], 3: [124, 143, 166], 4: [89, 125, 49], + color_plate = {0: [119, 17, 17], 1: [202, 198, 144], 2: [186, 200, 238], 3: [0, 0, 200], 4: [89, 125, 49], 5: [16, 68, 16], 6: [187, 129, 156], 7: [208, 206, 72], 8: [98, 39, 69], 9: [102, 102, 102], 10: [76, 74, 95], 11: [16, 16, 68], 12: [68, 65, 38], 13: [117, 214, 70], 14: [221, 67, 72], 15: [92, 133, 119]} @@ -59,6 +59,41 @@ def segment2rgb(output, mask=False, ops=False): return rgbmask +# from PIL import ImageDraw, ImageFont +# from scipy.ndimage import label + +# def segment2rgb(output, mask=False, ops=False): +# if ops: +# pass +# else: +# color_plate = {0: [119, 17, 17], 1: [202, 198, 144], 2: [186, 200, 238], 3: [0, 0, 200], 4: [89, 125, 49], +# 5: [16, 68, 16], 6: [187, 129, 156], 7: [208, 206, 72], 8: [98, 39, 69], 9: [102, 102, 102], +# 10: [76, 74, 95], 11: [16, 16, 68], 12: [68, 65, 38], 13: [117, 214, 70], 14: [221, 67, 72], +# 15: [92, 133, 119]} +# material_names = {0: 'Asphalt', 1: 'Ceramic', 2: 'Concrete', 3: 'Fabric', 4: 'Foliage', 5: 'Food', +# 6: 'Glass', 7: 'Metal', 8: 'Paper', 9: 'Plaster', 10: 'Plastic', 11: 'Rubber', +# 12: 'Soil', 13: 'Stone', 14: 'Water', 15: 'Wood'} +# if not mask: +# output = output.argmax(dim=1) +# output = output.squeeze().cpu() +# rgbmask = np.zeros([output.size()[0], output.size()[1], 3], dtype=np.uint8) +# for i in np.unique(output): +# rgbmask[output == i] = color_plate[i] +# # Convert numpy array to PIL Image +# rgbmask_pil = Image.fromarray(rgbmask) +# draw = ImageDraw.Draw(rgbmask_pil) +# # Use a truetype font +# font = ImageFont.truetype("arial.ttf", 15) +# # Find the center of the bounding box of the mask +# where = np.array(np.where(output == i)) +# if where.shape[1] > 4000: +# print(f"Material: {material_names[i]}") # Print the material name +# center = 
import tkinter as tk
from tkinter import filedialog

import cv2

# Location where a copy of the selected panorama is written for the
# edgenet360 pipeline; raw string avoids backslash-escape pitfalls.
RGB_COPY_PATH = r"C:\Project\AV-VR-Internship\edgenet360\Data\Input\rgb.png"


def select_image():
    """Open a file dialog and return the path of the image the user picked.

    Returns an empty string if the dialog is cancelled.
    """
    root = tk.Tk()
    root.withdraw()  # hide the empty main window; we only want the dialog

    filename = filedialog.askopenfilename()
    # Fixed: the f-string had no placeholder and never showed the chosen file.
    print(f"Selected file: {filename}")
    return filename


def get_res(path):
    """Return (height, width) of the image at *path*.

    Side effect: writes a copy of the image to RGB_COPY_PATH for edgenet360.

    Raises:
        FileNotFoundError: if the image cannot be read (cv2.imread returns
            None instead of raising, which previously crashed on ``.shape``).
    """
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    height, width, _ = img.shape
    cv2.imwrite(RGB_COPY_PATH, img)

    return height, width