Merge branch 'main' of https://github.com/Muhammad-Hazimi-Yusri/360monodepth

aee403ca · mhby1g21 · fd61a5c1 · 5c053f22 · aee403ca · aee403ca
Commit aee403ca authored 1 year ago by mhby1g21
--- a/code/python/requirements.txt
+++ b/code/python/requirements.txt
@@ -7,4 +7,4 @@ opencv-python>=4.5.1.48
 torch>=1.8.1
 torchvision>=0.9.1
 wheel>=0.36.2
-timm>=0.4.12
+timm==0.6.7
\ No newline at end of file
--- a/code/python/src/main.py
+++ b/code/python/src/main.py
@@ -97,7 +97,7 @@ class Options():
        parser.add_argument("--padding", type=float, default="0.3")
        parser.add_argument("--multires_levels", type=int, default=1, help="Levels of multi-resolution pyramid. If > 1"
                                                                           "then --grid_size is the lowest resolution")
-        parser.add_argument("--persp_monodepth", type=str, default="midas2", choices=["midas2", "midas3", "boost"])
+        parser.add_argument("--persp_monodepth", type=str, default="midas2", choices=["midas2", "midas3", "boost", "zoedepth"])
        parser.add_argument('--depthalignstep', type=int, nargs='+', default=[1, 2, 3, 4])
        parser.add_argument("--rm_debug_folder", default=True, action='store_false')
        parser.add_argument("--intermediate_data", default=False, action='store_true', help="save intermediate data"
@@ -458,7 +458,10 @@ def monodepth_360(opt):
            line = line.splitlines()[0].split(" ")
            erp_image_filename = line[0]
+            if len(line) > 1:
                erp_gtdepth_filename = line[1] if line[1] != 'None' else ""
+            else:
+                erp_gtdepth_filename = ""
            if "matterport" in erp_image_filename:
                opt.dataset_matterport_hexagon_mask_enable = True

--- a/code/python/src/utility/cam_models.py
+++ b/code/python/src/utility/cam_models.py
@@ -316,11 +316,15 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_
    fisheye_3d_points = cam2world(fisheye_2d_points, fisheye_model) 
    # point3d2obj(fisheye_3d_points, "D:/1.obj")
+    fisheye_image = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
+    fisheye_image_weight = np.zeros((fisheye_image_height, fisheye_image_width), float)
    fisheye_image = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
    fisheye_image_weight = np.zeros((fisheye_image_height, fisheye_image_width), float)
    for index in range(0, len(image_data_list)):
        fisheye_image_weight_subimg = np.zeros((fisheye_image_height, fisheye_image_width), float)
        fisheye_image_subimage = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
+        fisheye_image_weight_subimg = np.zeros((fisheye_image_height, fisheye_image_width), float)
+        fisheye_image_subimage = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
        image_param = image_param_list[index]
        image_data = image_data_list[index]
@@ -342,6 +346,7 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_
        available_pixels_list_fov = np.logical_and(points_azimuth_inhfov, points_altitude_invfov)
        available_pixels_list_fov_mat = available_pixels_list_fov.reshape(fisheye_image_height, fisheye_image_width)
+        fisheye_2d_points_subimage = fisheye_2d_points[available_pixels_list_fov].astype(int)
        fisheye_2d_points_subimage = fisheye_2d_points[available_pixels_list_fov].astype(int)
        # projection to pin-hole image
@@ -430,6 +435,7 @@ def sample_rgb_image(img, model, fov=[60, 60], canvas_size=[400, 400], sample_gr
        # fetch_from = world2cam(world_cs.T, model)
        fetch_from = world2cam_slow(world_cs, model).T
        tangential_img = np.zeros(tuple(canvas_size) + (channel_number,), dtype=float)
+        tangential_img = np.zeros(tuple(canvas_size) + (channel_number,), dtype=float)
        for channel in range(0, channel_number):
            tangential_img[:, :, channel] = ndimage.map_coordinates(img[:, :, channel], [fetch_from[:, 1].reshape(canvas_size), fetch_from[:, 0].reshape(canvas_size)], order=1, mode='constant')
@@ -502,10 +508,12 @@ def sample_img(img, cam_model, fov=53, run_midas=False):
    equirect_size = (3, 1000, 2000)     # Size for equirectangular image
    equirect_3D_points, _ = equirect_cam2world(equirect_size[1:])
    equirect_3D_points_rgb = np.zeros((7, equirect_3D_points.shape[-1]), dtype=float)
+    equirect_3D_points_rgb = np.zeros((7, equirect_3D_points.shape[-1]), dtype=float)
    equirect_3D_points_rgb[0, :] = equirect_3D_points[0, :]
    equirect_3D_points_rgb[1, :] = equirect_3D_points[1, :]
    equirect_3D_points_rgb[2, :] = equirect_3D_points[2, :]
+    fisheye2equirec = np.zeros((3, equirect_3D_points.shape[-1]), dtype=float)
    fisheye2equirec = np.zeros((3, equirect_3D_points.shape[-1]), dtype=float)
    #   Lines 200-205 is for converting the whole fisheye to equirectangular
    #   Points at the back of the cylinder are mapped to nan
@@ -539,6 +547,7 @@ def sample_img(img, cam_model, fov=53, run_midas=False):
            #   Fetch RGB from fisheye image to assemble perspective subview
            fetch_from = world2cam(world_cs.T, cam_model).astype(int)
+            fetch_from = world2cam(world_cs.T, cam_model).astype(int)
            fetch_from[:, 0] = np.clip(fetch_from[:, 0], 0, width-1)
            fetch_from[:, 1] = np.clip(fetch_from[:, 1], 0, height-1)
            virtual2fisheye_idxs = np.dstack((fetch_from[:, 0].reshape(canvas_size), fetch_from[:, 1].reshape(canvas_size)))
@@ -770,6 +779,7 @@ def generate_camera_orientation(hfov_fisheye, vfov_fisheye, hfov_pinhole, vfov_p
    overlap_area_v = v_index[0] + vfov_pinhole / 2.0 - (v_index[1] - vfov_pinhole / 2.0)
    log.debug("the vertical overlap angle is {}".format(overlap_area_v))
+    z_rotation = np.zeros(x_rotation.shape, float)
    z_rotation = np.zeros(x_rotation.shape, float)
    xyz_rotation_array = np.stack((x_rotation, y_rotation, z_rotation), axis=0)
    xyz_rotation_array = xyz_rotation_array.reshape([3, horizontal_size * vertical_size])

--- a/code/python/src/utility/depthmap_utils.py
+++ b/code/python/src/utility/depthmap_utils.py
@@ -5,6 +5,9 @@ from skimage.transform import pyramid_gaussian
 from PIL import Image
 import numpy as np
+import torch
+from torchvision import transforms
+from torchvision.utils import make_grid
 from struct import unpack
 import os
@@ -170,6 +173,8 @@ def run_persp_monodepth(rgb_image_data_list, persp_monodepth, use_large_model=Tr
        return MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=use_large_model)
    if persp_monodepth == "boost":
        return boosting_monodepth(rgb_image_data_list)
+    if persp_monodepth == "zoedepth":
+        return zoedepth_monodepth(rgb_image_data_list)
 def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=True):
@@ -181,7 +186,6 @@ def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=T
    :param use_large_model: the MiDaS model type.
    :type use_large_model: bool, optional
    """
-    import torch
    # 1)initial PyTorch run-time environment
    if use_large_model:
@@ -247,7 +251,6 @@ def MiDaS_torch_hub_file(rgb_image_path, use_large_model=True):
    :type use_large_model: bool, optional
    """
    import cv2
-    import torch
    # import urllib.request
    # import matplotlib.pyplot as plt
@@ -297,7 +300,6 @@ def boosting_monodepth(rgb_image_data_list):
    # Load merge network
    import cv2
    import argparse
-    import torch
    import warnings
    warnings.simplefilter('ignore', np.RankWarning)
@@ -540,6 +542,39 @@ def boosting_monodepth(rgb_image_data_list):
    return depthmaps
+@torch.no_grad()
+def zoedepth_monodepth(rgb_image_data_list):
+    """Estimate a disparity map for each perspective image with ZoeDepth.
+
+    The ``ZoeD_NK`` model is loaded through ``torch.hub`` and run on the GPU when
+    CUDA is available, otherwise on the CPU. The ``metric_depth`` output for each
+    image is resized to the image's first two dimensions, negative values are
+    clamped to ``1e-6`` (with a warning), and the result is passed through
+    ``depth2disparity``.
+
+    :param rgb_image_data_list: The perspective RGB images. Assumed to be numpy arrays
+        of shape [height, width, channel] with values in [0, 255] -- TODO confirm against caller.
+    :type rgb_image_data_list: list
+    :return: One ``depth2disparity`` result per input image, in input order.
+    :rtype: list
+    """
+    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    # NOTE(review): this overrides a private torch.hub hook so the fork check always
+    # passes; presumably a workaround for hub validation failures -- confirm it is still needed.
+    torch.hub._validate_not_a_forked_repo=lambda a,b,c: True
+    # Forces a fresh download of the MiDaS hub repository before ZoeDepth is loaded.
+    torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)
+    tfoms = transforms.Compose([transforms.ToTensor()])
+    repo = "isl-org/ZoeDepth"
+    model_zoe = torch.hub.load(repo, "ZoeD_NK", pretrained=True)
+    model_zoe = model_zoe.to(device)
+    model_zoe.eval()
+    depthmaps = []
+    for img in rgb_image_data_list:
+        img_t = tfoms(img / 255.).unsqueeze(0).type(torch.float32).to(device)
+        out = model_zoe(img_t)['metric_depth']
+        # Resize the prediction back to the input image resolution.
+        out = torch.nn.functional.interpolate(
+            out,
+            size=img.shape[:2],
+            mode="nearest-exact",
+        ).squeeze(0)
+        if torch.any(out < 0):
+            log.warn("Negative depth value")
+            out = torch.clamp(out, min=1e-6)
+        # Copy to host memory right away so the per-image outputs do not pile up
+        # on the device while the model is still resident.
+        depthmaps.append(out.squeeze().cpu().numpy())
+    del model_zoe
+    if device.type == "cuda":
+        # Hand the cached blocks back to the driver now that the model is gone.
+        torch.cuda.empty_cache()
+    return [depth2disparity(d) for d in depthmaps]
 def read_dpt(dpt_file_path):
    """read depth map from *.dpt file.

--- a/code/python/src/utility/projection_icosahedron.py
+++ b/code/python/src/utility/projection_icosahedron.py
@@ -454,7 +454,7 @@ def ico2erp_image(tangent_images, erp_image_height, padding_size=0.0, blender_me
    # compute the final optical flow base on weight
    if blender_method == "mean":
-        # erp_flow_weight_mat = np.full(erp_flow_weight_mat.shape, erp_flow_weight_mat.max(), np.float) # debug
+        # erp_flow_weight_mat = np.full(erp_flow_weight_mat.shape, erp_flow_weight_mat.max(), float) # debug
        non_zero_weight_list = erp_weight_mat != 0
        if not np.all(non_zero_weight_list):
            log.warn("the optical flow weight matrix contain 0.")

--- a/code/python/src/utility/spherical_coordinates.py
+++ b/code/python/src/utility/spherical_coordinates.py
@@ -170,7 +170,7 @@ def sph2car(theta, phi, radius=1.0):
    :return: +x right, +y down, +z is froward, shape is [3, point_number]
    :rtype: numpy
    """
-    # points_cartesian_3d = np.array.zeros((theta.shape[0],3),np.float)
+    # points_cartesian_3d = np.array.zeros((theta.shape[0],3),float)
    x = radius * np.cos(phi) * np.sin(theta)
    z = radius * np.cos(phi) * np.cos(theta)
    y = -radius * np.sin(phi)

--- a/code/python/src/utility/subimage.py
+++ b/code/python/src/utility/subimage.py
@@ -136,6 +136,7 @@ def erp_ico_cam_intrparams(image_width, padding_size=0):
    # use tangent plane
    tangent_points_x_min = np.amin(np.array(triangle_points_tangent)[:, 0])
+    tangent_points_x_max = np.amax(np.array(triangle_points_tangent)[:, 0])
    tangent_points_y_min = np.amin(np.array(triangle_points_tangent)[:, 1])
    tangent_points_y_max = np.amax(np.array(triangle_points_tangent)[:, 1])
    fov_v = np.abs(np.arctan2(tangent_points_y_min, 1.0)) + np.abs(np.arctan2(tangent_points_y_max, 1.0))
@@ -144,27 +145,28 @@ def erp_ico_cam_intrparams(image_width, padding_size=0):
    log.debug("Pin-hole camera fov_h: {}, fov_v: {}".format(np.degrees(fov_h), np.degrees(fov_v)))
    # image aspect ratio, the triangle is equilateral triangle
-    image_height = 0.5 * image_width / np.tan(np.radians(30.0))
+    image_height = image_width  # 0.5 * image_width / np.tan(np.radians(30.0))
-    fx = 0.5 * image_width / np.tan(fov_h * 0.5)
+    fx = image_width / np.abs(tangent_points_x_max - tangent_points_x_min)  # 0.5 * image_width / np.tan(fov_h * 0.5)
-    fy = 0.5 * image_height / np.tan(fov_v * 0.5)
+    fy = fx  # 0.5 * image_height / np.tan(fov_v * 0.5)
-    cx = (image_width - 1) / 2.0
+    cx = image_width / 2.0
+    cy = image_height / 2.0
    # invert and upright triangle cy
-    cy_invert = 0.5 * (image_width - 1.0) * np.tan(np.radians(30.0)) + 10.0
+    # cy_invert = 0.5 * (image_width - 1.0) * np.tan(np.radians(30.0)) + 10.0
-    cy_up = 0.5 * (image_width - 1.0) / np.sin(np.radians(60.0)) + 10.0
+    # cy_up = 0.5 * (image_width - 1.0) / np.sin(np.radians(60.0)) + 10.0
    subimage_cam_param_list = []
    for index in range(0, 20):
        # intrinsic parameters
-        cy = None
+        # cy = None
-        if 0 <= index <= 4:
+        # if 0 <= index <= 4:
-            cy = cy_up
+        #     cy = cy_up
-        elif 5 <= index <= 9:
+        # elif 5 <= index <= 9:
-            cy = cy_invert
+        #     cy = cy_invert
-        elif 10 <= index <= 14:
+        # elif 10 <= index <= 14:
-            cy = cy_up
+        #     cy = cy_up
-        else:
+        # else:
-            cy = cy_invert
+        #     cy = cy_invert
        intrinsic_matrix = np.array([[fx, 0, cx],
                                     [0, fy, cy],