Skip to content
Snippets Groups Projects
Commit aee403ca authored by mhby1g21's avatar mhby1g21
Browse files
parents fd61a5c1 5c053f22
Branches
No related tags found
No related merge requests found
...@@ -7,4 +7,4 @@ opencv-python>=4.5.1.48 ...@@ -7,4 +7,4 @@ opencv-python>=4.5.1.48
torch>=1.8.1 torch>=1.8.1
torchvision>=0.9.1 torchvision>=0.9.1
wheel>=0.36.2 wheel>=0.36.2
timm>=0.4.12 timm==0.6.7
\ No newline at end of file \ No newline at end of file
...@@ -97,7 +97,7 @@ class Options(): ...@@ -97,7 +97,7 @@ class Options():
parser.add_argument("--padding", type=float, default="0.3") parser.add_argument("--padding", type=float, default="0.3")
parser.add_argument("--multires_levels", type=int, default=1, help="Levels of multi-resolution pyramid. If > 1" parser.add_argument("--multires_levels", type=int, default=1, help="Levels of multi-resolution pyramid. If > 1"
"then --grid_size is the lowest resolution") "then --grid_size is the lowest resolution")
parser.add_argument("--persp_monodepth", type=str, default="midas2", choices=["midas2", "midas3", "boost"]) parser.add_argument("--persp_monodepth", type=str, default="midas2", choices=["midas2", "midas3", "boost", "zoedepth"])
parser.add_argument('--depthalignstep', type=int, nargs='+', default=[1, 2, 3, 4]) parser.add_argument('--depthalignstep', type=int, nargs='+', default=[1, 2, 3, 4])
parser.add_argument("--rm_debug_folder", default=True, action='store_false') parser.add_argument("--rm_debug_folder", default=True, action='store_false')
parser.add_argument("--intermediate_data", default=False, action='store_true', help="save intermediate data" parser.add_argument("--intermediate_data", default=False, action='store_true', help="save intermediate data"
...@@ -458,7 +458,10 @@ def monodepth_360(opt): ...@@ -458,7 +458,10 @@ def monodepth_360(opt):
line = line.splitlines()[0].split(" ") line = line.splitlines()[0].split(" ")
erp_image_filename = line[0] erp_image_filename = line[0]
if len(line) > 1:
erp_gtdepth_filename = line[1] if line[1] != 'None' else "" erp_gtdepth_filename = line[1] if line[1] != 'None' else ""
else:
erp_gtdepth_filename = ""
if "matterport" in erp_image_filename: if "matterport" in erp_image_filename:
opt.dataset_matterport_hexagon_mask_enable = True opt.dataset_matterport_hexagon_mask_enable = True
......
...@@ -316,11 +316,15 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_ ...@@ -316,11 +316,15 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_
fisheye_3d_points = cam2world(fisheye_2d_points, fisheye_model) fisheye_3d_points = cam2world(fisheye_2d_points, fisheye_model)
# point3d2obj(fisheye_3d_points, "D:/1.obj") # point3d2obj(fisheye_3d_points, "D:/1.obj")
fisheye_image = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
fisheye_image_weight = np.zeros((fisheye_image_height, fisheye_image_width), float)
fisheye_image = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float) fisheye_image = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
fisheye_image_weight = np.zeros((fisheye_image_height, fisheye_image_width), float) fisheye_image_weight = np.zeros((fisheye_image_height, fisheye_image_width), float)
for index in range(0, len(image_data_list)): for index in range(0, len(image_data_list)):
fisheye_image_weight_subimg = np.zeros((fisheye_image_height, fisheye_image_width), float) fisheye_image_weight_subimg = np.zeros((fisheye_image_height, fisheye_image_width), float)
fisheye_image_subimage = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float) fisheye_image_subimage = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
fisheye_image_weight_subimg = np.zeros((fisheye_image_height, fisheye_image_width), float)
fisheye_image_subimage = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
image_param = image_param_list[index] image_param = image_param_list[index]
image_data = image_data_list[index] image_data = image_data_list[index]
...@@ -342,6 +346,7 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_ ...@@ -342,6 +346,7 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_
available_pixels_list_fov = np.logical_and(points_azimuth_inhfov, points_altitude_invfov) available_pixels_list_fov = np.logical_and(points_azimuth_inhfov, points_altitude_invfov)
available_pixels_list_fov_mat = available_pixels_list_fov.reshape(fisheye_image_height, fisheye_image_width) available_pixels_list_fov_mat = available_pixels_list_fov.reshape(fisheye_image_height, fisheye_image_width)
fisheye_2d_points_subimage = fisheye_2d_points[available_pixels_list_fov].astype(int)
fisheye_2d_points_subimage = fisheye_2d_points[available_pixels_list_fov].astype(int) fisheye_2d_points_subimage = fisheye_2d_points[available_pixels_list_fov].astype(int)
# projection to pin-hole image # projection to pin-hole image
...@@ -430,6 +435,7 @@ def sample_rgb_image(img, model, fov=[60, 60], canvas_size=[400, 400], sample_gr ...@@ -430,6 +435,7 @@ def sample_rgb_image(img, model, fov=[60, 60], canvas_size=[400, 400], sample_gr
# fetch_from = world2cam(world_cs.T, model) # fetch_from = world2cam(world_cs.T, model)
fetch_from = world2cam_slow(world_cs, model).T fetch_from = world2cam_slow(world_cs, model).T
tangential_img = np.zeros(tuple(canvas_size) + (channel_number,), dtype=float) tangential_img = np.zeros(tuple(canvas_size) + (channel_number,), dtype=float)
tangential_img = np.zeros(tuple(canvas_size) + (channel_number,), dtype=float)
for channel in range(0, channel_number): for channel in range(0, channel_number):
tangential_img[:, :, channel] = ndimage.map_coordinates(img[:, :, channel], [fetch_from[:, 1].reshape(canvas_size), fetch_from[:, 0].reshape(canvas_size)], order=1, mode='constant') tangential_img[:, :, channel] = ndimage.map_coordinates(img[:, :, channel], [fetch_from[:, 1].reshape(canvas_size), fetch_from[:, 0].reshape(canvas_size)], order=1, mode='constant')
...@@ -502,10 +508,12 @@ def sample_img(img, cam_model, fov=53, run_midas=False): ...@@ -502,10 +508,12 @@ def sample_img(img, cam_model, fov=53, run_midas=False):
equirect_size = (3, 1000, 2000) # Size for equirectangular image equirect_size = (3, 1000, 2000) # Size for equirectangular image
equirect_3D_points, _ = equirect_cam2world(equirect_size[1:]) equirect_3D_points, _ = equirect_cam2world(equirect_size[1:])
equirect_3D_points_rgb = np.zeros((7, equirect_3D_points.shape[-1]), dtype=float) equirect_3D_points_rgb = np.zeros((7, equirect_3D_points.shape[-1]), dtype=float)
equirect_3D_points_rgb = np.zeros((7, equirect_3D_points.shape[-1]), dtype=float)
equirect_3D_points_rgb[0, :] = equirect_3D_points[0, :] equirect_3D_points_rgb[0, :] = equirect_3D_points[0, :]
equirect_3D_points_rgb[1, :] = equirect_3D_points[1, :] equirect_3D_points_rgb[1, :] = equirect_3D_points[1, :]
equirect_3D_points_rgb[2, :] = equirect_3D_points[2, :] equirect_3D_points_rgb[2, :] = equirect_3D_points[2, :]
fisheye2equirec = np.zeros((3, equirect_3D_points.shape[-1]), dtype=float)
fisheye2equirec = np.zeros((3, equirect_3D_points.shape[-1]), dtype=float) fisheye2equirec = np.zeros((3, equirect_3D_points.shape[-1]), dtype=float)
# Lines 200-205 is for converting the whole fisheye to equirectangular # Lines 200-205 is for converting the whole fisheye to equirectangular
# Points at the back of the cylinder are mapped to nan # Points at the back of the cylinder are mapped to nan
...@@ -539,6 +547,7 @@ def sample_img(img, cam_model, fov=53, run_midas=False): ...@@ -539,6 +547,7 @@ def sample_img(img, cam_model, fov=53, run_midas=False):
# Fetch RGB from fisheye image to assemble perspective subview # Fetch RGB from fisheye image to assemble perspective subview
fetch_from = world2cam(world_cs.T, cam_model).astype(int) fetch_from = world2cam(world_cs.T, cam_model).astype(int)
fetch_from = world2cam(world_cs.T, cam_model).astype(int)
fetch_from[:, 0] = np.clip(fetch_from[:, 0], 0, width-1) fetch_from[:, 0] = np.clip(fetch_from[:, 0], 0, width-1)
fetch_from[:, 1] = np.clip(fetch_from[:, 1], 0, height-1) fetch_from[:, 1] = np.clip(fetch_from[:, 1], 0, height-1)
virtual2fisheye_idxs = np.dstack((fetch_from[:, 0].reshape(canvas_size), fetch_from[:, 1].reshape(canvas_size))) virtual2fisheye_idxs = np.dstack((fetch_from[:, 0].reshape(canvas_size), fetch_from[:, 1].reshape(canvas_size)))
...@@ -770,6 +779,7 @@ def generate_camera_orientation(hfov_fisheye, vfov_fisheye, hfov_pinhole, vfov_p ...@@ -770,6 +779,7 @@ def generate_camera_orientation(hfov_fisheye, vfov_fisheye, hfov_pinhole, vfov_p
overlap_area_v = v_index[0] + vfov_pinhole / 2.0 - (v_index[1] - vfov_pinhole / 2.0) overlap_area_v = v_index[0] + vfov_pinhole / 2.0 - (v_index[1] - vfov_pinhole / 2.0)
log.debug("the vertical overlap angle is {}".format(overlap_area_v)) log.debug("the vertical overlap angle is {}".format(overlap_area_v))
z_rotation = np.zeros(x_rotation.shape, float)
z_rotation = np.zeros(x_rotation.shape, float) z_rotation = np.zeros(x_rotation.shape, float)
xyz_rotation_array = np.stack((x_rotation, y_rotation, z_rotation), axis=0) xyz_rotation_array = np.stack((x_rotation, y_rotation, z_rotation), axis=0)
xyz_rotation_array = xyz_rotation_array.reshape([3, horizontal_size * vertical_size]) xyz_rotation_array = xyz_rotation_array.reshape([3, horizontal_size * vertical_size])
......
...@@ -5,6 +5,9 @@ from skimage.transform import pyramid_gaussian ...@@ -5,6 +5,9 @@ from skimage.transform import pyramid_gaussian
from PIL import Image from PIL import Image
import numpy as np import numpy as np
import torch
from torchvision import transforms
from torchvision.utils import make_grid
from struct import unpack from struct import unpack
import os import os
...@@ -170,6 +173,8 @@ def run_persp_monodepth(rgb_image_data_list, persp_monodepth, use_large_model=Tr ...@@ -170,6 +173,8 @@ def run_persp_monodepth(rgb_image_data_list, persp_monodepth, use_large_model=Tr
return MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=use_large_model) return MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=use_large_model)
if persp_monodepth == "boost": if persp_monodepth == "boost":
return boosting_monodepth(rgb_image_data_list) return boosting_monodepth(rgb_image_data_list)
if persp_monodepth == "zoedepth":
return zoedepth_monodepth(rgb_image_data_list)
def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=True): def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=True):
...@@ -181,7 +186,6 @@ def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=T ...@@ -181,7 +186,6 @@ def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=T
:param use_large_model: the MiDaS model type. :param use_large_model: the MiDaS model type.
:type use_large_model: bool, optional :type use_large_model: bool, optional
""" """
import torch
# 1)initial PyTorch run-time environment # 1)initial PyTorch run-time environment
if use_large_model: if use_large_model:
...@@ -247,7 +251,6 @@ def MiDaS_torch_hub_file(rgb_image_path, use_large_model=True): ...@@ -247,7 +251,6 @@ def MiDaS_torch_hub_file(rgb_image_path, use_large_model=True):
:type use_large_model: bool, optional :type use_large_model: bool, optional
""" """
import cv2 import cv2
import torch
# import urllib.request # import urllib.request
# import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
...@@ -297,7 +300,6 @@ def boosting_monodepth(rgb_image_data_list): ...@@ -297,7 +300,6 @@ def boosting_monodepth(rgb_image_data_list):
# Load merge network # Load merge network
import cv2 import cv2
import argparse import argparse
import torch
import warnings import warnings
warnings.simplefilter('ignore', np.RankWarning) warnings.simplefilter('ignore', np.RankWarning)
...@@ -540,6 +542,39 @@ def boosting_monodepth(rgb_image_data_list): ...@@ -540,6 +542,39 @@ def boosting_monodepth(rgb_image_data_list):
return depthmaps return depthmaps
@torch.no_grad()
def zoedepth_monodepth(rgb_image_data_list):
    """Estimate a disparity map for each RGB image with the ZoeDepth ``ZoeD_NK`` model.

    The model is loaded through ``torch.hub`` from ``isl-org/ZoeDepth`` and run on
    CUDA when it is available, otherwise on the CPU. The ``metric_depth`` output of
    the model is resized to the input resolution, clamped to be strictly positive
    and converted with ``depth2disparity``.

    :param rgb_image_data_list: The RGB images. Each image is divided by 255 and its
        first two shape entries are used as the output size, so presumably these are
        [height, width, channel] numpy arrays in the 0-255 range (TODO confirm
        against the callers).
    :type rgb_image_data_list: list
    :return: One ``depth2disparity`` result per input image, in input order.
    :rtype: list
    """
    images = list(rgb_image_data_list)
    if not images:
        # Nothing to predict: skip the hub download and the model construction.
        return []

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Replace torch.hub's private fork validation with a stub that always passes.
    torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
    # Force a fresh download of the MiDaS hub repository before loading ZoeDepth.
    # NOTE(review): presumably needed because ZoeD_NK builds on the MiDaS
    # DPT_BEiT_L_384 backbone from the hub cache -- confirm before removing.
    torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)

    to_tensor = transforms.Compose([transforms.ToTensor()])
    model_zoe = torch.hub.load("isl-org/ZoeDepth", "ZoeD_NK", pretrained=True)
    model_zoe = model_zoe.to(device)
    model_zoe.eval()

    depthmaps = []
    for img in images:
        img_t = to_tensor(img / 255.).unsqueeze(0).type(torch.float32).to(device)
        out = model_zoe(img_t)['metric_depth']
        # Bring the prediction back to the input resolution.
        # NOTE(review): "nearest-exact" is only available in newer PyTorch
        # releases -- confirm it matches the minimum torch version in requirements.
        out = torch.nn.functional.interpolate(
            out,
            size=img.shape[:2],
            mode="nearest-exact",
        ).squeeze(0)
        if torch.any(out < 0):
            log.warn("Negative depth value")
        # Always keep the depth strictly positive, so neither negative nor zero
        # values reach depth2disparity.
        out = torch.clamp(out, min=1e-6)
        # Move each result to host memory right away, so that device memory does
        # not grow with the number of images.
        depthmaps.append(out.squeeze().cpu().numpy())
    del model_zoe

    return [depth2disparity(depth) for depth in depthmaps]
def read_dpt(dpt_file_path): def read_dpt(dpt_file_path):
"""read depth map from *.dpt file. """read depth map from *.dpt file.
......
...@@ -454,7 +454,7 @@ def ico2erp_image(tangent_images, erp_image_height, padding_size=0.0, blender_me ...@@ -454,7 +454,7 @@ def ico2erp_image(tangent_images, erp_image_height, padding_size=0.0, blender_me
# compute the final optical flow base on weight # compute the final optical flow base on weight
if blender_method == "mean": if blender_method == "mean":
# erp_flow_weight_mat = np.full(erp_flow_weight_mat.shape, erp_flow_weight_mat.max(), np.float) # debug # erp_flow_weight_mat = np.full(erp_flow_weight_mat.shape, erp_flow_weight_mat.max(), float) # debug
non_zero_weight_list = erp_weight_mat != 0 non_zero_weight_list = erp_weight_mat != 0
if not np.all(non_zero_weight_list): if not np.all(non_zero_weight_list):
log.warn("the optical flow weight matrix contain 0.") log.warn("the optical flow weight matrix contain 0.")
......
...@@ -170,7 +170,7 @@ def sph2car(theta, phi, radius=1.0): ...@@ -170,7 +170,7 @@ def sph2car(theta, phi, radius=1.0):
:return: +x right, +y down, +z is froward, shape is [3, point_number] :return: +x right, +y down, +z is froward, shape is [3, point_number]
:rtype: numpy :rtype: numpy
""" """
# points_cartesian_3d = np.array.zeros((theta.shape[0],3),np.float) # points_cartesian_3d = np.array.zeros((theta.shape[0],3),float)
x = radius * np.cos(phi) * np.sin(theta) x = radius * np.cos(phi) * np.sin(theta)
z = radius * np.cos(phi) * np.cos(theta) z = radius * np.cos(phi) * np.cos(theta)
y = -radius * np.sin(phi) y = -radius * np.sin(phi)
......
...@@ -136,6 +136,7 @@ def erp_ico_cam_intrparams(image_width, padding_size=0): ...@@ -136,6 +136,7 @@ def erp_ico_cam_intrparams(image_width, padding_size=0):
# use tangent plane # use tangent plane
tangent_points_x_min = np.amin(np.array(triangle_points_tangent)[:, 0]) tangent_points_x_min = np.amin(np.array(triangle_points_tangent)[:, 0])
tangent_points_x_max = np.amax(np.array(triangle_points_tangent)[:, 0])
tangent_points_y_min = np.amin(np.array(triangle_points_tangent)[:, 1]) tangent_points_y_min = np.amin(np.array(triangle_points_tangent)[:, 1])
tangent_points_y_max = np.amax(np.array(triangle_points_tangent)[:, 1]) tangent_points_y_max = np.amax(np.array(triangle_points_tangent)[:, 1])
fov_v = np.abs(np.arctan2(tangent_points_y_min, 1.0)) + np.abs(np.arctan2(tangent_points_y_max, 1.0)) fov_v = np.abs(np.arctan2(tangent_points_y_min, 1.0)) + np.abs(np.arctan2(tangent_points_y_max, 1.0))
...@@ -144,27 +145,28 @@ def erp_ico_cam_intrparams(image_width, padding_size=0): ...@@ -144,27 +145,28 @@ def erp_ico_cam_intrparams(image_width, padding_size=0):
log.debug("Pin-hole camera fov_h: {}, fov_v: {}".format(np.degrees(fov_h), np.degrees(fov_v))) log.debug("Pin-hole camera fov_h: {}, fov_v: {}".format(np.degrees(fov_h), np.degrees(fov_v)))
# image aspect ratio, the triangle is equilateral triangle # image aspect ratio, the triangle is equilateral triangle
image_height = 0.5 * image_width / np.tan(np.radians(30.0)) image_height = image_width # 0.5 * image_width / np.tan(np.radians(30.0))
fx = 0.5 * image_width / np.tan(fov_h * 0.5) fx = image_width / np.abs(tangent_points_x_max - tangent_points_x_min) # 0.5 * image_width / np.tan(fov_h * 0.5)
fy = 0.5 * image_height / np.tan(fov_v * 0.5) fy = fx # 0.5 * image_height / np.tan(fov_v * 0.5)
cx = (image_width - 1) / 2.0 cx = image_width / 2.0
cy = image_height / 2.0
# invert and upright triangle cy # invert and upright triangle cy
cy_invert = 0.5 * (image_width - 1.0) * np.tan(np.radians(30.0)) + 10.0 # cy_invert = 0.5 * (image_width - 1.0) * np.tan(np.radians(30.0)) + 10.0
cy_up = 0.5 * (image_width - 1.0) / np.sin(np.radians(60.0)) + 10.0 # cy_up = 0.5 * (image_width - 1.0) / np.sin(np.radians(60.0)) + 10.0
subimage_cam_param_list = [] subimage_cam_param_list = []
for index in range(0, 20): for index in range(0, 20):
# intrinsic parameters # intrinsic parameters
cy = None # cy = None
if 0 <= index <= 4: # if 0 <= index <= 4:
cy = cy_up # cy = cy_up
elif 5 <= index <= 9: # elif 5 <= index <= 9:
cy = cy_invert # cy = cy_invert
elif 10 <= index <= 14: # elif 10 <= index <= 14:
cy = cy_up # cy = cy_up
else: # else:
cy = cy_invert # cy = cy_invert
intrinsic_matrix = np.array([[fx, 0, cx], intrinsic_matrix = np.array([[fx, 0, cx],
[0, fy, cy], [0, fy, cy],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment