Skip to content
Snippets Groups Projects
Commit aee403ca authored by mhby1g21's avatar mhby1g21
Browse files
parents fd61a5c1 5c053f22
No related branches found
No related tags found
No related merge requests found
...@@ -7,4 +7,4 @@ opencv-python>=4.5.1.48 ...@@ -7,4 +7,4 @@ opencv-python>=4.5.1.48
torch>=1.8.1 torch>=1.8.1
torchvision>=0.9.1 torchvision>=0.9.1
wheel>=0.36.2 wheel>=0.36.2
timm>=0.4.12 timm==0.6.7
\ No newline at end of file \ No newline at end of file
...@@ -97,7 +97,7 @@ class Options(): ...@@ -97,7 +97,7 @@ class Options():
parser.add_argument("--padding", type=float, default="0.3") parser.add_argument("--padding", type=float, default="0.3")
parser.add_argument("--multires_levels", type=int, default=1, help="Levels of multi-resolution pyramid. If > 1" parser.add_argument("--multires_levels", type=int, default=1, help="Levels of multi-resolution pyramid. If > 1"
"then --grid_size is the lowest resolution") "then --grid_size is the lowest resolution")
parser.add_argument("--persp_monodepth", type=str, default="midas2", choices=["midas2", "midas3", "boost"]) parser.add_argument("--persp_monodepth", type=str, default="midas2", choices=["midas2", "midas3", "boost", "zoedepth"])
parser.add_argument('--depthalignstep', type=int, nargs='+', default=[1, 2, 3, 4]) parser.add_argument('--depthalignstep', type=int, nargs='+', default=[1, 2, 3, 4])
parser.add_argument("--rm_debug_folder", default=True, action='store_false') parser.add_argument("--rm_debug_folder", default=True, action='store_false')
parser.add_argument("--intermediate_data", default=False, action='store_true', help="save intermediate data" parser.add_argument("--intermediate_data", default=False, action='store_true', help="save intermediate data"
...@@ -458,7 +458,10 @@ def monodepth_360(opt): ...@@ -458,7 +458,10 @@ def monodepth_360(opt):
line = line.splitlines()[0].split(" ") line = line.splitlines()[0].split(" ")
erp_image_filename = line[0] erp_image_filename = line[0]
if len(line) > 1:
erp_gtdepth_filename = line[1] if line[1] != 'None' else "" erp_gtdepth_filename = line[1] if line[1] != 'None' else ""
else:
erp_gtdepth_filename = ""
if "matterport" in erp_image_filename: if "matterport" in erp_image_filename:
opt.dataset_matterport_hexagon_mask_enable = True opt.dataset_matterport_hexagon_mask_enable = True
......
...@@ -316,11 +316,15 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_ ...@@ -316,11 +316,15 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_
fisheye_3d_points = cam2world(fisheye_2d_points, fisheye_model) fisheye_3d_points = cam2world(fisheye_2d_points, fisheye_model)
# point3d2obj(fisheye_3d_points, "D:/1.obj") # point3d2obj(fisheye_3d_points, "D:/1.obj")
fisheye_image = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
fisheye_image_weight = np.zeros((fisheye_image_height, fisheye_image_width), float)
fisheye_image = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float) fisheye_image = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
fisheye_image_weight = np.zeros((fisheye_image_height, fisheye_image_width), float) fisheye_image_weight = np.zeros((fisheye_image_height, fisheye_image_width), float)
for index in range(0, len(image_data_list)): for index in range(0, len(image_data_list)):
fisheye_image_weight_subimg = np.zeros((fisheye_image_height, fisheye_image_width), float) fisheye_image_weight_subimg = np.zeros((fisheye_image_height, fisheye_image_width), float)
fisheye_image_subimage = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float) fisheye_image_subimage = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
fisheye_image_weight_subimg = np.zeros((fisheye_image_height, fisheye_image_width), float)
fisheye_image_subimage = np.zeros((fisheye_image_height, fisheye_image_width, channel_number), float)
image_param = image_param_list[index] image_param = image_param_list[index]
image_data = image_data_list[index] image_data = image_data_list[index]
...@@ -342,6 +346,7 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_ ...@@ -342,6 +346,7 @@ def stitch_rgb_image(image_data_list, image_param_list, fisheye_model, subimage_
available_pixels_list_fov = np.logical_and(points_azimuth_inhfov, points_altitude_invfov) available_pixels_list_fov = np.logical_and(points_azimuth_inhfov, points_altitude_invfov)
available_pixels_list_fov_mat = available_pixels_list_fov.reshape(fisheye_image_height, fisheye_image_width) available_pixels_list_fov_mat = available_pixels_list_fov.reshape(fisheye_image_height, fisheye_image_width)
fisheye_2d_points_subimage = fisheye_2d_points[available_pixels_list_fov].astype(int)
fisheye_2d_points_subimage = fisheye_2d_points[available_pixels_list_fov].astype(int) fisheye_2d_points_subimage = fisheye_2d_points[available_pixels_list_fov].astype(int)
# projection to pin-hole image # projection to pin-hole image
...@@ -430,6 +435,7 @@ def sample_rgb_image(img, model, fov=[60, 60], canvas_size=[400, 400], sample_gr ...@@ -430,6 +435,7 @@ def sample_rgb_image(img, model, fov=[60, 60], canvas_size=[400, 400], sample_gr
# fetch_from = world2cam(world_cs.T, model) # fetch_from = world2cam(world_cs.T, model)
fetch_from = world2cam_slow(world_cs, model).T fetch_from = world2cam_slow(world_cs, model).T
tangential_img = np.zeros(tuple(canvas_size) + (channel_number,), dtype=float) tangential_img = np.zeros(tuple(canvas_size) + (channel_number,), dtype=float)
tangential_img = np.zeros(tuple(canvas_size) + (channel_number,), dtype=float)
for channel in range(0, channel_number): for channel in range(0, channel_number):
tangential_img[:, :, channel] = ndimage.map_coordinates(img[:, :, channel], [fetch_from[:, 1].reshape(canvas_size), fetch_from[:, 0].reshape(canvas_size)], order=1, mode='constant') tangential_img[:, :, channel] = ndimage.map_coordinates(img[:, :, channel], [fetch_from[:, 1].reshape(canvas_size), fetch_from[:, 0].reshape(canvas_size)], order=1, mode='constant')
...@@ -502,10 +508,12 @@ def sample_img(img, cam_model, fov=53, run_midas=False): ...@@ -502,10 +508,12 @@ def sample_img(img, cam_model, fov=53, run_midas=False):
equirect_size = (3, 1000, 2000) # Size for equirectangular image equirect_size = (3, 1000, 2000) # Size for equirectangular image
equirect_3D_points, _ = equirect_cam2world(equirect_size[1:]) equirect_3D_points, _ = equirect_cam2world(equirect_size[1:])
equirect_3D_points_rgb = np.zeros((7, equirect_3D_points.shape[-1]), dtype=float) equirect_3D_points_rgb = np.zeros((7, equirect_3D_points.shape[-1]), dtype=float)
equirect_3D_points_rgb = np.zeros((7, equirect_3D_points.shape[-1]), dtype=float)
equirect_3D_points_rgb[0, :] = equirect_3D_points[0, :] equirect_3D_points_rgb[0, :] = equirect_3D_points[0, :]
equirect_3D_points_rgb[1, :] = equirect_3D_points[1, :] equirect_3D_points_rgb[1, :] = equirect_3D_points[1, :]
equirect_3D_points_rgb[2, :] = equirect_3D_points[2, :] equirect_3D_points_rgb[2, :] = equirect_3D_points[2, :]
fisheye2equirec = np.zeros((3, equirect_3D_points.shape[-1]), dtype=float)
fisheye2equirec = np.zeros((3, equirect_3D_points.shape[-1]), dtype=float) fisheye2equirec = np.zeros((3, equirect_3D_points.shape[-1]), dtype=float)
# Lines 200-205 are for converting the whole fisheye to equirectangular # Lines 200-205 are for converting the whole fisheye to equirectangular
# Points at the back of the cylinder are mapped to nan # Points at the back of the cylinder are mapped to nan
...@@ -539,6 +547,7 @@ def sample_img(img, cam_model, fov=53, run_midas=False): ...@@ -539,6 +547,7 @@ def sample_img(img, cam_model, fov=53, run_midas=False):
# Fetch RGB from fisheye image to assemble perspective subview # Fetch RGB from fisheye image to assemble perspective subview
fetch_from = world2cam(world_cs.T, cam_model).astype(int) fetch_from = world2cam(world_cs.T, cam_model).astype(int)
fetch_from = world2cam(world_cs.T, cam_model).astype(int)
fetch_from[:, 0] = np.clip(fetch_from[:, 0], 0, width-1) fetch_from[:, 0] = np.clip(fetch_from[:, 0], 0, width-1)
fetch_from[:, 1] = np.clip(fetch_from[:, 1], 0, height-1) fetch_from[:, 1] = np.clip(fetch_from[:, 1], 0, height-1)
virtual2fisheye_idxs = np.dstack((fetch_from[:, 0].reshape(canvas_size), fetch_from[:, 1].reshape(canvas_size))) virtual2fisheye_idxs = np.dstack((fetch_from[:, 0].reshape(canvas_size), fetch_from[:, 1].reshape(canvas_size)))
...@@ -770,6 +779,7 @@ def generate_camera_orientation(hfov_fisheye, vfov_fisheye, hfov_pinhole, vfov_p ...@@ -770,6 +779,7 @@ def generate_camera_orientation(hfov_fisheye, vfov_fisheye, hfov_pinhole, vfov_p
overlap_area_v = v_index[0] + vfov_pinhole / 2.0 - (v_index[1] - vfov_pinhole / 2.0) overlap_area_v = v_index[0] + vfov_pinhole / 2.0 - (v_index[1] - vfov_pinhole / 2.0)
log.debug("the vertical overlap angle is {}".format(overlap_area_v)) log.debug("the vertical overlap angle is {}".format(overlap_area_v))
z_rotation = np.zeros(x_rotation.shape, float)
z_rotation = np.zeros(x_rotation.shape, float) z_rotation = np.zeros(x_rotation.shape, float)
xyz_rotation_array = np.stack((x_rotation, y_rotation, z_rotation), axis=0) xyz_rotation_array = np.stack((x_rotation, y_rotation, z_rotation), axis=0)
xyz_rotation_array = xyz_rotation_array.reshape([3, horizontal_size * vertical_size]) xyz_rotation_array = xyz_rotation_array.reshape([3, horizontal_size * vertical_size])
......
...@@ -5,6 +5,9 @@ from skimage.transform import pyramid_gaussian ...@@ -5,6 +5,9 @@ from skimage.transform import pyramid_gaussian
from PIL import Image from PIL import Image
import numpy as np import numpy as np
import torch
from torchvision import transforms
from torchvision.utils import make_grid
from struct import unpack from struct import unpack
import os import os
...@@ -170,6 +173,8 @@ def run_persp_monodepth(rgb_image_data_list, persp_monodepth, use_large_model=Tr ...@@ -170,6 +173,8 @@ def run_persp_monodepth(rgb_image_data_list, persp_monodepth, use_large_model=Tr
return MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=use_large_model) return MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=use_large_model)
if persp_monodepth == "boost": if persp_monodepth == "boost":
return boosting_monodepth(rgb_image_data_list) return boosting_monodepth(rgb_image_data_list)
if persp_monodepth == "zoedepth":
return zoedepth_monodepth(rgb_image_data_list)
def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=True): def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=True):
...@@ -181,7 +186,6 @@ def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=T ...@@ -181,7 +186,6 @@ def MiDaS_torch_hub_data(rgb_image_data_list, persp_monodepth, use_large_model=T
:param use_large_model: the MiDaS model type. :param use_large_model: the MiDaS model type.
:type use_large_model: bool, optional :type use_large_model: bool, optional
""" """
import torch
# 1)initial PyTorch run-time environment # 1)initial PyTorch run-time environment
if use_large_model: if use_large_model:
...@@ -247,7 +251,6 @@ def MiDaS_torch_hub_file(rgb_image_path, use_large_model=True): ...@@ -247,7 +251,6 @@ def MiDaS_torch_hub_file(rgb_image_path, use_large_model=True):
:type use_large_model: bool, optional :type use_large_model: bool, optional
""" """
import cv2 import cv2
import torch
# import urllib.request # import urllib.request
# import matplotlib.pyplot as plt # import matplotlib.pyplot as plt
...@@ -297,7 +300,6 @@ def boosting_monodepth(rgb_image_data_list): ...@@ -297,7 +300,6 @@ def boosting_monodepth(rgb_image_data_list):
# Load merge network # Load merge network
import cv2 import cv2
import argparse import argparse
import torch
import warnings import warnings
warnings.simplefilter('ignore', np.RankWarning) warnings.simplefilter('ignore', np.RankWarning)
...@@ -540,6 +542,39 @@ def boosting_monodepth(rgb_image_data_list): ...@@ -540,6 +542,39 @@ def boosting_monodepth(rgb_image_data_list):
return depthmaps return depthmaps
@torch.no_grad()
def zoedepth_monodepth(rgb_image_data_list):
    """Estimate one disparity map per perspective RGB image with ZoeDepth.

    The ``ZoeD_NK`` model is loaded from the ``isl-org/ZoeDepth`` torch.hub
    repository. For every image the model's ``metric_depth`` output is
    resized to the image's own resolution, negative values are clamped when
    present, and the result is converted with ``depth2disparity``.

    :param rgb_image_data_list: the RGB images to process. Each image is
        divided by 255 before inference and ``img.shape[:2]`` is used as the
        output size.
        NOTE(review): presumably HxWx3 numpy arrays in [0, 255] -- confirm
        against the caller.
    :type rgb_image_data_list: list
    :return: the disparity maps, one per input image, in input order.
    :rtype: list
    """
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Replace torch.hub's private fork-validation hook with a no-op, then
    # force a fresh fetch of the MiDaS hub repository.
    # NOTE(review): presumably needed so ZoeDepth builds on an up-to-date
    # MiDaS checkout -- confirm before removing; it re-downloads on each call.
    torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
    torch.hub.help("intel-isl/MiDaS", "DPT_BEiT_L_384", force_reload=True)

    to_tensor = transforms.Compose([transforms.ToTensor()])

    model_zoe = torch.hub.load("isl-org/ZoeDepth", "ZoeD_NK", pretrained=True)
    model_zoe = model_zoe.to(device)
    model_zoe.eval()

    disparity_maps = []
    for img in rgb_image_data_list:
        img_t = to_tensor(img / 255.).unsqueeze(0).type(torch.float32).to(device)
        out = model_zoe(img_t)['metric_depth']
        # Resize the prediction back to the input image's (height, width).
        out = torch.nn.functional.interpolate(
            out,
            size=img.shape[:2],
            mode="nearest-exact",
        ).squeeze(0)
        if torch.any(out < 0):
            log.warn("Negative depth value")
            out = torch.clamp(out, min=1e-6)
        # Convert on the host immediately so that device memory does not
        # grow with the number of images in the list.
        disparity_maps.append(depth2disparity(out.squeeze().cpu().numpy()))

    del model_zoe
    return disparity_maps
def read_dpt(dpt_file_path): def read_dpt(dpt_file_path):
"""read depth map from *.dpt file. """read depth map from *.dpt file.
......
...@@ -454,7 +454,7 @@ def ico2erp_image(tangent_images, erp_image_height, padding_size=0.0, blender_me ...@@ -454,7 +454,7 @@ def ico2erp_image(tangent_images, erp_image_height, padding_size=0.0, blender_me
# compute the final optical flow base on weight # compute the final optical flow base on weight
if blender_method == "mean": if blender_method == "mean":
# erp_flow_weight_mat = np.full(erp_flow_weight_mat.shape, erp_flow_weight_mat.max(), np.float) # debug # erp_flow_weight_mat = np.full(erp_flow_weight_mat.shape, erp_flow_weight_mat.max(), float) # debug
non_zero_weight_list = erp_weight_mat != 0 non_zero_weight_list = erp_weight_mat != 0
if not np.all(non_zero_weight_list): if not np.all(non_zero_weight_list):
log.warn("the optical flow weight matrix contain 0.") log.warn("the optical flow weight matrix contain 0.")
......
...@@ -170,7 +170,7 @@ def sph2car(theta, phi, radius=1.0): ...@@ -170,7 +170,7 @@ def sph2car(theta, phi, radius=1.0):
:return: +x right, +y down, +z is forward, shape is [3, point_number] :return: +x right, +y down, +z is forward, shape is [3, point_number]
:rtype: numpy :rtype: numpy
""" """
# points_cartesian_3d = np.array.zeros((theta.shape[0],3),np.float) # points_cartesian_3d = np.array.zeros((theta.shape[0],3),float)
x = radius * np.cos(phi) * np.sin(theta) x = radius * np.cos(phi) * np.sin(theta)
z = radius * np.cos(phi) * np.cos(theta) z = radius * np.cos(phi) * np.cos(theta)
y = -radius * np.sin(phi) y = -radius * np.sin(phi)
......
...@@ -136,6 +136,7 @@ def erp_ico_cam_intrparams(image_width, padding_size=0): ...@@ -136,6 +136,7 @@ def erp_ico_cam_intrparams(image_width, padding_size=0):
# use tangent plane # use tangent plane
tangent_points_x_min = np.amin(np.array(triangle_points_tangent)[:, 0]) tangent_points_x_min = np.amin(np.array(triangle_points_tangent)[:, 0])
tangent_points_x_max = np.amax(np.array(triangle_points_tangent)[:, 0])
tangent_points_y_min = np.amin(np.array(triangle_points_tangent)[:, 1]) tangent_points_y_min = np.amin(np.array(triangle_points_tangent)[:, 1])
tangent_points_y_max = np.amax(np.array(triangle_points_tangent)[:, 1]) tangent_points_y_max = np.amax(np.array(triangle_points_tangent)[:, 1])
fov_v = np.abs(np.arctan2(tangent_points_y_min, 1.0)) + np.abs(np.arctan2(tangent_points_y_max, 1.0)) fov_v = np.abs(np.arctan2(tangent_points_y_min, 1.0)) + np.abs(np.arctan2(tangent_points_y_max, 1.0))
...@@ -144,27 +145,28 @@ def erp_ico_cam_intrparams(image_width, padding_size=0): ...@@ -144,27 +145,28 @@ def erp_ico_cam_intrparams(image_width, padding_size=0):
log.debug("Pin-hole camera fov_h: {}, fov_v: {}".format(np.degrees(fov_h), np.degrees(fov_v))) log.debug("Pin-hole camera fov_h: {}, fov_v: {}".format(np.degrees(fov_h), np.degrees(fov_v)))
# image aspect ratio, the triangle is equilateral triangle # image aspect ratio, the triangle is equilateral triangle
image_height = 0.5 * image_width / np.tan(np.radians(30.0)) image_height = image_width # 0.5 * image_width / np.tan(np.radians(30.0))
fx = 0.5 * image_width / np.tan(fov_h * 0.5) fx = image_width / np.abs(tangent_points_x_max - tangent_points_x_min) # 0.5 * image_width / np.tan(fov_h * 0.5)
fy = 0.5 * image_height / np.tan(fov_v * 0.5) fy = fx # 0.5 * image_height / np.tan(fov_v * 0.5)
cx = (image_width - 1) / 2.0 cx = image_width / 2.0
cy = image_height / 2.0
# invert and upright triangle cy # invert and upright triangle cy
cy_invert = 0.5 * (image_width - 1.0) * np.tan(np.radians(30.0)) + 10.0 # cy_invert = 0.5 * (image_width - 1.0) * np.tan(np.radians(30.0)) + 10.0
cy_up = 0.5 * (image_width - 1.0) / np.sin(np.radians(60.0)) + 10.0 # cy_up = 0.5 * (image_width - 1.0) / np.sin(np.radians(60.0)) + 10.0
subimage_cam_param_list = [] subimage_cam_param_list = []
for index in range(0, 20): for index in range(0, 20):
# intrinsic parameters # intrinsic parameters
cy = None # cy = None
if 0 <= index <= 4: # if 0 <= index <= 4:
cy = cy_up # cy = cy_up
elif 5 <= index <= 9: # elif 5 <= index <= 9:
cy = cy_invert # cy = cy_invert
elif 10 <= index <= 14: # elif 10 <= index <= 14:
cy = cy_up # cy = cy_up
else: # else:
cy = cy_invert # cy = cy_invert
intrinsic_matrix = np.array([[fx, 0, cx], intrinsic_matrix = np.array([[fx, 0, cx],
[0, fy, cy], [0, fy, cy],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment