evaluation_util.py
import math
import os
import numpy as np
import cv2 as cv
import torch
from skimage import io
from scipy.spatial.transform import Rotation


def read_pose_data(file_name):
    '''
    Expects the path to a file with one pose per line.
    Input poses are expected to map world to camera coordinates.
    Output poses map camera to world coordinates.
    Pose format: file qw qx qy qz tx ty tz (f)
    Returns a dictionary that maps a file name to a tuple of (4x4 pose, focal_length).
    Sets focal_length to None if it is not contained in the file.
    '''
with open(file_name, "r") as f:
pose_data = f.readlines()
# create a dict from the poses with file name as key
pose_dict = {}
    for pose_string in pose_data:
        pose_string = pose_string.split()
        file_name = pose_string[0]

        # convert the wxyz quaternion order of the file to the xyzw order
        # expected by scipy
        pose_q = np.array(pose_string[1:5], dtype=float)
        pose_q = np.array([pose_q[1], pose_q[2], pose_q[3], pose_q[0]])
        pose_t = np.array(pose_string[5:8], dtype=float)

        pose_R = Rotation.from_quat(pose_q).as_matrix()

        pose_4x4 = np.identity(4)
        pose_4x4[0:3, 0:3] = pose_R
        pose_4x4[0:3, 3] = pose_t
# convert world->cam to cam->world for evaluation
pose_4x4 = np.linalg.inv(pose_4x4)
if len(pose_string) > 8:
focal_length = float(pose_string[8])
else:
focal_length = None
pose_dict[file_name] = (pose_4x4, focal_length)
return pose_dict
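

# A minimal usage sketch (hypothetical file name and entry; the pose file
# follows the format documented in the docstring above):
#
#   pose_dict = read_pose_data("poses.txt")
#   pose_c2w, focal_length = pose_dict["seq1/frame-000001.color.png"]
#   cam_center = pose_c2w[0:3, 3]  # camera position in world coordinates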


def compute_error_max_rot_trans(pgt_pose, est_pose):
    '''
    Compute the pose error.
    Expects poses to map camera coordinates to world coordinates.
    Returns the maximum of the rotation error (in degrees) and the
    translation error (in centimeters).
    '''
    # translation error in meters
    t_err = float(np.linalg.norm(pgt_pose[0:3, 3] - est_pose[0:3, 3]))

    # rotation error: relative rotation, converted to its angle in degrees
    # via the magnitude of the Rodrigues (axis-angle) vector
    r_err = est_pose[0:3, 0:3] @ np.transpose(pgt_pose[0:3, 0:3])
    r_err = cv.Rodrigues(r_err)[0]
    r_err = np.linalg.norm(r_err) * 180 / math.pi

    # max of rotation error (degrees) and translation error (centimeters)
    return max(r_err, t_err * 100)
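

# Worked example: an estimate that is off by 3 degrees in rotation and 5cm in
# translation yields max(3, 0.05 * 100) = 5. A pose is commonly accepted if
# this value falls below a threshold, e.g. 5 (within 5 degrees and 5cm).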


def infer_depth_file_from_image_file(image_file):
    '''
    Map an image file to the corresponding depth file.
    Assumes prior knowledge about the dataset structure; adapt for new datasets.
    '''
# assumes image file format: some_name.color.xyz
# assumes depth file format: some_name.depth.png
depth_file, _ = os.path.splitext(image_file)
depth_file, _ = os.path.splitext(depth_file)
return f"{depth_file}.depth.png"
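

# For example (hypothetical file name following the assumed convention):
#
#   infer_depth_file_from_image_file("seq1/frame-000001.color.png")
#   returns "seq1/frame-000001.depth.png"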


def compute_error_dcre(depth_file, pgt_pose, est_pose, rgb_focal_length, rgb_image_width, use_max=True):
    '''
    Compute the dense reprojection error (DCRE).
    Expects poses to map camera coordinates to world coordinates.
    Needs access to the depth map of the image.
    Calculates the max. DCRE per image, or the mean if use_max=False.
    '''
    # move poses to the GPU (a CUDA device is required)
    pgt_pose = torch.from_numpy(pgt_pose).cuda()
    est_pose = torch.from_numpy(est_pose).cuda()

    depth = io.imread(depth_file)
    depth = depth.astype(np.float64)
    depth /= 1000  # from millimeters to meters

    d_h = depth.shape[0]
    d_w = depth.shape[1]

    # scale the RGB focal length to the depth map resolution
    rgb_to_d_scale = d_w / rgb_image_width
    d_focal_length = rgb_focal_length * rgb_to_d_scale
    # reproject depth map to 3D eye coordinates
    prec_eye_coords = np.zeros((4, d_h, d_w))

    # set x and y pixel coordinates via a coordinate grid
    yy, xx = np.meshgrid(np.arange(0, d_h), np.arange(0, d_w), indexing='ij')
    prec_eye_coords[0] = xx
    prec_eye_coords[1] = yy
prec_eye_coords = prec_eye_coords.reshape(4, -1)
eye_coords = prec_eye_coords.copy()
depth = depth.reshape(-1)
# filter pixels with invalid depth
depth_mask = (depth > 0.3) & (depth < 10)
eye_coords = eye_coords[:, depth_mask]
depth = depth[depth_mask]
eye_coords = torch.from_numpy(eye_coords).cuda()
depth = torch.from_numpy(depth).cuda()
# save original pixel positions for later
pixel_coords = eye_coords[0:2].clone()
    # subtract the depth principal point (assume image center)
eye_coords[0] -= d_w / 2
eye_coords[1] -= d_h / 2
    # back-project to 3D camera (eye) coordinates via the pinhole model
    eye_coords[0:2] *= depth / d_focal_length
    eye_coords[2] = depth
    eye_coords[3] = 1
    # transform to world coordinates using the pseudo ground truth pose,
    # then back to camera coordinates using the estimated pose
    scene_coords = torch.matmul(pgt_pose, eye_coords)
    eye_coords = torch.matmul(torch.inverse(est_pose), scene_coords)
# project
depth = eye_coords[2]
eye_coords = eye_coords[0:2]
eye_coords *= (d_focal_length / depth)
    # add the depth principal point back (assume image center)
eye_coords[0] += d_w / 2
eye_coords[1] += d_h / 2
reprojection_errors = torch.norm(eye_coords - pixel_coords, p=2, dim=0)
    # rescale errors from depth map pixels to RGB image pixels
    if use_max:
        return float(torch.max(reprojection_errors)) / rgb_to_d_scale
    else:
        return float(torch.mean(reprojection_errors)) / rgb_to_d_scale
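

# A minimal end-to-end sketch, assuming hypothetical pose files that include a
# focal length, a CUDA device, and an RGB image width of 640 pixels;
# "est_poses.txt" is assumed to hold estimated poses in the same format as the
# pseudo ground truth file "pgt_poses.txt".
if __name__ == "__main__":
    pgt_poses = read_pose_data("pgt_poses.txt")  # hypothetical path
    est_poses = read_pose_data("est_poses.txt")  # hypothetical path

    for image_file, (pgt_pose, focal_length) in pgt_poses.items():
        est_pose, _ = est_poses[image_file]

        # combined rotation (deg) / translation (cm) error
        max_err = compute_error_max_rot_trans(pgt_pose, est_pose)

        # dense reprojection error in RGB pixels
        depth_file = infer_depth_file_from_image_file(image_file)
        dcre = compute_error_dcre(depth_file, pgt_pose, est_pose,
                                  rgb_focal_length=focal_length,
                                  rgb_image_width=640)

        print(f"{image_file}: max rot/trans error {max_err:.2f}, DCRE {dcre:.2f}px")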