-
Notifications
You must be signed in to change notification settings - Fork 0
/
acrv.py
executable file
·527 lines (433 loc) · 20.9 KB
/
acrv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import cv2
import json
import numpy as np
from scipy.spatial.transform import Rotation
from PIL import Image
from matplotlib import pyplot as plt
def compute_robot_T_camera(pose_rotation=[1, 0, 0, 0], pose_translation=[0.0, 0.0, 0.0]):
"""
THis function computes the transformation T that transform one point in the camera coordinate
(x - right, y - up, z - forward) to the point in the robot coordinate system (x - forward, y - left, z - up).
(The camera is fixed on the robot base-link)
P_r = P_c * T, P_c = [x, y, z, 1] is a row vector in the camera coordinate
Please note that the pose_rotation quaternion is not valid because the camera coordinate is different from the robot
coordinate in Isaac Sim. Actually, the camera coordinate is left-handed whereas the robot's is right-handed. To
transform the camera coordinate, a axis-swap matrix is needed.
:param pose_rotation: [quat_w, quat_x, quat_y, quat_z], a quaternion that represents a rotation
:param pose_translation: [tr_x, tr_y, tr_z], the location of camera origin in the robot coordinate system
:return: transformation matrix
"""
# compute the rotation matrix
# note that the scipy...Rotation accepts the quaternion in scalar-last format
pose_quat = [pose_rotation[1], pose_rotation[2], pose_rotation[3], pose_rotation[0]]
rot = Rotation.from_quat(pose_quat)
rotation_matrix = np.eye(4)
rotation_matrix[0:3][:, 0:3] = rot.as_dcm().T
translation_matrix = np.eye(4)
translation_matrix[3][0:3] = np.array(pose_translation)
# swap the axes of camera coordinate system to be coherent with the robot coordinate system
swap_matrix = np.array([[0, -1, 0, 0],
[0, 0, 1, 0],
[1, 0, 0, 0],
[0, 0, 0, 1]], dtype=np.float)
transformation_matrix = swap_matrix @ rotation_matrix @ translation_matrix
return transformation_matrix
def compute_world_T_robot(pose_rotation, pose_translation):
"""
This function computes the transformation matrix T that transforms one point in the robot coordinate system to
the world coordinate system. This function needs the scipy package - scipy.spatial.transform.Rotation
P_w = P_r * T, P_r = [x, y, z, 1] is a row vector in the robot coordinate (x - forward, y - left, z - up)
:param pose_rotation: [quat_w, quat_x, quat_y, quat_z], a quaternion that represents a rotation
:param pose_translation: [tr_x, tr_y, tr_z], the location of the robot origin in the world coordinate system
:return: transformation matrix
"""
# compute the rotation matrix
# note that the scipy...Rotation accepts the quaternion in scalar-last format
pose_quat = [pose_rotation[1], pose_rotation[2], pose_rotation[3], pose_rotation[0]]
rot = Rotation.from_quat(pose_quat)
rotation_matrix = np.eye(4)
rotation_matrix[0:3][:, 0:3] = rot.as_dcm().T
# build the tranlation matrix
translation_matrix = np.eye(4)
translation_matrix[3][0:3] = np.array(pose_translation)
# compute the tranformation matrix
transformation_matrix = rotation_matrix @ translation_matrix
# convert robot's right-handed coordinate system to unreal world's left-handed coordinate system
transformation_matrix[:, 1] = -1.0 * transformation_matrix[:, 1]
return transformation_matrix
def compute_camera_T_world(pose_rotation, pose_translation):
"""
This function is written according to the UpdateProjectMatrix() in IssacSimBoundingBox.cpp
It computes the transformation matrix T that transforms one point in the world coordinate into the camera coordinate
(x - right, y - up, z - forward)
P_c = P_w * T, P_w = [x, y, z, 1] a row vector denoting point location in the world coordinate
:param pose_rotation: [Yaw, Pitch, Roll] (degree), represents a rotation, exported from Isaac Sim
:param pose_translation: [tr_x, tr_y, tr_z], the location of the camera origin in the world coordinate system
:return: transformation matrix
"""
# build rotation matrix
yaw_angle = pose_rotation[0] / 180.0 * np.pi
pitch_angle = -1 * pose_rotation[1] / 180.0 * np.pi
roll_angle = -1 * pose_rotation[2] / 180.0 * np.pi
yaw = np.array([[np.cos(yaw_angle), -np.sin(yaw_angle), 0],
[np.sin(yaw_angle), np.cos(yaw_angle), 0],
[ 0, 0, 1]])
pitch = np.array([[ np.cos(pitch_angle), 0, np.sin(pitch_angle)],
[ 0, 1, 0],
[-np.sin(pitch_angle), 0, np.cos(pitch_angle)]])
roll = np.array([[1, 0, 0],
[0, np.cos(roll_angle), -np.sin(roll_angle)],
[0, np.sin(roll_angle), np.cos(roll_angle)]])
rotation_matrix = np.eye(4)
rotation_matrix[0:3][:, 0:3] = yaw @ pitch @ roll
# build translation matrix
# [1 0 0 0;
# 0 1 0 0;
# 0 0 1 0;
# dx dy dz 1]
translation_matrix = np.eye(4)
translation_matrix[3][0:3] = -1. * np.array(pose_translation)
# swap the axis
swap_matrix = np.array([[0, 0, 1, 0],
[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 0, 1]], dtype=np.float)
# compute transform_matrix
transformation_matrix = translation_matrix @ rotation_matrix @ swap_matrix
return transformation_matrix
def project_camera_to_image(projection_matrix, point_3d):
"""
This function project one point in the camera coordinate system into the image plane
image_x, image_y, image_z = point * projection_matrix.T
:param projection_matrix: 4*4 matrix
:param point_3d: [x1, y1, z1; x2, y2, z2; ...; xk, yk, zk], must be in centimeter
:return: [image_x, image_y]
"""
point_3d = np.array(point_3d)
point_3d1 = np.ones((point_3d.shape[0], point_3d.shape[1] + 1))
point_3d1[:, :-1] = point_3d
point_2d = point_3d1 @ np.array(projection_matrix).T
image_xy = np.zeros((point_2d.shape[0], 2))
image_xy[:, 0] = point_2d[:, 0] / point_2d[:, 2]
image_xy[:, 1] = point_2d[:, 1] / point_2d[:, 2]
return image_xy
def has_file_allowed_extension(filename, extensions):
filename_lower = filename.lower()
return any(filename_lower.endswith(ext) for ext in extensions)
def make_dataset(root_dir, sequences, extensions):
images = {}
image_nums = {}
for seq in sorted(sequences):
image_num = 0
d = os.path.join(root_dir, seq)
if not os.path.isdir(d):
continue
for root, _, fnames in sorted(os.walk(d)):
for fname in sorted(fnames):
if has_file_allowed_extension(fname, extensions):
path = os.path.join(root, fname)
item = (seq, fname)
images[item] = path
image_num += 1
image_nums[seq] = image_num
return images, image_nums
def pil_loader(path):
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
with open(path, 'rb') as f:
img = Image.open(f)
return img.convert('RGB')
class ACRV(object):
def __init__(self, data_dir, transform=None):
# data_dir: the folder that contains sequences of frames
IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff']
self.subfolders = os.listdir(data_dir)
if 'rgb' not in self.subfolders:
raise(RuntimeError('There is no rgb folder, please check'))
self.rgb_dir = os.path.join(data_dir, 'rgb')
if 'rgb_right' in self.subfolders:
self.rgb_right_dir = os.path.join(data_dir, 'rgb_right')
if 'depth' in self.subfolders:
self.depth_dir = os.path.join(data_dir, 'depth')
if 'time_stamp' in self.subfolders:
self.time_stamp_dir = os.path.join(data_dir, 'time_stamp')
if 'calibration' in self.subfolders:
self.calibration_dir = os.path.join(data_dir, 'calibration')
if 'wheel_odometry' in self.subfolders:
self.wheel_odometry_dir = os.path.join(data_dir, 'wheel_odometry')
self.flag = self.train_test_flag(self.subfolders)
if self.flag == 'training':
self.gt_dir = os.path.join(data_dir, 'ground_truth')
sequences = self._find_sequences(self.rgb_dir)
samples, sample_nums = make_dataset(self.rgb_dir, sequences, IMG_EXTENSIONS)
if len(samples) == 0:
raise (RuntimeError("Found 0 files in subfolders of: " + self.rgb_dir + "\n"
"Supported extensions are: " + ",".join(IMG_EXTENSIONS)))
self.data_dir = data_dir
self.sequences = sequences
self.samples = samples
self.sample_keys = sorted(samples.keys())
self.sample_nums = sample_nums
self.transform = transform
self.selected_seq = None # choose a certain sequence
if self.flag == 'training':
self.gt_labels = {}
for seq in self.sequences:
self.gt_labels[seq] = self.read_gt_label(seq)
self.time_stamps = {}
for seq in self.sequences:
self.time_stamps[seq] = self.read_time_stamp(seq)
def __getitem__(self, idx):
# given an idx, read the image or read the image and ground truth
if self.flag == 'test':
return self.get_image(idx)
if self.flag == 'training':
return self.get_image(idx), self.get_gt_label(idx), self.get_gt_instance(idx)
def __len__(self):
if self.selected_seq:
return self.sample_nums[self.selected_seq]
return len(self.samples)
def __call__(self, seq_name=None):
# initialize the selected_seq and iterate images just in this sequence
if isinstance(seq_name, int) or len(str(seq_name)) < 6:
self.selected_seq = '{0:06d}'.format(int(seq_name))
else:
self.selected_seq = seq_name
def _find_sequences(self, dir):
# Finds the sequence folders in a dataset.
if sys.version_info >= (3, 5):
# Faster and available in Python 3.5 and above
sequences = [d.name for d in os.scandir(dir) if d.is_dir()]
else:
sequences = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
sequences.sort()
return sequences
def train_test_flag(self, folders):
if 'ground_truth' in folders:
flag = 'training'
print('This is a training dataset')
else:
flag = 'test'
print('This is a testing dataset')
return flag
def idx_to_key(self, idx):
# key_pair = (sequence_name, image_name) = ('000001', '00000002.png')
if isinstance(idx, int):
if self.selected_seq:
# ends the loop when idx >= the number of images in this sequence
if idx >= self.sample_nums[self.selected_seq]:
raise IndexError()
image_name = '{0:08d}'.format(idx) + '.png'
key_pair = (self.selected_seq, image_name)
else:
key_pair = self.sample_keys[idx]
elif len(idx) == 2:
key_pair = (self.seq_to_key(idx[0]), '{0:08d}'.format(idx[1]) + '.png')
else:
raise(RuntimeError('Wrong index'))
return key_pair
def seq_to_key(self, seq):
if isinstance(seq, int):
seq_key = '{0:06d}'.format(seq)
elif len(seq) == 6 and isinstance(seq, str):
seq_key = seq
else:
raise(RuntimeError('Wrong sequence name'))
return seq_key
def get_image_name(self, idx):
# get the image name, given the index
image_key = self.idx_to_key(idx)
return image_key[1]
def get_image(self, idx):
image_key = self.idx_to_key(idx)
image_file = self.samples[image_key]
image = pil_loader(image_file)
image.name = self.get_image_name(idx)
if self.transform is not None:
image = self.transform(image)
# plt.imshow(image)
# plt.show()
return image
def get_right_image(self, idx):
image_key = self.idx_to_key(idx)
right_image_file = os.path.join(self.rgb_right_dir, image_key[0], image_key[1])
right_image = pil_loader(right_image_file)
if self.transform is not None:
right_image = self.transform(right_image)
return right_image
def get_depth_image(self, idx):
image_key = self.idx_to_key(idx)
depth_image_file = os.path.join(self.depth_dir, image_key[0], image_key[1])
# depth image is stored in uint16
depth_image = cv2.imread(depth_image_file, -1)
# depth image should be divided by 255 to get the actual depth in cm
depth_image = depth_image.astype(np.float32) / 255.0
if self.transform is not None:
depth_image = self.transform(depth_image)
# plt.imshow(depth_image)
# plt.show()
return depth_image
def get_gt_instance(self, idx):
if self.flag == 'test':
raise(RuntimeError('This is a test dataset, no ground truth'))
else:
image_key = self.idx_to_key(idx)
instance_image_dir = os.path.join(self.gt_dir, 'instance')
if not os.path.isdir(instance_image_dir):
raise(RuntimeError('Instance dir not exist'))
instance_image_file = os.path.join(instance_image_dir, image_key[0], image_key[1])
# instance image is stored in uint16
instance_image = cv2.imread(instance_image_file, -1)
if self.transform is not None:
instance_image = self.transform(instance_image)
# plt.imshow(instance_image)
# plt.show()
return instance_image
def get_gt_segmentation(self, idx):
if self.flag == 'test':
raise(RuntimeError('This is a test dataset, no ground truth'))
else:
image_key = self.idx_to_key(idx)
segmentation_image_dir = os.path.join(self.gt_dir, 'segmentation')
if not os.path.isdir(segmentation_image_dir):
raise(RuntimeError('Segmentation dir not exist'))
segmentation_image_file = os.path.join(segmentation_image_dir, image_key[0], image_key[1])
# segmentation image is stored in uint8
segmentation_image = cv2.imread(segmentation_image_file, -1)
if self.transform is not None:
segmentation_image = self.transform(segmentation_image)
# plt.imshow(segmentation_image)
# plt.show()
return segmentation_image
def get_label_index(self):
if self.flag == 'test':
raise(RuntimeError('This is a test dataset, no ground truth'))
label_index_file = os.path.join(self.gt_dir, 'label_index.json')
with open(label_index_file, 'r') as f:
label_index = json.load(f)
return label_index
def read_gt_label(self, seq):
if self.flag == 'test':
raise(RuntimeError('This is a test dataset, no ground truth'))
gt_label_dir = os.path.join(self.gt_dir, 'label')
if not os.path.isdir(gt_label_dir):
raise (RuntimeError('Label dir not exist'))
gt_label_file = os.path.join(gt_label_dir, self.seq_to_key(seq), 'labels.json')
with open(gt_label_file, 'r') as fp:
labels = json.load(fp)
return labels
def get_gt_label(self, idx):
if self.flag == 'test':
raise(RuntimeError('This is a test dataset, no ground truth'))
else:
image_key = self.idx_to_key(idx)
gt_label = self.gt_labels[image_key[0]][image_key[1][:-4]]
return gt_label
def get_calibration(self, seq):
# read projection matrix, pose of the camera in the robot frame and the base line of the stereo cameras, etc.
calib_dir = os.path.join(self.calibration_dir, self.seq_to_key(seq))
if not os.path.isdir(calib_dir):
raise (RuntimeError('calibration dir not exist'))
calib_file = os.path.join(self.calibration_dir, self.seq_to_key(seq), 'calibration.json')
with open(calib_file, 'r') as fp:
calibrations = json.load(fp)
return calibrations
def read_time_stamp(self, seq):
time_stamp_dir = os.path.join(self.time_stamp_dir, self.seq_to_key(seq))
if not os.path.isdir(time_stamp_dir):
raise (RuntimeError('time stamp dir not exist'))
time_stamp_file = os.path.join(time_stamp_dir, 'time_stamp.json')
with open(time_stamp_file, 'r') as fp:
time_stamp = json.load(fp)
return time_stamp
def get_time_stamp(self, idx):
image_key = self.idx_to_key(idx)
time_stamp = self.time_stamps[image_key[0]][image_key[1][:-4]]
return time_stamp
def get_wheel_odometry(self, idx):
image_key = self.idx_to_key(idx)
wheel_odometry_file = os.path.join(self.wheel_odometry_dir, image_key[0], image_key[1][:-4]+'.json')
with open(wheel_odometry_file, 'r') as fp:
wheel_odomemetry = json.load(fp)
return wheel_odomemetry
def get_world_T_robot(self, idx):
wheel_odom = self.get_wheel_odometry(idx)
world_T_robot = compute_world_T_robot(wheel_odom[0]['pose_rotation'], wheel_odom[0]['pose_translation'])
return world_T_robot
def get_robot_T_camera(self, idx):
image_key = self.idx_to_key(idx)
calib = self.get_calibration(image_key[0])
robot_T_camera = compute_robot_T_camera(calib['camera_pose_in_robot']['pose_rotation'],
calib['camera_pose_in_robot']['pose_translation'])
return robot_T_camera
def get_camera_T_world(self, idx):
gt_label = self.get_gt_label(idx)
camera_T_world = compute_camera_T_world(gt_label['pose_rotation'], gt_label['pose_translation'])
return camera_T_world
if __name__ == '__main__':
# replace the data_dir with your own and run the code to learn the usage
data_dir = '/home/wind/Data/Isaac_sim/AIUE_V01_001/demo/'
# create the ACRV object with data_dir which should include 'rgb', 'rgb_right', 'depth', 'calibration'
# 'time_stamp', 'wheel_odometry' and 'ground_truth' (if this is a training dataset) sub-folders
acrv_data = ACRV(data_dir)
# get the number of the images
print(len(acrv_data))
idx = 0
# given the idx, get the image name
print('image name: ', acrv_data.get_image_name(idx))
# given the idx, get the depth image
acrv_data.get_depth_image(idx)
# given the idx, get the rgb image
acrv_data.get_image(0)
# given the sequence number and image number, get the rgb image
idx = ('000000', 2)
acrv_data.get_image(idx)
# given the idx, get the right image
acrv_data.get_right_image(0)
# given the idx, get the instance ground truth
acrv_data.get_gt_instance(0)
# given the idx, get the semantic segmentation truth
acrv_data.get_gt_segmentation(0)
# get the mapping from object class names to numbers
print(acrv_data.get_label_index())
# get the ground truth annotations, including 2D bbox, 3D bbox and camera pose etc.
print(acrv_data.get_gt_label(8))
# given the sequence name, get the calibration information for this sequence
# including camera parameters, camera and robot pose in the world coordinate system in the first frame
seq = '000000' # or 0
calib = acrv_data.get_calibration(seq)
# given the idx, get the wheel odometry for this frame
wheel_odom = acrv_data.get_wheel_odometry(0)
# given the idx, get the world_T_robot transformation matrix
# P_w = P_r @ world_T_robot, P_r = [x, y, z, 1] the point in the robot coordinate system
w_T_r = acrv_data.get_world_T_robot(2)
# given the idx, get the robot_T_camera transformation matrix
# P_r = P_c @ robot_T_camera
r_T_c = acrv_data.get_robot_T_camera(2)
# compute the world_T_camera tranformation matrix
w_T_c = r_T_c @ w_T_r
# given the idx, get the camera_T_world transformation matrix
c_T_w = acrv_data.get_camera_T_world(2)
# w_T_c should be very similar to the inverse of c_T_w
wTc = np.linalg.inv(c_T_w)
# given one point in the camera coordinate system
# compute its location in the world coordinate system
P_c_ori = np.array([189.004486, -86.714561, 213.965942, 1])
P_w = P_c_ori @ r_T_c @ w_T_r
print(P_w)
# using the wTc to compute this location again
P_w_c = P_c_ori @ wTc
print(P_w_c)
# given the idx, use the class name to read images or read images and ground truth (training set)
img, gt, gt_instance = acrv_data[idx]
img, _, _ = acrv_data[('000000', 1)]
# you can also intilize the ACRV object with the specified sequence
acrv_data('000000')
img, _, _ = acrv_data[idx]
print(img.name)
# for image, _, _ in acrv_data:
# print(image.name)
# print('end')