From 7a352bd5bfe55f3ccea2d182d0ded89084f5560c Mon Sep 17 00:00:00 2001
From: lianqing
Date: Mon, 26 Sep 2022 22:14:58 +0800
Subject: [PATCH] add the code of generating cam_sync_labels in waymo dataset

---
 mmdet3d/datasets/convert_utils.py              |  2 -
 mmdet3d/structures/bbox_3d/box_3d_mode.py      | 83 +++++++++++++++---
 mmdet3d/structures/bbox_3d/cam_box3d.py        | 11 ++-
 mmdet3d/structures/bbox_3d/lidar_box3d.py      | 11 ++-
 tools/create_data.py                           |  7 +-
 tools/dataset_converters/kitti_data_utils.py   | 37 ++++++++
 .../dataset_converters/update_infos_to_v2.py   |  4 +-
 tools/dataset_converters/waymo_converter.py    | 87 ++++++++++++++-----
 8 files changed, 198 insertions(+), 44 deletions(-)

diff --git a/mmdet3d/datasets/convert_utils.py b/mmdet3d/datasets/convert_utils.py
index 2ccdd2b98d..0fc4663104 100644
--- a/mmdet3d/datasets/convert_utils.py
+++ b/mmdet3d/datasets/convert_utils.py
@@ -459,14 +459,12 @@ def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
     repro_rec['bbox_corners'] = [x1, y1, x2, y2]
     repro_rec['filename'] = filename
 
-    coco_rec['file_name'] = filename
     coco_rec['image_id'] = sample_data_token
     coco_rec['area'] = (y2 - y1) * (x2 - x1)
 
     if repro_rec['category_name'] not in kitti_categories:
         return None
     cat_name = repro_rec['category_name']
-    coco_rec['category_name'] = cat_name
     coco_rec['category_id'] = kitti_categories.index(cat_name)
     coco_rec['bbox_label'] = coco_rec['category_id']
     coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
diff --git a/mmdet3d/structures/bbox_3d/box_3d_mode.py b/mmdet3d/structures/bbox_3d/box_3d_mode.py
index 3048b0addb..9ff9fed1c4 100644
--- a/mmdet3d/structures/bbox_3d/box_3d_mode.py
+++ b/mmdet3d/structures/bbox_3d/box_3d_mode.py
@@ -63,7 +63,7 @@ class Box3DMode(IntEnum):
     DEPTH = 2
 
     @staticmethod
-    def convert(box, src, dst, rt_mat=None, with_yaw=True):
+    def convert(box, src, dst, rt_mat=None, with_yaw=True, correct_yaw=False):
         """Convert boxes from `src` mode to `dst` mode.
 
         Args:
@@ -81,6 +81,7 @@ def convert(box, src, dst, rt_mat=None, with_yaw=True):
             with_yaw (bool, optional): If `box` is an instance of
                 :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
                 Defaults to True.
+            correct_yaw (bool): Whether to rotate the yaw angle by ``rt_mat``.
 
         Returns:
             (tuple | list | np.ndarray | torch.Tensor |
@@ -119,41 +120,89 @@ def convert(box, src, dst, rt_mat=None, with_yaw=True):
                 rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
             xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
             if with_yaw:
-                yaw = -yaw - np.pi / 2
-                yaw = limit_period(yaw, period=np.pi * 2)
+                if correct_yaw:
+                    yaw_vector = torch.cat([
+                        torch.cos(yaw),
+                        torch.sin(yaw),
+                        torch.zeros_like(yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = -yaw - np.pi / 2
+                    yaw = limit_period(yaw, period=np.pi * 2)
         elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
             xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
             if with_yaw:
-                yaw = -yaw - np.pi / 2
-                yaw = limit_period(yaw, period=np.pi * 2)
+                if correct_yaw:
+                    yaw_vector = torch.cat([
+                        torch.cos(-yaw),
+                        torch.zeros_like(yaw),
+                        torch.sin(-yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = -yaw - np.pi / 2
+                    yaw = limit_period(yaw, period=np.pi * 2)
         elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
             xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
             if with_yaw:
-                yaw = -yaw
+                if correct_yaw:
+                    yaw_vector = torch.cat([
+                        torch.cos(yaw),
+                        torch.sin(yaw),
+                        torch.zeros_like(yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = -yaw
         elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
             xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
             if with_yaw:
-                yaw = -yaw
+                if correct_yaw:
+                    yaw_vector = torch.cat([
+                        torch.cos(-yaw),
+                        torch.zeros_like(yaw),
+                        torch.sin(-yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = -yaw
         elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
             xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
             if with_yaw:
-                yaw = yaw + np.pi / 2
-                yaw = limit_period(yaw, period=np.pi * 2)
+                if correct_yaw:
+                    yaw_vector = torch.cat([
+                        torch.cos(yaw),
+                        torch.sin(yaw),
+                        torch.zeros_like(yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = yaw + np.pi / 2
+                    yaw = limit_period(yaw, period=np.pi * 2)
         elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
             if rt_mat is None:
                 rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
             xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
             if with_yaw:
-                yaw = yaw - np.pi / 2
-                yaw = limit_period(yaw, period=np.pi * 2)
+                if correct_yaw:
+                    yaw_vector = torch.cat([
+                        torch.cos(yaw),
+                        torch.sin(yaw),
+                        torch.zeros_like(yaw)
+                    ],
+                                           dim=1)
+                else:
+                    yaw = yaw - np.pi / 2
+                    yaw = limit_period(yaw, period=np.pi * 2)
         else:
             raise NotImplementedError(
                 f'Conversion from Box3DMode {src} to {dst} '
@@ -168,6 +217,18 @@ def convert(box, src, dst, rt_mat=None, with_yaw=True):
         else:
             xyz = arr[..., :3] @ rt_mat.t()
 
+        # Note: we only use the rotation part of rt_mat,
+        # so there is no need to extend yaw_vector
+        if with_yaw and correct_yaw:
+            rot_yaw_vector = yaw_vector @ rt_mat[:3, :3].t()
+            if dst == Box3DMode.CAM:
+                yaw = torch.atan2(-rot_yaw_vector[:, [2]],
+                                  rot_yaw_vector[:, [0]])
+            elif dst in [Box3DMode.LIDAR, Box3DMode.DEPTH]:
+                yaw = torch.atan2(rot_yaw_vector[:, [1]],
+                                  rot_yaw_vector[:, [0]])
+            yaw = limit_period(yaw, period=np.pi * 2)
+
         if with_yaw:
             remains = arr[..., 7:]
             arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)
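The yaw-vector trick above deserves a closer look: instead of applying a
hard-coded +-pi/2 offset, which only holds for the default axis-aligned
extrinsics, the heading is encoded as a unit direction vector, rotated by the
rotation part of rt_mat, and read back with atan2. A minimal standalone sketch
of the same idea in numpy (not the mmdet3d implementation; the rt_mat value is
the default LiDAR-to-camera extrinsic from the diff):

    import numpy as np

    def rotate_yaw_lidar_to_cam(yaw, rt_mat):
        # heading as a unit vector in the LiDAR x-y plane
        vec = np.array([np.cos(yaw), np.sin(yaw), 0.0])
        # only the rotation part of the extrinsic acts on directions
        rot = rt_mat[:3, :3] @ vec
        # camera yaw is measured in the x-z plane, hence the sign flip on z
        return np.arctan2(-rot[2], rot[0])

    rt_mat = np.array([[0., -1., 0.], [0., 0., -1.], [1., 0., 0.]])
    # agrees with the legacy formula -yaw - pi / 2 for this default extrinsic
    print(rotate_yaw_lidar_to_cam(0.3, rt_mat))

For the default extrinsic both code paths agree; the correct_yaw path
additionally stays valid when rt_mat is an arbitrary sensor extrinsic, which is
exactly the Waymo cam-sync case.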
diff --git a/mmdet3d/structures/bbox_3d/cam_box3d.py b/mmdet3d/structures/bbox_3d/cam_box3d.py
index 082018e65a..8ec28c2e86 100644
--- a/mmdet3d/structures/bbox_3d/cam_box3d.py
+++ b/mmdet3d/structures/bbox_3d/cam_box3d.py
@@ -280,7 +280,7 @@ def height_overlaps(cls, boxes1, boxes2, mode='iou'):
         overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
         return overlaps_h
 
-    def convert_to(self, dst, rt_mat=None):
+    def convert_to(self, dst, rt_mat=None, correct_yaw=False):
         """Convert self to ``dst`` mode.
 
         Args:
@@ -291,14 +291,19 @@ def convert_to(self, dst, rt_mat=None):
                 The conversion from ``src`` coordinates to ``dst`` coordinates
                 usually comes along the change of sensors, e.g., from camera
                 to LiDAR. This requires a transformation matrix.
-
+            correct_yaw (bool): Whether to convert the yaw angle to the
+                target coordinate. Defaults to False.
         Returns:
             :obj:`BaseInstance3DBoxes`: The converted box of the same type
                 in the ``dst`` mode.
         """
         from .box_3d_mode import Box3DMode
         return Box3DMode.convert(
-            box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
+            box=self,
+            src=Box3DMode.CAM,
+            dst=dst,
+            rt_mat=rt_mat,
+            correct_yaw=correct_yaw)
 
     def points_in_boxes_part(self, points, boxes_override=None):
         """Find the box in which each point is.
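With the keyword plumbed through convert_to, callers can opt in per
conversion. A hedged usage sketch (the box values are made up; omitting rt_mat
falls back to the default extrinsic, where correct_yaw reproduces the legacy
result):

    import torch
    from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes

    # one box: x, y, z, l, h, w, yaw in camera coordinates
    cam_boxes = CameraInstance3DBoxes(
        torch.tensor([[1.0, 1.5, 10.0, 4.0, 1.6, 1.8, 0.2]]))
    # with a real sensor extrinsic, pass it as rt_mat; the yaw then follows
    # the actual rotation instead of the fixed -yaw - pi / 2 offset
    lidar_boxes = cam_boxes.convert_to(Box3DMode.LIDAR, correct_yaw=True)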
diff --git a/mmdet3d/structures/bbox_3d/lidar_box3d.py b/mmdet3d/structures/bbox_3d/lidar_box3d.py
index 6998d02894..a176edffe5 100644
--- a/mmdet3d/structures/bbox_3d/lidar_box3d.py
+++ b/mmdet3d/structures/bbox_3d/lidar_box3d.py
@@ -174,7 +174,7 @@ def flip(self, bev_direction='horizontal', points=None):
             points.flip(bev_direction)
         return points
 
-    def convert_to(self, dst, rt_mat=None):
+    def convert_to(self, dst, rt_mat=None, correct_yaw=False):
         """Convert self to ``dst`` mode.
 
         Args:
@@ -185,14 +185,19 @@ def convert_to(self, dst, rt_mat=None):
                 The conversion from ``src`` coordinates to ``dst`` coordinates
                 usually comes along the change of sensors, e.g., from camera
                 to LiDAR. This requires a transformation matrix.
-
+            correct_yaw (bool): Whether to convert the yaw angle to the
+                target coordinate. Defaults to False.
         Returns:
             :obj:`BaseInstance3DBoxes`: The converted box of the same type
                 in the ``dst`` mode.
         """
         from .box_3d_mode import Box3DMode
         return Box3DMode.convert(
-            box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
+            box=self,
+            src=Box3DMode.LIDAR,
+            dst=dst,
+            rt_mat=rt_mat,
+            correct_yaw=correct_yaw)
 
     def enlarged_box(self, extra_width):
         """Enlarge the length, width and height boxes.
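Because both box classes forward correct_yaw symmetrically, a cheap sanity
check is a LiDAR -> CAM -> LiDAR round trip, which should hand back the
original heading up to the 2 * pi period. A sketch under that assumption
(box values are made up):

    import torch
    from mmdet3d.structures import Box3DMode, LiDARInstance3DBoxes

    boxes = LiDARInstance3DBoxes(
        torch.tensor([[0.0, 0.0, -1.0, 4.0, 1.8, 1.6, 0.3]]))
    cam = boxes.convert_to(Box3DMode.CAM, correct_yaw=True)
    back = cam.convert_to(Box3DMode.LIDAR, correct_yaw=True)
    # headings should agree after limit_period wraps them to the same range
    assert torch.allclose(boxes.tensor[:, 6], back.tensor[:, 6], atol=1e-5)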
diff --git a/tools/create_data.py b/tools/create_data.py
index 9ef3fbce9d..18b9716f69 100644
--- a/tools/create_data.py
+++ b/tools/create_data.py
@@ -191,7 +191,9 @@ def waymo_data_prep(root_path,
     """
     from tools.dataset_converters import waymo_converter as waymo
 
-    splits = ['training', 'validation', 'testing']
+    splits = [
+        'training', 'validation', 'testing', 'testing_3d_camera_only_detection'
+    ]
     for i, split in enumerate(splits):
         load_dir = osp.join(root_path, 'waymo_format', split)
         if split == 'validation':
@@ -203,7 +205,8 @@ def waymo_data_prep(root_path,
             save_dir,
             prefix=str(i),
             workers=workers,
-            test_mode=(split == 'testing'))
+            test_mode=(split
+                       in ['testing', 'testing_3d_camera_only_detection']))
         converter.convert()
     # Generate waymo infos
     out_dir = osp.join(out_dir, 'kitti_format')
diff --git a/tools/dataset_converters/kitti_data_utils.py b/tools/dataset_converters/kitti_data_utils.py
index 29a39afb3e..893119c658 100644
--- a/tools/dataset_converters/kitti_data_utils.py
+++ b/tools/dataset_converters/kitti_data_utils.py
@@ -394,9 +394,18 @@ def gather_single(self, idx):
                 self.relative_path,
                 info_type='label_all',
                 use_prefix_id=True)
+            cam_sync_label_path = get_label_path(
+                idx,
+                self.path,
+                self.training,
+                self.relative_path,
+                info_type='cam_sync_label_all',
+                use_prefix_id=True)
             if self.relative_path:
                 label_path = str(root_path / label_path)
+                cam_sync_label_path = str(root_path / cam_sync_label_path)
             annotations = get_label_anno(label_path)
+            cam_sync_annotations = get_label_anno(cam_sync_label_path)
         info['image'] = image_info
         info['point_cloud'] = pc_info
         if self.calib:
@@ -437,8 +446,24 @@ def gather_single(self, idx):
             Tr_velo_to_cam = np.array([
                 float(info) for info in lines[6].split(' ')[1:13]
             ]).reshape([3, 4])
+            Tr_velo_to_cam1 = np.array([
+                float(info) for info in lines[7].split(' ')[1:13]
+            ]).reshape([3, 4])
+            Tr_velo_to_cam2 = np.array([
+                float(info) for info in lines[8].split(' ')[1:13]
+            ]).reshape([3, 4])
+            Tr_velo_to_cam3 = np.array([
+                float(info) for info in lines[9].split(' ')[1:13]
+            ]).reshape([3, 4])
+            Tr_velo_to_cam4 = np.array([
+                float(info) for info in lines[10].split(' ')[1:13]
+            ]).reshape([3, 4])
             if self.extend_matrix:
                 Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
+                Tr_velo_to_cam1 = _extend_matrix(Tr_velo_to_cam1)
+                Tr_velo_to_cam2 = _extend_matrix(Tr_velo_to_cam2)
+                Tr_velo_to_cam3 = _extend_matrix(Tr_velo_to_cam3)
+                Tr_velo_to_cam4 = _extend_matrix(Tr_velo_to_cam4)
             calib_info['P0'] = P0
             calib_info['P1'] = P1
             calib_info['P2'] = P2
@@ -446,7 +471,12 @@ def gather_single(self, idx):
             calib_info['P4'] = P4
             calib_info['R0_rect'] = rect_4x4
             calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
+            calib_info['Tr_velo_to_cam1'] = Tr_velo_to_cam1
+            calib_info['Tr_velo_to_cam2'] = Tr_velo_to_cam2
+            calib_info['Tr_velo_to_cam3'] = Tr_velo_to_cam3
+            calib_info['Tr_velo_to_cam4'] = Tr_velo_to_cam4
             info['calib'] = calib_info
+
         if self.pose:
             pose_path = get_pose_path(
                 idx,
@@ -460,6 +490,13 @@ def gather_single(self, idx):
             info['annos'] = annotations
             info['annos']['camera_id'] = info['annos'].pop('score')
             add_difficulty_to_annos(info)
+            info['cam_sync_annos'] = cam_sync_annotations
+            # NOTE: the 2D labels do not have strict correspondence with
+            # the projected 2D lidar labels
+            # e.g.: the projected 2D labels can be in camera 2
+            # while the most_visible_camera can have id 4
+            info['cam_sync_annos']['camera_id'] = info['cam_sync_annos'].pop(
+                'score')
 
         sweeps = []
         prev_idx = idx
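For reference, the converter's calib files put P0-P4 on lines 0-4, R0_rect on
line 5, and the per-camera LiDAR extrinsics on lines 6-10, which matches the
lines the loader above indexes. A standalone sketch of that parsing under the
same layout assumption (read_velo_to_cam_extrinsics is a hypothetical helper;
_extend_matrix mirrors the helper of the same name in this file):

    import numpy as np

    def _extend_matrix(mat):
        # pad a 3x4 extrinsic to a homogeneous 4x4 matrix
        return np.concatenate([mat, np.array([[0., 0., 0., 1.]])], axis=0)

    def read_velo_to_cam_extrinsics(calib_path, extend_matrix=True):
        # read Tr_velo_to_cam .. Tr_velo_to_cam4 from a Waymo-KITTI calib file
        with open(calib_path) as f:
            lines = f.readlines()
        extrinsics = {}
        for cam_idx, line_idx in enumerate(range(6, 11)):
            mat = np.array([
                float(x) for x in lines[line_idx].split(' ')[1:13]
            ]).reshape([3, 4])
            if extend_matrix:
                mat = _extend_matrix(mat)
            suffix = str(cam_idx) if cam_idx else ''
            extrinsics[f'Tr_velo_to_cam{suffix}'] = mat
        return extrinsics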
diff --git a/tools/dataset_converters/update_infos_to_v2.py b/tools/dataset_converters/update_infos_to_v2.py
index 3dd98ec396..20952cb4bd 100644
--- a/tools/dataset_converters/update_infos_to_v2.py
+++ b/tools/dataset_converters/update_infos_to_v2.py
@@ -195,7 +195,7 @@ def clear_data_info_unused_keys(data_info):
     empty_flag = True
     for key in keys:
         # we allow no annotations in datainfo
-        if key == 'instances':
+        if key in ['instances', 'cam_sync_instances', 'cam_instances']:
             empty_flag = False
             continue
         if isinstance(data_info[key], list):
@@ -1056,4 +1056,4 @@ def update_pkl_infos(dataset, out_dir, pkl_path):
     if args.out_dir is None:
         args.out_dir = args.root_dir
     update_pkl_infos(
-        dataset=args.dataset, out_dir=args.out_dir, pkl_path=args.pkl_path)
+        dataset=args.dataset, out_dir=args.out_dir, pkl_path=args.pkl)
diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py
index cbef29ac3a..1d523bccc9 100644
--- a/tools/dataset_converters/waymo_converter.py
+++ b/tools/dataset_converters/waymo_converter.py
@@ -6,9 +6,8 @@
 try:
     from waymo_open_dataset import dataset_pb2
 except ImportError:
-    raise ImportError(
-        'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" '
-        'to install the official devkit first.')
+    raise ImportError('Please run "pip install waymo-open-dataset-tf-2-5-0'
+                      '>1.4.5" to install the official devkit first.')
 
 from glob import glob
 from os.path import join
@@ -35,6 +34,8 @@ class Waymo2KITTI(object):
             validation and 2 for testing.
         workers (int, optional): Number of workers for the parallel process.
         test_mode (bool, optional): Whether in the test_mode. Default: False.
+        save_cam_sync_labels (bool, optional): Whether to save cam sync
+            labels. Defaults to True.
     """
 
     def __init__(self,
@@ -42,7 +43,8 @@ def __init__(self,
                  save_dir,
                  prefix,
                  workers=64,
-                 test_mode=False):
+                 test_mode=False,
+                 save_cam_sync_labels=True):
 
         self.filter_empty_3dboxes = True
         self.filter_no_label_zone_points = True
@@ -58,6 +60,14 @@ def __init__(self,
         if int(tf.__version__.split('.')[0]) < 2:
             tf.enable_eager_execution()
 
+        # keep the order defined by the official protocol
+        self.cam_list = [
+            '_FRONT',
+            '_FRONT_LEFT',
+            '_FRONT_RIGHT',
+            '_SIDE_LEFT',
+            '_SIDE_RIGHT',
+        ]
         self.lidar_list = [
             '_FRONT', '_FRONT_RIGHT', '_FRONT_LEFT', '_SIDE_RIGHT',
             '_SIDE_LEFT'
@@ -78,6 +88,7 @@ def __init__(self,
         self.prefix = prefix
         self.workers = int(workers)
         self.test_mode = test_mode
+        self.save_cam_sync_labels = save_cam_sync_labels
 
         self.tfrecord_pathnames = sorted(
             glob(join(self.load_dir, '*.tfrecord')))
@@ -89,6 +100,10 @@ def __init__(self,
         self.point_cloud_save_dir = f'{self.save_dir}/velodyne'
         self.pose_save_dir = f'{self.save_dir}/pose'
         self.timestamp_save_dir = f'{self.save_dir}/timestamp'
+        if self.save_cam_sync_labels:
+            self.cam_sync_label_save_dir = f'{self.save_dir}/cam_sync_label_'
+            self.cam_sync_label_all_save_dir = \
+                f'{self.save_dir}/cam_sync_label_all'
 
         self.create_folder()
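A hedged usage sketch of the extended constructor (the paths and prefix below
are placeholders, not values from the patch; with save_cam_sync_labels=True
the converter lays out per-camera cam_sync_label_ dirs and cam_sync_label_all
next to the regular label dirs):

    from tools.dataset_converters import waymo_converter as waymo

    converter = waymo.Waymo2KITTI(
        load_dir='./data/waymo/waymo_format/training',  # placeholder path
        save_dir='./data/waymo/kitti_format/training',  # placeholder path
        prefix='0',
        workers=64,
        test_mode=False,
        save_cam_sync_labels=True)
    converter.convert()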
@@ -124,7 +139,10 @@ def convert_one(self, file_idx):
                 self.save_timestamp(frame, file_idx, frame_idx)
 
             if not self.test_mode:
+                # TODO save the depth image for waymo challenge solution.
                 self.save_label(frame, file_idx, frame_idx)
+                if self.save_cam_sync_labels:
+                    self.save_label(frame, file_idx, frame_idx, cam_sync=True)
 
     def __len__(self):
         """Length of the filename list."""
@@ -255,7 +273,7 @@ def save_lidar(self, frame, file_idx, frame_idx):
             f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin'
         point_cloud.astype(np.float32).tofile(pc_path)
 
-    def save_label(self, frame, file_idx, frame_idx):
+    def save_label(self, frame, file_idx, frame_idx, cam_sync=False):
         """Parse and save the label data in txt format.
         The relation between waymo and kitti coordinates is noteworthy:
         1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti)
@@ -267,10 +285,15 @@ def save_label(self, frame, file_idx, frame_idx):
             frame (:obj:`Frame`): Open dataset frame proto.
             file_idx (int): Current file index.
             frame_idx (int): Current frame index.
+            cam_sync (bool, optional): Whether to save the cam sync labels.
+                Defaults to False.
         """
-        fp_label_all = open(
-            f'{self.label_all_save_dir}/{self.prefix}' +
-            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'w+')
+        label_all_path = f'{self.label_all_save_dir}/{self.prefix}' + \
+            f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'
+        if cam_sync:
+            label_all_path = label_all_path.replace('label_',
+                                                    'cam_sync_label_')
+        fp_label_all = open(label_all_path, 'w+')
         id_to_bbox = dict()
         id_to_name = dict()
         for labels in frame.projected_lidar_labels:
@@ -296,6 +319,21 @@ def save_label(self, frame, file_idx, frame_idx):
                     name = str(id_to_name.get(id + lidar))
                     break
 
+            # NOTE: the 2D labels do not have strict correspondence with
+            # the projected 2D lidar labels
+            # e.g.: the projected 2D labels can be in camera 2
+            # while the most_visible_camera can have id 4
+            if cam_sync:
+                if obj.most_visible_camera_name:
+                    name = str(
+                        self.cam_list.index(
+                            f'_{obj.most_visible_camera_name}'))
+                    box3d = obj.camera_synced_box
+                else:
+                    continue
+            else:
+                box3d = obj.box
+
             if bounding_box is None or name is None:
                 name = '0'
                 bounding_box = (0, 0, 0, 0)
@@ -310,20 +348,20 @@ def save_label(self, frame, file_idx, frame_idx):
 
             my_type = self.waymo_to_kitti_class_map[my_type]
 
-            height = obj.box.height
-            width = obj.box.width
-            length = obj.box.length
+            height = box3d.height
+            width = box3d.width
+            length = box3d.length
 
-            x = obj.box.center_x
-            y = obj.box.center_y
-            z = obj.box.center_z - height / 2
+            x = box3d.center_x
+            y = box3d.center_y
+            z = box3d.center_z - height / 2
 
             # project bounding box to the virtual reference frame
             pt_ref = self.T_velo_to_front_cam @ \
                 np.array([x, y, z, 1]).reshape((4, 1))
             x, y, z, _ = pt_ref.flatten().tolist()
 
-            rotation_y = -obj.box.heading - np.pi / 2
+            rotation_y = -box3d.heading - np.pi / 2
             track_id = obj.id
 
             # not available
@@ -345,9 +383,11 @@ def save_label(self, frame, file_idx, frame_idx):
             else:
                 line_all = line[:-1] + ' ' + name + '\n'
 
-            fp_label = open(
-                f'{self.label_save_dir}{name}/{self.prefix}' +
-                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', 'a')
+            label_path = f'{self.label_save_dir}{name}/{self.prefix}' + \
+                f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'
+            if cam_sync:
+                label_path = label_path.replace('label_', 'cam_sync_label_')
+            fp_label = open(label_path, 'a')
             fp_label.write(line)
             fp_label.close()
 
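The cam-sync branch above hinges on most_visible_camera_name: objects that no
camera sees are skipped, the camera index (via cam_list) replaces the
lidar-derived name, and camera_synced_box supplies the geometry that the rest
of save_label reads through box3d. Distilled into a standalone helper for
clarity (select_cam_sync_box is not part of the patch; obj stands for a Waymo
laser_labels entry):

    def select_cam_sync_box(obj, cam_list):
        # skip objects that are not visible in any camera; they have no
        # camera-synced box to export
        if not obj.most_visible_camera_name:
            return None
        name = str(cam_list.index(f'_{obj.most_visible_camera_name}'))
        return name, obj.camera_synced_box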
@@ -398,11 +438,16 @@ def create_folder(self):
         """Create folder for data preprocessing."""
         if not self.test_mode:
             dir_list1 = [
-                self.label_all_save_dir, self.calib_save_dir,
-                self.point_cloud_save_dir, self.pose_save_dir,
-                self.timestamp_save_dir
+                self.label_all_save_dir,
+                self.calib_save_dir,
+                self.point_cloud_save_dir,
+                self.pose_save_dir,
+                self.timestamp_save_dir,
             ]
             dir_list2 = [self.label_save_dir, self.image_save_dir]
+            if self.save_cam_sync_labels:
+                dir_list1.append(self.cam_sync_label_all_save_dir)
+                dir_list2.append(self.cam_sync_label_save_dir)
         else:
             dir_list1 = [
                 self.calib_save_dir, self.point_cloud_save_dir,