Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add code for generating cam_sync_labels on the Waymo dataset #1870

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions mmdet3d/datasets/convert_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,14 +459,12 @@ def generate_waymo_mono3d_record(ann_rec, x1, y1, x2, y2, sample_data_token,
repro_rec['bbox_corners'] = [x1, y1, x2, y2]
repro_rec['filename'] = filename

coco_rec['file_name'] = filename
coco_rec['image_id'] = sample_data_token
coco_rec['area'] = (y2 - y1) * (x2 - x1)

if repro_rec['category_name'] not in kitti_categories:
return None
cat_name = repro_rec['category_name']
coco_rec['category_name'] = cat_name
coco_rec['category_id'] = kitti_categories.index(cat_name)
coco_rec['bbox_label'] = coco_rec['category_id']
coco_rec['bbox_label_3d'] = coco_rec['bbox_label']
Expand Down
83 changes: 72 additions & 11 deletions mmdet3d/structures/bbox_3d/box_3d_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class Box3DMode(IntEnum):
DEPTH = 2

@staticmethod
def convert(box, src, dst, rt_mat=None, with_yaw=True):
def convert(box, src, dst, rt_mat=None, with_yaw=True, correct_yaw=False):
"""Convert boxes from `src` mode to `dst` mode.

Args:
Expand All @@ -81,6 +81,7 @@ def convert(box, src, dst, rt_mat=None, with_yaw=True):
with_yaw (bool, optional): If `box` is an instance of
:obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
Defaults to True.
correct_yaw (bool): Whether to rotate the yaw angle by ``rt_mat``.
Defaults to False.

Returns:
(tuple | list | np.ndarray | torch.Tensor |
Expand Down Expand Up @@ -119,41 +120,89 @@ def convert(box, src, dst, rt_mat=None, with_yaw=True):
rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
if with_yaw:
yaw = -yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(yaw),
torch.sin(yaw),
torch.zeros_like(yaw)
],
dim=1)
else:
yaw = -yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
if with_yaw:
yaw = -yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(-yaw),
torch.zeros_like(yaw),
torch.sin(-yaw)
],
dim=1)
else:
yaw = -yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
if with_yaw:
yaw = -yaw
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(yaw),
torch.sin(yaw),
torch.zeros_like(yaw)
],
dim=1)
else:
yaw = -yaw
elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
if with_yaw:
yaw = -yaw
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(-yaw),
torch.zeros_like(yaw),
torch.sin(-yaw)
],
dim=1)
else:
yaw = -yaw
elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
if with_yaw:
yaw = yaw + np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(yaw),
torch.sin(yaw),
torch.zeros_like(yaw)
],
dim=1)
else:
yaw = yaw + np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
if rt_mat is None:
rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
if with_yaw:
yaw = yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
if correct_yaw:
yaw_vector = torch.cat([
torch.cos(yaw),
torch.sin(yaw),
torch.zeros_like(yaw)
],
dim=1)
else:
yaw = yaw - np.pi / 2
yaw = limit_period(yaw, period=np.pi * 2)
else:
raise NotImplementedError(
f'Conversion from Box3DMode {src} to {dst} '
Expand All @@ -168,6 +217,18 @@ def convert(box, src, dst, rt_mat=None, with_yaw=True):
else:
xyz = arr[..., :3] @ rt_mat.t()

# Note: we only use rotation in rt_mat
# so don't need to extend yaw_vector
if with_yaw and correct_yaw:
rot_yaw_vector = yaw_vector @ rt_mat[:3, :3].t()
if dst == Box3DMode.CAM:
yaw = torch.atan2(-rot_yaw_vector[:, [2]], rot_yaw_vector[:,
[0]])
elif dst in [Box3DMode.LIDAR, Box3DMode.DEPTH]:
yaw = torch.atan2(rot_yaw_vector[:, [1]], rot_yaw_vector[:,
[0]])
yaw = limit_period(yaw, period=np.pi * 2)

if with_yaw:
remains = arr[..., 7:]
arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)
Expand Down
11 changes: 8 additions & 3 deletions mmdet3d/structures/bbox_3d/cam_box3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def height_overlaps(cls, boxes1, boxes2, mode='iou'):
overlaps_h = torch.clamp(heighest_of_bottom - lowest_of_top, min=0)
return overlaps_h

def convert_to(self, dst, rt_mat=None):
def convert_to(self, dst, rt_mat=None, correct_yaw=False):
"""Convert self to ``dst`` mode.

Args:
Expand All @@ -291,14 +291,19 @@ def convert_to(self, dst, rt_mat=None):
The conversion from ``src`` coordinates to ``dst`` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.

correct_yaw (bool): Whether to convert the yaw angle to the target
coordinate. Defaults to False.
Returns:
:obj:`BaseInstance3DBoxes`:
The converted box of the same type in the ``dst`` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat)
box=self,
src=Box3DMode.CAM,
dst=dst,
rt_mat=rt_mat,
correct_yaw=correct_yaw)

def points_in_boxes_part(self, points, boxes_override=None):
"""Find the box in which each point is.
Expand Down
11 changes: 8 additions & 3 deletions mmdet3d/structures/bbox_3d/lidar_box3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def flip(self, bev_direction='horizontal', points=None):
points.flip(bev_direction)
return points

def convert_to(self, dst, rt_mat=None):
def convert_to(self, dst, rt_mat=None, correct_yaw=False):
"""Convert self to ``dst`` mode.

Args:
Expand All @@ -185,14 +185,19 @@ def convert_to(self, dst, rt_mat=None):
The conversion from ``src`` coordinates to ``dst`` coordinates
usually comes along the change of sensors, e.g., from camera
to LiDAR. This requires a transformation matrix.

correct_yaw (bool): Whether to convert the yaw angle to the target
coordinate. Defaults to False.
Returns:
:obj:`BaseInstance3DBoxes`:
The converted box of the same type in the ``dst`` mode.
"""
from .box_3d_mode import Box3DMode
return Box3DMode.convert(
box=self, src=Box3DMode.LIDAR, dst=dst, rt_mat=rt_mat)
box=self,
src=Box3DMode.LIDAR,
dst=dst,
rt_mat=rt_mat,
correct_yaw=correct_yaw)

def enlarged_box(self, extra_width):
"""Enlarge the length, width and height boxes.
Expand Down
7 changes: 5 additions & 2 deletions tools/create_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,9 @@ def waymo_data_prep(root_path,
"""
from tools.dataset_converters import waymo_converter as waymo

splits = ['training', 'validation', 'testing']
splits = [
'training', 'validation', 'testing', 'testing_3d_camera_only_detection'
]
for i, split in enumerate(splits):
load_dir = osp.join(root_path, 'waymo_format', split)
if split == 'validation':
Expand All @@ -203,7 +205,8 @@ def waymo_data_prep(root_path,
save_dir,
prefix=str(i),
workers=workers,
test_mode=(split == 'testing'))
test_mode=(split
in ['testing', 'testing_3d_camera_only_detection']))
converter.convert()
# Generate waymo infos
out_dir = osp.join(out_dir, 'kitti_format')
Expand Down
37 changes: 37 additions & 0 deletions tools/dataset_converters/kitti_data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,9 +394,18 @@ def gather_single(self, idx):
self.relative_path,
info_type='label_all',
use_prefix_id=True)
cam_sync_label_path = get_label_path(
idx,
self.path,
self.training,
self.relative_path,
info_type='cam_sync_label_all',
use_prefix_id=True)
if self.relative_path:
label_path = str(root_path / label_path)
cam_sync_label_path = str(root_path / cam_sync_label_path)
annotations = get_label_anno(label_path)
cam_sync_annotations = get_label_anno(cam_sync_label_path)
info['image'] = image_info
info['point_cloud'] = pc_info
if self.calib:
Expand Down Expand Up @@ -437,16 +446,37 @@ def gather_single(self, idx):
Tr_velo_to_cam = np.array([
float(info) for info in lines[6].split(' ')[1:13]
]).reshape([3, 4])
Tr_velo_to_cam1 = np.array([
float(info) for info in lines[7].split(' ')[1:13]
]).reshape([3, 4])
Tr_velo_to_cam2 = np.array([
float(info) for info in lines[8].split(' ')[1:13]
]).reshape([3, 4])
Tr_velo_to_cam3 = np.array([
float(info) for info in lines[9].split(' ')[1:13]
]).reshape([3, 4])
Tr_velo_to_cam4 = np.array([
float(info) for info in lines[10].split(' ')[1:13]
]).reshape([3, 4])
if self.extend_matrix:
Tr_velo_to_cam = _extend_matrix(Tr_velo_to_cam)
Tr_velo_to_cam1 = _extend_matrix(Tr_velo_to_cam1)
Tr_velo_to_cam2 = _extend_matrix(Tr_velo_to_cam2)
Tr_velo_to_cam3 = _extend_matrix(Tr_velo_to_cam3)
Tr_velo_to_cam4 = _extend_matrix(Tr_velo_to_cam4)
calib_info['P0'] = P0
calib_info['P1'] = P1
calib_info['P2'] = P2
calib_info['P3'] = P3
calib_info['P4'] = P4
calib_info['R0_rect'] = rect_4x4
calib_info['Tr_velo_to_cam'] = Tr_velo_to_cam
calib_info['Tr_velo_to_cam1'] = Tr_velo_to_cam1
calib_info['Tr_velo_to_cam2'] = Tr_velo_to_cam2
calib_info['Tr_velo_to_cam3'] = Tr_velo_to_cam3
calib_info['Tr_velo_to_cam4'] = Tr_velo_to_cam4
info['calib'] = calib_info

if self.pose:
pose_path = get_pose_path(
idx,
Expand All @@ -460,6 +490,13 @@ def gather_single(self, idx):
info['annos'] = annotations
info['annos']['camera_id'] = info['annos'].pop('score')
add_difficulty_to_annos(info)
info['cam_sync_annos'] = cam_sync_annotations
# NOTE: the 2D labels do not have strict correspondence with
# the projected 2D lidar labels
# e.g.: the projected 2D labels can be in camera 2
# while the most_visible_camera can have id 4
info['cam_sync_annos']['camera_id'] = info['cam_sync_annos'].pop(
'score')

sweeps = []
prev_idx = idx
Expand Down
4 changes: 2 additions & 2 deletions tools/dataset_converters/update_infos_to_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def clear_data_info_unused_keys(data_info):
empty_flag = True
for key in keys:
# we allow no annotations in datainfo
if key == 'instances':
if key in ['instances', 'cam_sync_instances', 'cam_instances']:
empty_flag = False
continue
if isinstance(data_info[key], list):
Expand Down Expand Up @@ -1056,4 +1056,4 @@ def update_pkl_infos(dataset, out_dir, pkl_path):
if args.out_dir is None:
args.out_dir = args.root_dir
update_pkl_infos(
dataset=args.dataset, out_dir=args.out_dir, pkl_path=args.pkl_path)
dataset=args.dataset, out_dir=args.out_dir, pkl_path=args.pkl)
Loading