From c707568fa5f2e90bcae1955eba26efcf4c919058 Mon Sep 17 00:00:00 2001
From: Xiang Xu
Date: Thu, 9 Feb 2023 10:38:39 +0800
Subject: [PATCH 01/22] support polarmix

---
 mmdet3d/datasets/transforms/transforms_3d.py | 146 ++++++++++++++++++-
 1 file changed, 145 insertions(+), 1 deletion(-)

diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py
index 495f0c1e2d..0dd82ae35d 100644
--- a/mmdet3d/datasets/transforms/transforms_3d.py
+++ b/mmdet3d/datasets/transforms/transforms_3d.py
@@ -9,7 +9,8 @@
 from mmcv.transforms import BaseTransform, Compose, RandomResize, Resize
 from mmdet.datasets.transforms import (PhotoMetricDistortion, RandomCrop,
                                        RandomFlip)
-from mmengine import is_tuple_of
+from mmengine import is_list_of, is_tuple_of
+from mmengine.dataset import BaseDataset
 
 from mmdet3d.models.task_modules import VoxelGenerator
 from mmdet3d.registry import TRANSFORMS
@@ -2352,3 +2353,146 @@ def transform(self, input_dict: dict) -> dict:
         if len(input_dict[key]) == 0:
             input_dict.pop(key)
         return input_dict
+
+
+@TRANSFORMS.register_module()
+class PolarMix(BaseTransform):
+    """PolarMix data augmentation.
+
+    Required Keys:
+
+    - points (:obj:`BasePoints`)
+    - pts_semantic_mask (np.int64)
+    - mix_results (List[dict])
+
+    Modified Keys:
+
+    - points (:obj:`BasePoints`)
+    - pts_semantic_mask (np.int64)
+
+    Args:
+        instance_classes (List[int]): Semantic class indices that
+            represent the instances.
+        swap_ratio (float): Swap ratio of two point clouds. Defaults to 0.5.
+    """
+
+    def __init__(self,
+                 instance_classes: List[int],
+                 swap_ratio: float = 0.5) -> None:
+        assert is_list_of(instance_classes, int)
+        self.instance_classes = instance_classes
+        self.swap_ratio = swap_ratio
+        self.omega = [
+            np.random.random() * np.pi * 2 / 3,
+            (np.random.random() + 1) * np.pi * 2 / 3
+        ]
+
+    def get_indexes(self, dataset: BaseDataset) -> int:
+        """Call function to collect indexes.
+
+        Args:
+            dataset (:obj:`BaseDataset`): The dataset.
+
+        Returns:
+            int: Index.
+        """
+        index = random.randint(0, len(dataset) - 1)
+        return index
+
+    def transform(self, input_dict: dict) -> dict:
+        """PolarMix transform function.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Output dict after transformation.
+        """
+
+        assert 'mix_results' in input_dict
+        assert len(input_dict['mix_results']) == 1, \
+            'PolarMix only supports mixing two point clouds now!'
+
+        retrieve_results = input_dict['mix_results'][0]
+        retrieve_points = retrieve_results['points']
+        retrieve_points_numpy = retrieve_points.tensor.numpy()
+        retrieve_pts_semantic_mask = retrieve_results['pts_semantic_mask']
+
+        points = input_dict['points']
+        attribute_dims = points.attribute_dims
+
+        points_numpy = points.tensor.numpy()
+        pts_semantic_mask = retrieve_results['pts_semantic_mask']
+
+        point_type = type(points)
+
+        # 1. swap point cloud
+        if np.random.random() < self.swap_ratio:
+            start_angle = (np.random.random() - 1) * np.pi  # -pi~0
+            end_angle = start_angle + np.pi
+            # calculate horizontal angle for each point
+            yaw = -np.arctan2(points_numpy[:, 1], points_numpy[:, 0])
+            retrieve_yaw = -np.arctan2(retrieve_points_numpy[:, 1],
+                                       retrieve_points_numpy[:, 0])
+
+            # select points in sector
+            idx = np.where((yaw > start_angle) & (yaw < end_angle))
+            retrieve_idx = np.where((retrieve_yaw > start_angle)
+                                    & (retrieve_yaw < end_angle))
+
+            # swap
+            points_numpy = np.delete(points_numpy, idx, axis=0)
+            points_numpy = np.concatenate(
+                (points_numpy, retrieve_points_numpy[retrieve_idx]), axis=0)
+            pts_semantic_mask = np.delete(pts_semantic_mask, idx, axis=0)
+            pts_semantic_mask = np.concatenate(
+                (pts_semantic_mask, retrieve_pts_semantic_mask[retrieve_idx]),
+                axis=0)
+
+        # 2. rotate-pasting
+        # extract instance points
+        instance_points, instance_pts_semantic_mask = [], []
+        for instance_class in self.instance_classes:
+            retrieve_idx = np.where(
+                (retrieve_pts_semantic_mask == instance_class))
+            instance_points.append(retrieve_points_numpy[retrieve_idx])
+            instance_pts_semantic_mask.append(
+                retrieve_pts_semantic_mask[retrieve_idx])
+        instance_points = np.concatenate(instance_points, axis=0)
+        instance_pts_semantic_mask = np.concatenate(
+            instance_pts_semantic_mask, axis=0)
+
+        # rotate-copy
+        copy_points = [instance_points]
+        copy_pts_semantic_mask = [instance_pts_semantic_mask]
+        for omega in self.omega:
+            rot_mat = np.array([[np.cos(omega),
+                                 np.sin(omega), 0],
+                                [-np.sin(omega),
+                                 np.cos(omega), 0], [0, 0, 1]])
+            new_points = np.zeros_like(instance_points)
+            new_points[:, :3] = np.dot(instance_points[:, :3], rot_mat)
+            new_points[:, 3:] = instance_points[:, 3:]
+            copy_points.append(new_points)
+            copy_pts_semantic_mask.append(instance_pts_semantic_mask)
+        copy_points = np.concatenate(copy_points, axis=0)
+        copy_pts_semantic_mask = np.concatenate(copy_pts_semantic_mask, axis=0)
+
+        points_numpy = np.concatenate((points_numpy, copy_points), axis=0)
+        points = point_type(
+            points_numpy,
+            points_dim=points_numpy.shape[1],
+            attribute_dims=attribute_dims)
+        pts_semantic_mask = np.concatenate(
+            (pts_semantic_mask, copy_pts_semantic_mask), axis=0)
+
+        input_dict['points'] = points
+        input_dict['pts_semantic_mask'] = pts_semantic_mask
+        return input_dict
+
+    def __repr__(self) -> str:
+        """str: Return a string that describes the module."""
+        repr_str = self.__class__.__name__
+        repr_str += f'(instance_classes={self.instance_classes}, '
+        repr_str += f'swap_ratio={self.swap_ratio})'
+        return repr_str
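The core of step 1 is choosing a random half-plane sector by azimuth and exchanging it between the two scans. For intuition, the snippet below reproduces that sector swap in isolation with plain NumPy on synthetic scans; names such as scan_a and scan_b are hypothetical, and the snippet is a sketch, not part of the patch:

    import numpy as np

    rng = np.random.default_rng(0)
    scan_a = rng.uniform(-10, 10, size=(1000, 4))  # x, y, z, intensity
    scan_b = rng.uniform(-10, 10, size=(1000, 4))

    # A random half-plane sector [start_angle, start_angle + pi),
    # mirroring the patch: start_angle lies in (-pi, 0].
    start_angle = (rng.random() - 1) * np.pi
    end_angle = start_angle + np.pi

    # Horizontal (yaw) angle of every point. The patch negates arctan2,
    # which only mirrors the sector choice, not the idea.
    yaw_a = -np.arctan2(scan_a[:, 1], scan_a[:, 0])
    yaw_b = -np.arctan2(scan_b[:, 1], scan_b[:, 0])

    in_sector_a = (yaw_a > start_angle) & (yaw_a < end_angle)
    in_sector_b = (yaw_b > start_angle) & (yaw_b < end_angle)

    # Cut the sector out of scan A and paste in the same sector of scan B.
    mixed = np.concatenate([scan_a[~in_sector_a], scan_b[in_sector_b]], axis=0)
    print(mixed.shape)

The semantic masks are carried along with identical index masks, which is why the patch deletes and concatenates points and labels with the same idx and retrieve_idx arrays.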
From a98e024fe9c20a2c1f2b3573b55a25e8b9442427 Mon Sep 17 00:00:00 2001
From: Xiang Xu
Date: Thu, 9 Feb 2023 10:39:07 +0800
Subject: [PATCH 02/22] Update __init__.py

---
 mmdet3d/datasets/transforms/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mmdet3d/datasets/transforms/__init__.py b/mmdet3d/datasets/transforms/__init__.py
index 72d5a8f42b..c8969f8b60 100644
--- a/mmdet3d/datasets/transforms/__init__.py
+++ b/mmdet3d/datasets/transforms/__init__.py
@@ -14,7 +14,7 @@
                             MultiViewWrapper, ObjectNameFilter, ObjectNoise,
                             ObjectRangeFilter, ObjectSample,
                             PhotoMetricDistortion3D, PointSample, PointShuffle,
-                            PointsRangeFilter, RandomDropPointsColor,
+                            PointsRangeFilter, PolarMix, RandomDropPointsColor,
                             RandomFlip3D, RandomJitterPoints, RandomResize3D,
                             RandomShiftScale, Resize3D, VoxelBasedPointSampler)
@@ -30,5 +30,5 @@
     'RandomDropPointsColor', 'RandomJitterPoints', 'AffineResize',
     'RandomShiftScale', 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
     'MultiViewWrapper', 'PhotoMetricDistortion3D', 'MonoDet3DInferencerLoader',
-    'LidarDet3DInferencerLoader'
+    'LidarDet3DInferencerLoader', 'PolarMix'
 ]
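PolarMix consumes a pre-filled mix_results key; the dataset-side plumbing that calls get_indexes and loads the second scan is not part of this PR section. The sketch below shows one plausible wiring on synthetic data, mirroring the unit test added in the next patch; load_sample and FakeDataset are hypothetical stand-ins, not mmdet3d APIs:

    import numpy as np

    from mmdet3d.datasets.transforms import PolarMix
    from mmdet3d.structures import BasePoints


    def load_sample(index: int) -> dict:
        """Hypothetical stand-in for a real dataset lookup."""
        points = np.random.random((100, 4)).astype(np.float32)
        return {
            'points': BasePoints(points, points_dim=4),
            'pts_semantic_mask': np.random.randint(0, 5, (100, )),
        }


    class FakeDataset:
        """Hypothetical stand-in so get_indexes has a length to sample from."""

        def __len__(self) -> int:
            return 10


    transform = PolarMix(instance_classes=[1, 2], swap_ratio=0.5)

    results = load_sample(0)
    # A mixing-aware dataset wrapper would do the equivalent of:
    mix_index = transform.get_indexes(FakeDataset())
    results['mix_results'] = [load_sample(mix_index)]

    results = transform(results)
    print(results['points'].shape, results['pts_semantic_mask'].shape)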
From 606f5468741e2323a58881ec13a379d7dcc9096d Mon Sep 17 00:00:00 2001
From: Xiang Xu
Date: Thu, 9 Feb 2023 10:39:45 +0800
Subject: [PATCH 03/22] add UT

---
 .../test_transforms/test_transforms_3d.py    | 35 ++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/tests/test_datasets/test_transforms/test_transforms_3d.py b/tests/test_datasets/test_transforms/test_transforms_3d.py
index 81e222000c..175451c3ee 100644
--- a/tests/test_datasets/test_transforms/test_transforms_3d.py
+++ b/tests/test_datasets/test_transforms/test_transforms_3d.py
@@ -7,7 +7,8 @@
 from mmengine.testing import assert_allclose
 
 from mmdet3d.datasets import GlobalAlignment, RandomFlip3D
-from mmdet3d.datasets.transforms import GlobalRotScaleTrans
+from mmdet3d.datasets.transforms import GlobalRotScaleTrans, PolarMix
+from mmdet3d.structures import BasePoints
 from mmdet3d.testing import create_data_info_after_loading
@@ -99,3 +100,35 @@ def test_global_alignment(self):
         # assert the rot metric
         with self.assertRaises(AssertionError):
             global_align_transform(data_info)
+
+
+class TestPolarMix(unittest.TestCase):
+
+    def setUp(self):
+        points = np.random.random((100, 4))
+        self.results = {
+            'points': BasePoints(points, points_dim=4),
+            'pts_semantic_mask': np.random.randint(0, 5, (100, ))
+        }
+
+    def test_transform(self):
+        # test assertion for invalid instance_classes
+        with self.assertRaises(AssertionError):
+            transform = PolarMix(instance_classes=1)
+
+        with self.assertRaises(AssertionError):
+            transform = PolarMix(instance_classes=[1.0, 2.0])
+
+        transform = PolarMix(instance_classes=[1, 2])
+        # test assertion for invalid mix_results
+        with self.assertRaises(AssertionError):
+            results = transform(copy.deepcopy(self.results))
+
+        with self.assertRaises(AssertionError):
+            self.results['mix_results'] = [copy.deepcopy(self.results)] * 2
+            results = transform(copy.deepcopy(self.results))
+
+        self.results['mix_results'] = [copy.deepcopy(self.results)]
+        results = transform(copy.deepcopy(self.results))
+        self.assertTrue(results['points'].shape[0] ==
+                        results['pts_semantic_mask'].shape[0])
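The next patch replaces the hand-rolled rotation matrix with BasePoints.rotate. As a sanity check on the convention being replaced: with row-vector points, right-multiplying by the matrix from patch 01 rotates them by +omega about the z-axis. A minimal NumPy verification, not part of the patch:

    import numpy as np

    omega = np.pi / 6
    # The matrix used in patch 01; points are row vectors, so `points @ rot_mat`.
    rot_mat = np.array([[np.cos(omega), np.sin(omega), 0],
                        [-np.sin(omega), np.cos(omega), 0],
                        [0, 0, 1]])

    point = np.array([[1.0, 0.0, 0.0]])
    rotated = point @ rot_mat
    # Rotating (1, 0, 0) by +30 degrees about z gives (cos 30, sin 30, 0).
    assert np.allclose(rotated, [[np.cos(omega), np.sin(omega), 0.0]])

Whether BasePoints.rotate(angle) follows exactly the same sign convention is assumed here rather than shown by the diff.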
""" def __init__(self, instance_classes: List[int], - swap_ratio: float = 0.5) -> None: + swap_ratio: float = 0.5, + rotate_paste_ratio: float = 1.0) -> None: assert is_list_of(instance_classes, int) self.instance_classes = instance_classes self.swap_ratio = swap_ratio - self.omega = [ - np.random.random() * np.pi * 2 / 3, - (np.random.random() + 1) * np.pi * 2 / 3 - ] + self.rotate_paste_ratio = rotate_paste_ratio def get_indexes(self, dataset: BaseDataset) -> int: """Call function to collect indexes. @@ -2415,76 +2415,65 @@ def transform(self, input_dict: dict) -> dict: retrieve_results = input_dict['mix_results'][0] retrieve_points = retrieve_results['points'] - retrieve_points_numpy = retrieve_points.tensor.numpy() retrieve_pts_semantic_mask = retrieve_results['pts_semantic_mask'] points = input_dict['points'] - attribute_dims = points.attribute_dims - - points_numpy = points.tensor.numpy() - pts_semantic_mask = retrieve_results['pts_semantic_mask'] - - point_type = type(points) + pts_semantic_mask = input_dict['pts_semantic_mask'] # 1. swap point cloud if np.random.random() < self.swap_ratio: start_angle = (np.random.random() - 1) * np.pi # -pi~pi end_angle = start_angle + np.pi # calculate horizontal angle for each point - yaw = -np.arctan2(points_numpy[:, 1], points_numpy[:, 0]) - retrieve_yaw = -np.arctan2(retrieve_points_numpy[:, 1], - retrieve_points_numpy[:, 0]) + yaw = torch.atan2(points.coord[:, 1], points.coord[:, 0]) + retrieve_yaw = torch.atan2(retrieve_points.coord[:, 1], + retrieve_points.coord[:, 0]) # select points in sector - idx = np.where((yaw > start_angle) & (yaw < end_angle)) - retrieve_idx = np.where((retrieve_yaw > start_angle) - & (retrieve_yaw < end_angle)) + idx = (yaw <= start_angle) | (yaw >= end_angle) + retrieve_idx = (retrieve_yaw > start_angle) & ( + retrieve_yaw < end_angle) # swap - points_numpy = np.delete(points_numpy, idx, axis=0) - points_numpy = np.concatenate( - (points_numpy, retrieve_points_numpy[retrieve_idx]), axis=0) - pts_semantic_mask = np.delete(pts_semantic_mask, idx, axis=0) + points = points[idx] + points = points.cat([points, retrieve_points[retrieve_idx]]) pts_semantic_mask = np.concatenate( - (pts_semantic_mask, retrieve_pts_semantic_mask[retrieve_idx]), + (pts_semantic_mask[idx.numpy()], + retrieve_pts_semantic_mask[retrieve_idx.numpy()]), axis=0) # 2. 
From 915fd06d36e36a044e27c08edad15e53983945ea Mon Sep 17 00:00:00 2001
From: Xiang Xu
Date: Mon, 13 Feb 2023 11:03:22 +0800
Subject: [PATCH 05/22] Update transforms_3d.py

---
 mmdet3d/datasets/transforms/transforms_3d.py | 2613 +-----------------
 1 file changed, 130 insertions(+), 2483 deletions(-)

diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py
index a729f5739c..59fda6f834 100644
--- a/mmdet3d/datasets/transforms/transforms_3d.py
+++ b/mmdet3d/datasets/transforms/transforms_3d.py
@@ -1,2487 +1,134 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import random -import warnings -from typing import List, Optional, Tuple, Union +import copy +import unittest -import cv2 -import mmcv import numpy as np import torch -from mmcv.transforms import BaseTransform, Compose, RandomResize, Resize -from mmdet.datasets.transforms import (PhotoMetricDistortion, RandomCrop, - RandomFlip) -from mmengine import is_list_of, is_tuple_of -from mmengine.dataset import BaseDataset - -from mmdet3d.models.task_modules import VoxelGenerator -from mmdet3d.registry import TRANSFORMS -from mmdet3d.structures import (CameraInstance3DBoxes, DepthInstance3DBoxes, - LiDARInstance3DBoxes) -from mmdet3d.structures.ops import box_np_ops -from mmdet3d.structures.points import BasePoints -from .data_augment_utils import noise_per_object_v3_ - - -@TRANSFORMS.register_module() -class RandomDropPointsColor(BaseTransform): - r"""Randomly set the color of points to all zeros. - - Once this transform is executed, all the points' color will be dropped. - Refer to `PAConv `_ for more details. - - Args: - drop_ratio (float): The probability of dropping point colors. - Defaults to 0.2. - """ - - def __init__(self, drop_ratio: float = 0.2) -> None: - assert isinstance(drop_ratio, (int, float)) and 0 <= drop_ratio <= 1, \ - f'invalid drop_ratio value {drop_ratio}' - self.drop_ratio = drop_ratio - - def transform(self, input_dict: dict) -> dict: - """Call function to drop point colors. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after color dropping, 'points' key is updated - in the result dict. - """ - points = input_dict['points'] - assert points.attribute_dims is not None and \ - 'color' in points.attribute_dims, \ - 'Expect points have color attribute' - - # this if-expression is a bit strange - # `RandomDropPointsColor` is used in training 3D segmentor PAConv - # we discovered in our experiments that, using - # `if np.random.rand() > 1.0 - self.drop_ratio` consistently leads to - # better results than using `if np.random.rand() < self.drop_ratio` - # so we keep this hack in our codebase - if np.random.rand() > 1.0 - self.drop_ratio: - points.color = points.color * 0.0 - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(drop_ratio={self.drop_ratio})' - return repr_str - - -@TRANSFORMS.register_module() -class RandomFlip3D(RandomFlip): - """Flip the points & bbox. - - If the input dict contains the key "flip", then the flag will be used, - otherwise it will be randomly decided by a ratio specified in the init - method. - - Required Keys: - - - points (np.float32) - - gt_bboxes_3d (np.float32) - - Modified Keys: - - - points (np.float32) - - gt_bboxes_3d (np.float32) - - Added Keys: - - - points (np.float32) - - pcd_trans (np.float32) - - pcd_rotation (np.float32) - - pcd_rotation_angle (np.float32) - - pcd_scale_factor (np.float32) - - Args: - sync_2d (bool): Whether to apply flip according to the 2D - images. If True, it will apply the same flip as that to 2D images. - If False, it will decide whether to flip randomly and independently - to that of 2D images. Defaults to True. - flip_ratio_bev_horizontal (float): The flipping probability - in horizontal direction. Defaults to 0.0. - flip_ratio_bev_vertical (float): The flipping probability - in vertical direction. Defaults to 0.0. - flip_box3d (bool): Whether to flip bounding box. In most of the case, - the box should be fliped. 
In cam-based bev detection, this is set - to False, since the flip of 2D images does not influence the 3D - box. Defaults to True. - """ - - def __init__(self, - sync_2d: bool = True, - flip_ratio_bev_horizontal: float = 0.0, - flip_ratio_bev_vertical: float = 0.0, - flip_box3d: bool = True, - **kwargs) -> None: - # `flip_ratio_bev_horizontal` is equal to - # for flip prob of 2d image when - # `sync_2d` is True - super(RandomFlip3D, self).__init__( - prob=flip_ratio_bev_horizontal, direction='horizontal', **kwargs) - self.sync_2d = sync_2d - self.flip_ratio_bev_horizontal = flip_ratio_bev_horizontal - self.flip_ratio_bev_vertical = flip_ratio_bev_vertical - self.flip_box3d = flip_box3d - if flip_ratio_bev_horizontal is not None: - assert isinstance( - flip_ratio_bev_horizontal, - (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1 - if flip_ratio_bev_vertical is not None: - assert isinstance( - flip_ratio_bev_vertical, - (int, float)) and 0 <= flip_ratio_bev_vertical <= 1 - - def random_flip_data_3d(self, - input_dict: dict, - direction: str = 'horizontal') -> None: - """Flip 3D data randomly. - - `random_flip_data_3d` should take these situations into consideration: - - - 1. LIDAR-based 3d detection - - 2. LIDAR-based 3d segmentation - - 3. vision-only detection - - 4. multi-modality 3d detection. - - Args: - input_dict (dict): Result dict from loading pipeline. - direction (str): Flip direction. Defaults to 'horizontal'. - - Returns: - dict: Flipped results, 'points', 'bbox3d_fields' keys are - updated in the result dict. - """ - assert direction in ['horizontal', 'vertical'] - if self.flip_box3d: - if 'gt_bboxes_3d' in input_dict: - if 'points' in input_dict: - input_dict['points'] = input_dict['gt_bboxes_3d'].flip( - direction, points=input_dict['points']) - else: - # vision-only detection - input_dict['gt_bboxes_3d'].flip(direction) - else: - input_dict['points'].flip(direction) - - if 'centers_2d' in input_dict: - assert self.sync_2d is True and direction == 'horizontal', \ - 'Only support sync_2d=True and horizontal flip with images' - w = input_dict['img_shape'][1] - input_dict['centers_2d'][..., 0] = \ - w - input_dict['centers_2d'][..., 0] - # need to modify the horizontal position of camera center - # along u-axis in the image (flip like centers2d) - # ['cam2img'][0][2] = c_u - # see more details and examples at - # https://github.com/open-mmlab/mmdetection3d/pull/744 - input_dict['cam2img'][0][2] = w - input_dict['cam2img'][0][2] - - def _flip_on_direction(self, results: dict) -> None: - """Function to flip images, bounding boxes, semantic segmentation map - and keypoints. - - Add the override feature that if 'flip' is already in results, use it - to do the augmentation. - """ - if 'flip' not in results: - cur_dir = self._choose_direction() - else: - cur_dir = results['flip_direction'] - if cur_dir is None: - results['flip'] = False - results['flip_direction'] = None - else: - results['flip'] = True - results['flip_direction'] = cur_dir - self._flip(results) - - def transform(self, input_dict: dict) -> dict: - """Call function to flip points, values in the ``bbox3d_fields`` and - also flip 2D image and its annotations. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Flipped results, 'flip', 'flip_direction', - 'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added - into result dict. 
- """ - # flip 2D image and its annotations - if 'img' in input_dict: - super(RandomFlip3D, self).transform(input_dict) - - if self.sync_2d and 'img' in input_dict: - input_dict['pcd_horizontal_flip'] = input_dict['flip'] - input_dict['pcd_vertical_flip'] = False - else: - if 'pcd_horizontal_flip' not in input_dict: - flip_horizontal = True if np.random.rand( - ) < self.flip_ratio_bev_horizontal else False - input_dict['pcd_horizontal_flip'] = flip_horizontal - if 'pcd_vertical_flip' not in input_dict: - flip_vertical = True if np.random.rand( - ) < self.flip_ratio_bev_vertical else False - input_dict['pcd_vertical_flip'] = flip_vertical - - if 'transformation_3d_flow' not in input_dict: - input_dict['transformation_3d_flow'] = [] - - if input_dict['pcd_horizontal_flip']: - self.random_flip_data_3d(input_dict, 'horizontal') - input_dict['transformation_3d_flow'].extend(['HF']) - if input_dict['pcd_vertical_flip']: - self.random_flip_data_3d(input_dict, 'vertical') - input_dict['transformation_3d_flow'].extend(['VF']) - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(sync_2d={self.sync_2d},' - repr_str += f' flip_ratio_bev_vertical={self.flip_ratio_bev_vertical})' - return repr_str - - -@TRANSFORMS.register_module() -class RandomJitterPoints(BaseTransform): - """Randomly jitter point coordinates. - - Different from the global translation in ``GlobalRotScaleTrans``, here we - apply different noises to each point in a scene. - - Args: - jitter_std (list[float]): The standard deviation of jittering noise. - This applies random noise to all points in a 3D scene, which is - sampled from a gaussian distribution whose standard deviation is - set by ``jitter_std``. Defaults to [0.01, 0.01, 0.01] - clip_range (list[float]): Clip the randomly generated jitter - noise into this range. If None is given, don't perform clipping. - Defaults to [-0.05, 0.05] - - Note: - This transform should only be used in point cloud segmentation tasks - because we don't transform ground-truth bboxes accordingly. - For similar transform in detection task, please refer to `ObjectNoise`. - """ - - def __init__(self, - jitter_std: List[float] = [0.01, 0.01, 0.01], - clip_range: List[float] = [-0.05, 0.05]) -> None: - seq_types = (list, tuple, np.ndarray) - if not isinstance(jitter_std, seq_types): - assert isinstance(jitter_std, (int, float)), \ - f'unsupported jitter_std type {type(jitter_std)}' - jitter_std = [jitter_std, jitter_std, jitter_std] - self.jitter_std = jitter_std - - if clip_range is not None: - if not isinstance(clip_range, seq_types): - assert isinstance(clip_range, (int, float)), \ - f'unsupported clip_range type {type(clip_range)}' - clip_range = [-clip_range, clip_range] - self.clip_range = clip_range - - def transform(self, input_dict: dict) -> dict: - """Call function to jitter all the points in the scene. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after adding noise to each point, - 'points' key is updated in the result dict. 
- """ - points = input_dict['points'] - jitter_std = np.array(self.jitter_std, dtype=np.float32) - jitter_noise = \ - np.random.randn(points.shape[0], 3) * jitter_std[None, :] - if self.clip_range is not None: - jitter_noise = np.clip(jitter_noise, self.clip_range[0], - self.clip_range[1]) - - points.translate(jitter_noise) - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(jitter_std={self.jitter_std},' - repr_str += f' clip_range={self.clip_range})' - return repr_str - - -@TRANSFORMS.register_module() -class ObjectSample(BaseTransform): - """Sample GT objects to the data. - - Required Keys: - - - points - - ann_info - - gt_bboxes_3d - - gt_labels_3d - - img (optional) - - gt_bboxes (optional) - - Modified Keys: - - - points - - gt_bboxes_3d - - gt_labels_3d - - img (optional) - - gt_bboxes (optional) - - Added Keys: - - - plane (optional) - - Args: - db_sampler (dict): Config dict of the database sampler. - sample_2d (bool): Whether to also paste 2D image patch to the images. - This should be true when applying multi-modality cut-and-paste. - Defaults to False. - use_ground_plane (bool): Whether to use ground plane to adjust the - 3D labels. Defaults to False. - """ - - def __init__(self, - db_sampler: dict, - sample_2d: bool = False, - use_ground_plane: bool = False) -> None: - self.sampler_cfg = db_sampler - self.sample_2d = sample_2d - if 'type' not in db_sampler.keys(): - db_sampler['type'] = 'DataBaseSampler' - self.db_sampler = TRANSFORMS.build(db_sampler) - self.use_ground_plane = use_ground_plane - self.disabled = False - - @staticmethod - def remove_points_in_boxes(points: BasePoints, - boxes: np.ndarray) -> np.ndarray: - """Remove the points in the sampled bounding boxes. - - Args: - points (:obj:`BasePoints`): Input point cloud array. - boxes (np.ndarray): Sampled ground truth boxes. - - Returns: - np.ndarray: Points with those in the boxes removed. - """ - masks = box_np_ops.points_in_rbbox(points.coord.numpy(), boxes) - points = points[np.logical_not(masks.any(-1))] - return points - - def transform(self, input_dict: dict) -> dict: - """Transform function to sample ground truth objects to the data. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after object sampling augmentation, - 'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated - in the result dict. 
- """ - if self.disabled: - return input_dict - - gt_bboxes_3d = input_dict['gt_bboxes_3d'] - gt_labels_3d = input_dict['gt_labels_3d'] - - if self.use_ground_plane: - ground_plane = input_dict.get('plane', None) - assert ground_plane is not None, '`use_ground_plane` is True ' \ - 'but find plane is None' - else: - ground_plane = None - # change to float for blending operation - points = input_dict['points'] - if self.sample_2d: - img = input_dict['img'] - gt_bboxes_2d = input_dict['gt_bboxes'] - # Assume for now 3D & 2D bboxes are the same - sampled_dict = self.db_sampler.sample_all( - gt_bboxes_3d.tensor.numpy(), - gt_labels_3d, - gt_bboxes_2d=gt_bboxes_2d, - img=img) - else: - sampled_dict = self.db_sampler.sample_all( - gt_bboxes_3d.tensor.numpy(), - gt_labels_3d, - img=None, - ground_plane=ground_plane) - - if sampled_dict is not None: - sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d'] - sampled_points = sampled_dict['points'] - sampled_gt_labels = sampled_dict['gt_labels_3d'] - - gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels], - axis=0) - gt_bboxes_3d = gt_bboxes_3d.new_box( - np.concatenate( - [gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d])) - - points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d) - # check the points dimension - points = points.cat([sampled_points, points]) - - if self.sample_2d: - sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d'] - gt_bboxes_2d = np.concatenate( - [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32) - - input_dict['gt_bboxes'] = gt_bboxes_2d - input_dict['img'] = sampled_dict['img'] - - input_dict['gt_bboxes_3d'] = gt_bboxes_3d - input_dict['gt_labels_3d'] = gt_labels_3d.astype(np.int64) - input_dict['points'] = points - - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(db_sampler={self.db_sampler},' - repr_str += f' sample_2d={self.sample_2d},' - repr_str += f' use_ground_plane={self.use_ground_plane})' - return repr_str - - -@TRANSFORMS.register_module() -class ObjectNoise(BaseTransform): - """Apply noise to each GT objects in the scene. - - Required Keys: - - - points - - gt_bboxes_3d - - Modified Keys: - - - points - - gt_bboxes_3d - - Args: - translation_std (list[float]): Standard deviation of the - distribution where translation noise are sampled from. - Defaults to [0.25, 0.25, 0.25]. - global_rot_range (list[float]): Global rotation to the scene. - Defaults to [0.0, 0.0]. - rot_range (list[float]): Object rotation range. - Defaults to [-0.15707963267, 0.15707963267]. - num_try (int): Number of times to try if the noise applied is invalid. - Defaults to 100. - """ - - def __init__(self, - translation_std: List[float] = [0.25, 0.25, 0.25], - global_rot_range: List[float] = [0.0, 0.0], - rot_range: List[float] = [-0.15707963267, 0.15707963267], - num_try: int = 100) -> None: - self.translation_std = translation_std - self.global_rot_range = global_rot_range - self.rot_range = rot_range - self.num_try = num_try - - def transform(self, input_dict: dict) -> dict: - """Transform function to apply noise to each ground truth in the scene. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after adding noise to each object, - 'points', 'gt_bboxes_3d' keys are updated in the result dict. 
- """ - gt_bboxes_3d = input_dict['gt_bboxes_3d'] - points = input_dict['points'] - - # TODO: this is inplace operation - numpy_box = gt_bboxes_3d.tensor.numpy() - numpy_points = points.tensor.numpy() - - noise_per_object_v3_( - numpy_box, - numpy_points, - rotation_perturb=self.rot_range, - center_noise_std=self.translation_std, - global_random_rot_range=self.global_rot_range, - num_try=self.num_try) - - input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box) - input_dict['points'] = points.new_point(numpy_points) - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(num_try={self.num_try},' - repr_str += f' translation_std={self.translation_std},' - repr_str += f' global_rot_range={self.global_rot_range},' - repr_str += f' rot_range={self.rot_range})' - return repr_str - - -@TRANSFORMS.register_module() -class GlobalAlignment(BaseTransform): - """Apply global alignment to 3D scene points by rotation and translation. - - Args: - rotation_axis (int): Rotation axis for points and bboxes rotation. - - Note: - We do not record the applied rotation and translation as in - GlobalRotScaleTrans. Because usually, we do not need to reverse - the alignment step. - For example, ScanNet 3D detection task uses aligned ground-truth - bounding boxes for evaluation. - """ - - def __init__(self, rotation_axis: int) -> None: - self.rotation_axis = rotation_axis - - def _trans_points(self, results: dict, trans_factor: np.ndarray) -> None: - """Private function to translate points. - - Args: - input_dict (dict): Result dict from loading pipeline. - trans_factor (np.ndarray): Translation vector to be applied. - - Returns: - dict: Results after translation, 'points' is updated in the dict. - """ - results['points'].translate(trans_factor) - - def _rot_points(self, results: dict, rot_mat: np.ndarray) -> None: - """Private function to rotate bounding boxes and points. - - Args: - input_dict (dict): Result dict from loading pipeline. - rot_mat (np.ndarray): Rotation matrix to be applied. - - Returns: - dict: Results after rotation, 'points' is updated in the dict. - """ - # input should be rot_mat_T so I transpose it here - results['points'].rotate(rot_mat.T) - - def _check_rot_mat(self, rot_mat: np.ndarray) -> None: - """Check if rotation matrix is valid for self.rotation_axis. - - Args: - rot_mat (np.ndarray): Rotation matrix to be applied. - """ - is_valid = np.allclose(np.linalg.det(rot_mat), 1.0) - valid_array = np.zeros(3) - valid_array[self.rotation_axis] = 1.0 - is_valid &= (rot_mat[self.rotation_axis, :] == valid_array).all() - is_valid &= (rot_mat[:, self.rotation_axis] == valid_array).all() - assert is_valid, f'invalid rotation matrix {rot_mat}' - - def transform(self, results: dict) -> dict: - """Call function to shuffle points. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after global alignment, 'points' and keys in - input_dict['bbox3d_fields'] are updated in the result dict. 
- """ - assert 'axis_align_matrix' in results, \ - 'axis_align_matrix is not provided in GlobalAlignment' - - axis_align_matrix = results['axis_align_matrix'] - assert axis_align_matrix.shape == (4, 4), \ - f'invalid shape {axis_align_matrix.shape} for axis_align_matrix' - rot_mat = axis_align_matrix[:3, :3] - trans_vec = axis_align_matrix[:3, -1] - - self._check_rot_mat(rot_mat) - self._rot_points(results, rot_mat) - self._trans_points(results, trans_vec) - - return results - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(rotation_axis={self.rotation_axis})' - return repr_str - - -@TRANSFORMS.register_module() -class GlobalRotScaleTrans(BaseTransform): - """Apply global rotation, scaling and translation to a 3D scene. - - Required Keys: - - - points (np.float32) - - gt_bboxes_3d (np.float32) - - Modified Keys: - - - points (np.float32) - - gt_bboxes_3d (np.float32) - - Added Keys: - - - points (np.float32) - - pcd_trans (np.float32) - - pcd_rotation (np.float32) - - pcd_rotation_angle (np.float32) - - pcd_scale_factor (np.float32) - - Args: - rot_range (list[float]): Range of rotation angle. - Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]). - scale_ratio_range (list[float]): Range of scale ratio. - Defaults to [0.95, 1.05]. - translation_std (list[float]): The standard deviation of - translation noise applied to a scene, which - is sampled from a gaussian distribution whose standard deviation - is set by ``translation_std``. Defaults to [0, 0, 0]. - shift_height (bool): Whether to shift height. - (the fourth dimension of indoor points) when scaling. - Defaults to False. - """ - - def __init__(self, - rot_range: List[float] = [-0.78539816, 0.78539816], - scale_ratio_range: List[float] = [0.95, 1.05], - translation_std: List[int] = [0, 0, 0], - shift_height: bool = False) -> None: - seq_types = (list, tuple, np.ndarray) - if not isinstance(rot_range, seq_types): - assert isinstance(rot_range, (int, float)), \ - f'unsupported rot_range type {type(rot_range)}' - rot_range = [-rot_range, rot_range] - self.rot_range = rot_range - - assert isinstance(scale_ratio_range, seq_types), \ - f'unsupported scale_ratio_range type {type(scale_ratio_range)}' - - self.scale_ratio_range = scale_ratio_range - - if not isinstance(translation_std, seq_types): - assert isinstance(translation_std, (int, float)), \ - f'unsupported translation_std type {type(translation_std)}' - translation_std = [ - translation_std, translation_std, translation_std - ] - assert all([std >= 0 for std in translation_std]), \ - 'translation_std should be positive' - self.translation_std = translation_std - self.shift_height = shift_height - - def _trans_bbox_points(self, input_dict: dict) -> None: - """Private function to translate bounding boxes and points. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after translation, 'points', 'pcd_trans' - and `gt_bboxes_3d` is updated in the result dict. - """ - translation_std = np.array(self.translation_std, dtype=np.float32) - trans_factor = np.random.normal(scale=translation_std, size=3).T - - input_dict['points'].translate(trans_factor) - input_dict['pcd_trans'] = trans_factor - if 'gt_bboxes_3d' in input_dict: - input_dict['gt_bboxes_3d'].translate(trans_factor) - - def _rot_bbox_points(self, input_dict: dict) -> None: - """Private function to rotate bounding boxes and points. 
- - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after rotation, 'points', 'pcd_rotation' - and `gt_bboxes_3d` is updated in the result dict. - """ - rotation = self.rot_range - noise_rotation = np.random.uniform(rotation[0], rotation[1]) - - if 'gt_bboxes_3d' in input_dict and \ - len(input_dict['gt_bboxes_3d'].tensor) != 0: - # rotate points with bboxes - points, rot_mat_T = input_dict['gt_bboxes_3d'].rotate( - noise_rotation, input_dict['points']) - input_dict['points'] = points - else: - # if no bbox in input_dict, only rotate points - rot_mat_T = input_dict['points'].rotate(noise_rotation) - - input_dict['pcd_rotation'] = rot_mat_T - input_dict['pcd_rotation_angle'] = noise_rotation - - def _scale_bbox_points(self, input_dict: dict) -> None: - """Private function to scale bounding boxes and points. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after scaling, 'points' and - `gt_bboxes_3d` is updated in the result dict. - """ - scale = input_dict['pcd_scale_factor'] - points = input_dict['points'] - points.scale(scale) - if self.shift_height: - assert 'height' in points.attribute_dims.keys(), \ - 'setting shift_height=True but points have no height attribute' - points.tensor[:, points.attribute_dims['height']] *= scale - input_dict['points'] = points - - if 'gt_bboxes_3d' in input_dict and \ - len(input_dict['gt_bboxes_3d'].tensor) != 0: - input_dict['gt_bboxes_3d'].scale(scale) - - def _random_scale(self, input_dict: dict) -> None: - """Private function to randomly set the scale factor. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after scaling, 'pcd_scale_factor' - are updated in the result dict. - """ - scale_factor = np.random.uniform(self.scale_ratio_range[0], - self.scale_ratio_range[1]) - input_dict['pcd_scale_factor'] = scale_factor - - def transform(self, input_dict: dict) -> dict: - """Private function to rotate, scale and translate bounding boxes and - points. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after scaling, 'points', 'pcd_rotation', - 'pcd_scale_factor', 'pcd_trans' and `gt_bboxes_3d` are updated - in the result dict. - """ - if 'transformation_3d_flow' not in input_dict: - input_dict['transformation_3d_flow'] = [] - - self._rot_bbox_points(input_dict) - - if 'pcd_scale_factor' not in input_dict: - self._random_scale(input_dict) - self._scale_bbox_points(input_dict) - - self._trans_bbox_points(input_dict) - - input_dict['transformation_3d_flow'].extend(['R', 'S', 'T']) - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(rot_range={self.rot_range},' - repr_str += f' scale_ratio_range={self.scale_ratio_range},' - repr_str += f' translation_std={self.translation_std},' - repr_str += f' shift_height={self.shift_height})' - return repr_str - - -@TRANSFORMS.register_module() -class PointShuffle(BaseTransform): - """Shuffle input points.""" - - def transform(self, input_dict: dict) -> dict: - """Call function to shuffle points. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after filtering, 'points', 'pts_instance_mask' - and 'pts_semantic_mask' keys are updated in the result dict. 
- """ - idx = input_dict['points'].shuffle() - idx = idx.numpy() - - pts_instance_mask = input_dict.get('pts_instance_mask', None) - pts_semantic_mask = input_dict.get('pts_semantic_mask', None) - - if pts_instance_mask is not None: - input_dict['pts_instance_mask'] = pts_instance_mask[idx] - - if pts_semantic_mask is not None: - input_dict['pts_semantic_mask'] = pts_semantic_mask[idx] - - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - return self.__class__.__name__ - - -@TRANSFORMS.register_module() -class ObjectRangeFilter(BaseTransform): - """Filter objects by the range. - - Required Keys: - - - gt_bboxes_3d - - Modified Keys: - - - gt_bboxes_3d - - Args: - point_cloud_range (list[float]): Point cloud range. - """ - - def __init__(self, point_cloud_range: List[float]) -> None: - self.pcd_range = np.array(point_cloud_range, dtype=np.float32) - - def transform(self, input_dict: dict) -> dict: - """Transform function to filter objects by the range. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' - keys are updated in the result dict. - """ - # Check points instance type and initialise bev_range - if isinstance(input_dict['gt_bboxes_3d'], - (LiDARInstance3DBoxes, DepthInstance3DBoxes)): - bev_range = self.pcd_range[[0, 1, 3, 4]] - elif isinstance(input_dict['gt_bboxes_3d'], CameraInstance3DBoxes): - bev_range = self.pcd_range[[0, 2, 3, 5]] - - gt_bboxes_3d = input_dict['gt_bboxes_3d'] - gt_labels_3d = input_dict['gt_labels_3d'] - mask = gt_bboxes_3d.in_range_bev(bev_range) - gt_bboxes_3d = gt_bboxes_3d[mask] - # mask is a torch tensor but gt_labels_3d is still numpy array - # using mask to index gt_labels_3d will cause bug when - # len(gt_labels_3d) == 1, where mask=1 will be interpreted - # as gt_labels_3d[1] and cause out of index error - gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool)] - - # limit rad to [-pi, pi] - gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi) - input_dict['gt_bboxes_3d'] = gt_bboxes_3d - input_dict['gt_labels_3d'] = gt_labels_3d - - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' - return repr_str - - -@TRANSFORMS.register_module() -class PointsRangeFilter(BaseTransform): - """Filter points by the range. - - Required Keys: - - - points - - pts_instance_mask (optional) - - Modified Keys: - - - points - - pts_instance_mask (optional) - - Args: - point_cloud_range (list[float]): Point cloud range. - """ - - def __init__(self, point_cloud_range: List[float]) -> None: - self.pcd_range = np.array(point_cloud_range, dtype=np.float32) - - def transform(self, input_dict: dict) -> dict: - """Transform function to filter points by the range. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after filtering, 'points', 'pts_instance_mask' - and 'pts_semantic_mask' keys are updated in the result dict. 
- """ - points = input_dict['points'] - points_mask = points.in_range_3d(self.pcd_range) - clean_points = points[points_mask] - input_dict['points'] = clean_points - points_mask = points_mask.numpy() - - pts_instance_mask = input_dict.get('pts_instance_mask', None) - pts_semantic_mask = input_dict.get('pts_semantic_mask', None) - - if pts_instance_mask is not None: - input_dict['pts_instance_mask'] = pts_instance_mask[points_mask] - - if pts_semantic_mask is not None: - input_dict['pts_semantic_mask'] = pts_semantic_mask[points_mask] - - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' - return repr_str - - -@TRANSFORMS.register_module() -class ObjectNameFilter(BaseTransform): - """Filter GT objects by their names. - - Required Keys: - - - gt_labels_3d - - Modified Keys: - - - gt_labels_3d - - Args: - classes (list[str]): List of class names to be kept for training. - """ - - def __init__(self, classes: List[str]) -> None: - self.classes = classes - self.labels = list(range(len(self.classes))) - - def transform(self, input_dict: dict) -> dict: - """Transform function to filter objects by their names. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' - keys are updated in the result dict. - """ - gt_labels_3d = input_dict['gt_labels_3d'] - gt_bboxes_mask = np.array([n in self.labels for n in gt_labels_3d], - dtype=np.bool_) - input_dict['gt_bboxes_3d'] = input_dict['gt_bboxes_3d'][gt_bboxes_mask] - input_dict['gt_labels_3d'] = input_dict['gt_labels_3d'][gt_bboxes_mask] - - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(classes={self.classes})' - return repr_str - - -@TRANSFORMS.register_module() -class PointSample(BaseTransform): - """Point sample. - - Sampling data to a certain number. - - Required Keys: - - - points - - pts_instance_mask (optional) - - pts_semantic_mask (optional) - - Modified Keys: - - - points - - pts_instance_mask (optional) - - pts_semantic_mask (optional) - - Args: - num_points (int): Number of points to be sampled. - sample_range (float, optional): The range where to sample points. - If not None, the points with depth larger than `sample_range` are - prior to be sampled. Defaults to None. - replace (bool): Whether the sampling is with or without replacement. - Defaults to False. - """ - - def __init__(self, - num_points: int, - sample_range: Optional[float] = None, - replace: bool = False) -> None: - self.num_points = num_points - self.sample_range = sample_range - self.replace = replace - - def _points_random_sampling( - self, - points: BasePoints, - num_samples: int, - sample_range: Optional[float] = None, - replace: bool = False, - return_choices: bool = False - ) -> Union[Tuple[BasePoints, np.ndarray], BasePoints]: - """Points random sampling. - - Sample points to a certain number. - - Args: - points (:obj:`BasePoints`): 3D Points. - num_samples (int): Number of samples to be sampled. - sample_range (float, optional): Indicating the range where the - points will be sampled. Defaults to None. - replace (bool): Sampling with or without replacement. - Defaults to False. - return_choices (bool): Whether return choice. Defaults to False. 
- - Returns: - tuple[:obj:`BasePoints`, np.ndarray] | :obj:`BasePoints`: - - - points (:obj:`BasePoints`): 3D Points. - - choices (np.ndarray, optional): The generated random samples. - """ - if not replace: - replace = (points.shape[0] < num_samples) - point_range = range(len(points)) - if sample_range is not None and not replace: - # Only sampling the near points when len(points) >= num_samples - dist = np.linalg.norm(points.coord.numpy(), axis=1) - far_inds = np.where(dist >= sample_range)[0] - near_inds = np.where(dist < sample_range)[0] - # in case there are too many far points - if len(far_inds) > num_samples: - far_inds = np.random.choice( - far_inds, num_samples, replace=False) - point_range = near_inds - num_samples -= len(far_inds) - choices = np.random.choice(point_range, num_samples, replace=replace) - if sample_range is not None and not replace: - choices = np.concatenate((far_inds, choices)) - # Shuffle points after sampling - np.random.shuffle(choices) - if return_choices: - return points[choices], choices - else: - return points[choices] - - def transform(self, input_dict: dict) -> dict: - """Transform function to sample points to in indoor scenes. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after sampling, 'points', 'pts_instance_mask' - and 'pts_semantic_mask' keys are updated in the result dict. - """ - points = input_dict['points'] - points, choices = self._points_random_sampling( - points, - self.num_points, - self.sample_range, - self.replace, - return_choices=True) - input_dict['points'] = points - - pts_instance_mask = input_dict.get('pts_instance_mask', None) - pts_semantic_mask = input_dict.get('pts_semantic_mask', None) - - if pts_instance_mask is not None: - pts_instance_mask = pts_instance_mask[choices] - input_dict['pts_instance_mask'] = pts_instance_mask - - if pts_semantic_mask is not None: - pts_semantic_mask = pts_semantic_mask[choices] - input_dict['pts_semantic_mask'] = pts_semantic_mask - - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(num_points={self.num_points},' - repr_str += f' sample_range={self.sample_range},' - repr_str += f' replace={self.replace})' - - return repr_str - - -@TRANSFORMS.register_module() -class IndoorPointSample(PointSample): - """Indoor point sample. - - Sampling data to a certain number. - NOTE: IndoorPointSample is deprecated in favor of PointSample - - Args: - num_points (int): Number of points to be sampled. - """ - - def __init__(self, *args, **kwargs): - warnings.warn( - 'IndoorPointSample is deprecated in favor of PointSample') - super(IndoorPointSample, self).__init__(*args, **kwargs) - - -@TRANSFORMS.register_module() -class IndoorPatchPointSample(BaseTransform): - r"""Indoor point sample within a patch. Modified from `PointNet++ `_. - - Sampling data to a certain number for semantic segmentation. - - Args: - num_points (int): Number of points to be sampled. - block_size (float): Size of a block to sample points from. - Defaults to 1.5. - sample_rate (float, optional): Stride used in sliding patch generation. - This parameter is unused in `IndoorPatchPointSample` and thus has - been deprecated. We plan to remove it in the future. - Defaults to None. - ignore_index (int, optional): Label index that won't be used for the - segmentation task. This is set in PointSegClassMapping as neg_cls. - If not None, will be used as a patch selection criterion. 
- Defaults to None. - use_normalized_coord (bool): Whether to use normalized xyz as - additional features. Defaults to False. - num_try (int): Number of times to try if the patch selected is invalid. - Defaults to 10. - enlarge_size (float): Enlarge the sampled patch to - [-block_size / 2 - enlarge_size, block_size / 2 + enlarge_size] as - an augmentation. If None, set it as 0. Defaults to 0.2. - min_unique_num (int, optional): Minimum number of unique points - the sampled patch should contain. If None, use PointNet++'s method - to judge uniqueness. Defaults to None. - eps (float): A value added to patch boundary to guarantee - points coverage. Defaults to 1e-2. - - Note: - This transform should only be used in the training process of point - cloud segmentation tasks. For the sliding patch generation and - inference process in testing, please refer to the `slide_inference` - function of `EncoderDecoder3D` class. - """ - - def __init__(self, - num_points: int, - block_size: float = 1.5, - sample_rate: Optional[float] = None, - ignore_index: Optional[int] = None, - use_normalized_coord: bool = False, - num_try: int = 10, - enlarge_size: float = 0.2, - min_unique_num: Optional[int] = None, - eps: float = 1e-2) -> None: - self.num_points = num_points - self.block_size = block_size - self.ignore_index = ignore_index - self.use_normalized_coord = use_normalized_coord - self.num_try = num_try - self.enlarge_size = enlarge_size if enlarge_size is not None else 0.0 - self.min_unique_num = min_unique_num - self.eps = eps - - if sample_rate is not None: - warnings.warn( - "'sample_rate' has been deprecated and will be removed in " - 'the future. Please remove them from your code.') - - def _input_generation(self, coords: np.ndarray, patch_center: np.ndarray, - coord_max: np.ndarray, attributes: np.ndarray, - attribute_dims: dict, - point_type: type) -> BasePoints: - """Generating model input. - - Generate input by subtracting patch center and adding additional - features. Currently support colors and normalized xyz as features. - - Args: - coords (np.ndarray): Sampled 3D Points. - patch_center (np.ndarray): Center coordinate of the selected patch. - coord_max (np.ndarray): Max coordinate of all 3D Points. - attributes (np.ndarray): features of input points. - attribute_dims (dict): Dictionary to indicate the meaning of extra - dimension. - point_type (type): class of input points inherited from BasePoints. - - Returns: - :obj:`BasePoints`: The generated input data. - """ - # subtract patch center, the z dimension is not centered - centered_coords = coords.copy() - centered_coords[:, 0] -= patch_center[0] - centered_coords[:, 1] -= patch_center[1] - - if self.use_normalized_coord: - normalized_coord = coords / coord_max - attributes = np.concatenate([attributes, normalized_coord], axis=1) - if attribute_dims is None: - attribute_dims = dict() - attribute_dims.update( - dict(normalized_coord=[ - attributes.shape[1], attributes.shape[1] + - 1, attributes.shape[1] + 2 - ])) - - points = np.concatenate([centered_coords, attributes], axis=1) - points = point_type( - points, points_dim=points.shape[1], attribute_dims=attribute_dims) - - return points - - def _patch_points_sampling( - self, points: BasePoints, - sem_mask: np.ndarray) -> Tuple[BasePoints, np.ndarray]: - """Patch points sampling. - - First sample a valid patch. - Then sample points within that patch to a certain number. - - Args: - points (:obj:`BasePoints`): 3D Points. - sem_mask (np.ndarray): semantic segmentation mask for input points. 
- - Returns: - tuple[:obj:`BasePoints`, np.ndarray]: - - - points (:obj:`BasePoints`): 3D Points. - - choices (np.ndarray): The generated random samples. - """ - coords = points.coord.numpy() - attributes = points.tensor[:, 3:].numpy() - attribute_dims = points.attribute_dims - point_type = type(points) - - coord_max = np.amax(coords, axis=0) - coord_min = np.amin(coords, axis=0) - - for _ in range(self.num_try): - # random sample a point as patch center - cur_center = coords[np.random.choice(coords.shape[0])] - - # boundary of a patch, which would be enlarged by - # `self.enlarge_size` as an augmentation - cur_max = cur_center + np.array( - [self.block_size / 2.0, self.block_size / 2.0, 0.0]) - cur_min = cur_center - np.array( - [self.block_size / 2.0, self.block_size / 2.0, 0.0]) - cur_max[2] = coord_max[2] - cur_min[2] = coord_min[2] - cur_choice = np.sum( - (coords >= (cur_min - self.enlarge_size)) * - (coords <= (cur_max + self.enlarge_size)), - axis=1) == 3 - - if not cur_choice.any(): # no points in this patch - continue - - cur_coords = coords[cur_choice, :] - cur_sem_mask = sem_mask[cur_choice] - point_idxs = np.where(cur_choice)[0] - mask = np.sum( - (cur_coords >= (cur_min - self.eps)) * (cur_coords <= - (cur_max + self.eps)), - axis=1) == 3 - - # two criteria for patch sampling, adopted from PointNet++ - # 1. selected patch should contain enough unique points - if self.min_unique_num is None: - # use PointNet++'s method as default - # [31, 31, 62] are just some big values used to transform - # coords from 3d array to 1d and then check their uniqueness - # this is used in all the ScanNet code following PointNet++ - vidx = np.ceil( - (cur_coords[mask, :] - cur_min) / (cur_max - cur_min) * - np.array([31.0, 31.0, 62.0])) - vidx = np.unique(vidx[:, 0] * 31.0 * 62.0 + vidx[:, 1] * 62.0 + - vidx[:, 2]) - flag1 = len(vidx) / 31.0 / 31.0 / 62.0 >= 0.02 - else: - # if `min_unique_num` is provided, directly compare with it - flag1 = mask.sum() >= self.min_unique_num - - # 2. selected patch should contain enough annotated points - if self.ignore_index is None: - flag2 = True - else: - flag2 = np.sum(cur_sem_mask != self.ignore_index) / \ - len(cur_sem_mask) >= 0.7 - - if flag1 and flag2: - break - - # sample idx to `self.num_points` - if point_idxs.size >= self.num_points: - # no duplicate in sub-sampling - choices = np.random.choice( - point_idxs, self.num_points, replace=False) - else: - # do not use random choice here to avoid some points not counted - dup = np.random.choice(point_idxs.size, - self.num_points - point_idxs.size) - idx_dup = np.concatenate( - [np.arange(point_idxs.size), - np.array(dup)], 0) - choices = point_idxs[idx_dup] - - # construct model input - points = self._input_generation(coords[choices], cur_center, coord_max, - attributes[choices], attribute_dims, - point_type) - - return points, choices - - def transform(self, input_dict: dict) -> dict: - """Call function to sample points to in indoor scenes. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after sampling, 'points', 'pts_instance_mask' - and 'pts_semantic_mask' keys are updated in the result dict. 
- """ - points = input_dict['points'] - - assert 'pts_semantic_mask' in input_dict.keys(), \ - 'semantic mask should be provided in training and evaluation' - pts_semantic_mask = input_dict['pts_semantic_mask'] - - points, choices = self._patch_points_sampling(points, - pts_semantic_mask) - - input_dict['points'] = points - input_dict['pts_semantic_mask'] = pts_semantic_mask[choices] - - # 'eval_ann_info' will be passed to evaluator - if 'eval_ann_info' in input_dict: - input_dict['eval_ann_info']['pts_semantic_mask'] = \ - pts_semantic_mask[choices] - - pts_instance_mask = input_dict.get('pts_instance_mask', None) - - if pts_instance_mask is not None: - input_dict['pts_instance_mask'] = pts_instance_mask[choices] - # 'eval_ann_info' will be passed to evaluator - if 'eval_ann_info' in input_dict: - input_dict['eval_ann_info']['pts_instance_mask'] = \ - pts_instance_mask[choices] - - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(num_points={self.num_points},' - repr_str += f' block_size={self.block_size},' - repr_str += f' ignore_index={self.ignore_index},' - repr_str += f' use_normalized_coord={self.use_normalized_coord},' - repr_str += f' num_try={self.num_try},' - repr_str += f' enlarge_size={self.enlarge_size},' - repr_str += f' min_unique_num={self.min_unique_num},' - repr_str += f' eps={self.eps})' - return repr_str - - -@TRANSFORMS.register_module() -class BackgroundPointsFilter(BaseTransform): - """Filter background points near the bounding box. - - Args: - bbox_enlarge_range (tuple[float] | float): Bbox enlarge range. - """ - - def __init__(self, bbox_enlarge_range: Union[Tuple[float], float]) -> None: - assert (is_tuple_of(bbox_enlarge_range, float) - and len(bbox_enlarge_range) == 3) \ - or isinstance(bbox_enlarge_range, float), \ - f'Invalid arguments bbox_enlarge_range {bbox_enlarge_range}' - - if isinstance(bbox_enlarge_range, float): - bbox_enlarge_range = [bbox_enlarge_range] * 3 - self.bbox_enlarge_range = np.array( - bbox_enlarge_range, dtype=np.float32)[np.newaxis, :] - - def transform(self, input_dict: dict) -> dict: - """Call function to filter points by the range. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after filtering, 'points', 'pts_instance_mask' - and 'pts_semantic_mask' keys are updated in the result dict. 
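-
-        A usage sketch (editor's illustration; the box and point values
-        are made up):
-
-        Examples:
-            >>> import numpy as np
-            >>> from mmdet3d.structures import LiDARInstance3DBoxes
-            >>> from mmdet3d.structures.points import BasePoints
-            >>> transform = BackgroundPointsFilter(bbox_enlarge_range=0.5)
-            >>> input_dict = dict(
-            ...     points=BasePoints(
-            ...         np.random.rand(50, 3).astype(np.float32) * 10,
-            ...         points_dim=3),
-            ...     gt_bboxes_3d=LiDARInstance3DBoxes(
-            ...         np.array([[5., 5., 0., 2., 2., 2., 0.]])))
-            >>> input_dict = transform(input_dict)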
- """ - points = input_dict['points'] - gt_bboxes_3d = input_dict['gt_bboxes_3d'] - - # avoid groundtruth being modified - gt_bboxes_3d_np = gt_bboxes_3d.tensor.clone().numpy() - gt_bboxes_3d_np[:, :3] = gt_bboxes_3d.gravity_center.clone().numpy() - - enlarged_gt_bboxes_3d = gt_bboxes_3d_np.copy() - enlarged_gt_bboxes_3d[:, 3:6] += self.bbox_enlarge_range - points_numpy = points.tensor.clone().numpy() - foreground_masks = box_np_ops.points_in_rbbox( - points_numpy, gt_bboxes_3d_np, origin=(0.5, 0.5, 0.5)) - enlarge_foreground_masks = box_np_ops.points_in_rbbox( - points_numpy, enlarged_gt_bboxes_3d, origin=(0.5, 0.5, 0.5)) - foreground_masks = foreground_masks.max(1) - enlarge_foreground_masks = enlarge_foreground_masks.max(1) - valid_masks = ~np.logical_and(~foreground_masks, - enlarge_foreground_masks) - - input_dict['points'] = points[valid_masks] - pts_instance_mask = input_dict.get('pts_instance_mask', None) - if pts_instance_mask is not None: - input_dict['pts_instance_mask'] = pts_instance_mask[valid_masks] - - pts_semantic_mask = input_dict.get('pts_semantic_mask', None) - if pts_semantic_mask is not None: - input_dict['pts_semantic_mask'] = pts_semantic_mask[valid_masks] - return input_dict - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - repr_str = self.__class__.__name__ - repr_str += f'(bbox_enlarge_range={self.bbox_enlarge_range.tolist()})' - return repr_str - - -@TRANSFORMS.register_module() -class VoxelBasedPointSampler(BaseTransform): - """Voxel based point sampler. - - Apply voxel sampling to multiple sweep points. - - Args: - cur_sweep_cfg (dict): Config for sampling current points. - prev_sweep_cfg (dict, optional): Config for sampling previous points. - Defaults to None. - time_dim (int): Index that indicate the time dimension - for input points. Defaults to 3. - """ - - def __init__(self, - cur_sweep_cfg: dict, - prev_sweep_cfg: Optional[dict] = None, - time_dim: int = 3) -> None: - self.cur_voxel_generator = VoxelGenerator(**cur_sweep_cfg) - self.cur_voxel_num = self.cur_voxel_generator._max_voxels - self.time_dim = time_dim - if prev_sweep_cfg is not None: - assert prev_sweep_cfg['max_num_points'] == \ - cur_sweep_cfg['max_num_points'] - self.prev_voxel_generator = VoxelGenerator(**prev_sweep_cfg) - self.prev_voxel_num = self.prev_voxel_generator._max_voxels - else: - self.prev_voxel_generator = None - self.prev_voxel_num = 0 - - def _sample_points(self, points: np.ndarray, sampler: VoxelGenerator, - point_dim: int) -> np.ndarray: - """Sample points for each points subset. - - Args: - points (np.ndarray): Points subset to be sampled. - sampler (VoxelGenerator): Voxel based sampler for - each points subset. - point_dim (int): The dimension of each points. - - Returns: - np.ndarray: Sampled points. - """ - voxels, coors, num_points_per_voxel = sampler.generate(points) - if voxels.shape[0] < sampler._max_voxels: - padding_points = np.zeros([ - sampler._max_voxels - voxels.shape[0], sampler._max_num_points, - point_dim - ], - dtype=points.dtype) - padding_points[:] = voxels[0] - sample_points = np.concatenate([voxels, padding_points], axis=0) - else: - sample_points = voxels - - return sample_points - - def transform(self, results: dict) -> dict: - """Call function to sample points from multiple sweeps. - - Args: - input_dict (dict): Result dict from loading pipeline. - - Returns: - dict: Results after sampling, 'points', 'pts_instance_mask' - and 'pts_semantic_mask' keys are updated in the result dict. 
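-
-        A usage sketch (editor's illustration; the voxel size, range and
-        point counts are made-up values, and the config keys assume the
-        ``VoxelGenerator`` signature used by this module). The empty
-        ``pts_mask_fields``/``pts_seg_fields`` lists are required because
-        the transform indexes them directly:
-
-        Examples:
-            >>> import numpy as np
-            >>> from mmdet3d.structures.points import BasePoints
-            >>> sampler = VoxelBasedPointSampler(
-            ...     cur_sweep_cfg=dict(
-            ...         voxel_size=[0.5, 0.5, 8],
-            ...         point_cloud_range=[0, 0, -4, 10, 10, 4],
-            ...         max_num_points=1,
-            ...         max_voxels=32))
-            >>> xyzt = np.random.rand(100, 4).astype(np.float32)
-            >>> xyzt[:, 3] = 0  # a single (current) sweep
-            >>> results = dict(
-            ...     points=BasePoints(xyzt, points_dim=4),
-            ...     pts_mask_fields=[], pts_seg_fields=[])
-            >>> results = sampler(results)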
- """ - points = results['points'] - original_dim = points.shape[1] - - # TODO: process instance and semantic mask while _max_num_points - # is larger than 1 - # Extend points with seg and mask fields - map_fields2dim = [] - start_dim = original_dim - points_numpy = points.tensor.numpy() - extra_channel = [points_numpy] - for idx, key in enumerate(results['pts_mask_fields']): - map_fields2dim.append((key, idx + start_dim)) - extra_channel.append(results[key][..., None]) - - start_dim += len(results['pts_mask_fields']) - for idx, key in enumerate(results['pts_seg_fields']): - map_fields2dim.append((key, idx + start_dim)) - extra_channel.append(results[key][..., None]) - - points_numpy = np.concatenate(extra_channel, axis=-1) - - # Split points into two part, current sweep points and - # previous sweeps points. - # TODO: support different sampling methods for next sweeps points - # and previous sweeps points. - cur_points_flag = (points_numpy[:, self.time_dim] == 0) - cur_sweep_points = points_numpy[cur_points_flag] - prev_sweeps_points = points_numpy[~cur_points_flag] - if prev_sweeps_points.shape[0] == 0: - prev_sweeps_points = cur_sweep_points - - # Shuffle points before sampling - np.random.shuffle(cur_sweep_points) - np.random.shuffle(prev_sweeps_points) - - cur_sweep_points = self._sample_points(cur_sweep_points, - self.cur_voxel_generator, - points_numpy.shape[1]) - if self.prev_voxel_generator is not None: - prev_sweeps_points = self._sample_points(prev_sweeps_points, - self.prev_voxel_generator, - points_numpy.shape[1]) - - points_numpy = np.concatenate( - [cur_sweep_points, prev_sweeps_points], 0) - else: - points_numpy = cur_sweep_points - - if self.cur_voxel_generator._max_num_points == 1: - points_numpy = points_numpy.squeeze(1) - results['points'] = points.new_point(points_numpy[..., :original_dim]) - - # Restore the corresponding seg and mask fields - for key, dim_index in map_fields2dim: - results[key] = points_numpy[..., dim_index] - - return results - - def __repr__(self) -> str: - """str: Return a string that describes the module.""" - - def _auto_indent(repr_str, indent): - repr_str = repr_str.split('\n') - repr_str = [' ' * indent + t + '\n' for t in repr_str] - repr_str = ''.join(repr_str)[:-1] - return repr_str - - repr_str = self.__class__.__name__ - indent = 4 - repr_str += '(\n' - repr_str += ' ' * indent + f'num_cur_sweep={self.cur_voxel_num},\n' - repr_str += ' ' * indent + f'num_prev_sweep={self.prev_voxel_num},\n' - repr_str += ' ' * indent + f'time_dim={self.time_dim},\n' - repr_str += ' ' * indent + 'cur_voxel_generator=\n' - repr_str += f'{_auto_indent(repr(self.cur_voxel_generator), 8)},\n' - repr_str += ' ' * indent + 'prev_voxel_generator=\n' - repr_str += f'{_auto_indent(repr(self.prev_voxel_generator), 8)})' - return repr_str - - -@TRANSFORMS.register_module() -class AffineResize(BaseTransform): - """Get the affine transform matrices to the target size. - - Different from :class:`RandomAffine` in MMDetection, this class can - calculate the affine transform matrices while resizing the input image - to a fixed size. The affine transform matrices include: 1) matrix - transforming original image to the network input image size. 2) matrix - transforming original image to the network output feature map size. - - Args: - img_scale (tuple): Images scales for resizing. - down_ratio (int): The down ratio of feature map. - Actually the arg should be >= 1. - bbox_clip_border (bool): Whether clip the objects - outside the border of the image. Defaults to True. 
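-
-    A usage sketch (editor's illustration; the input image size is made
-    up, and the scale follows common monocular-3D configs):
-
-    Examples:
-        >>> import numpy as np
-        >>> transform = AffineResize(img_scale=(1280, 384), down_ratio=4)
-        >>> results = dict(img=np.zeros((370, 1224, 3), dtype=np.uint8))
-        >>> results = transform(results)
-        >>> results['img'].shape
-        (384, 1280, 3)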
-    """
-
-    def __init__(self,
-                 img_scale: Tuple,
-                 down_ratio: int,
-                 bbox_clip_border: bool = True) -> None:
-
-        self.img_scale = img_scale
-        self.down_ratio = down_ratio
-        self.bbox_clip_border = bbox_clip_border
-
-    def transform(self, results: dict) -> dict:
-        """Call function to do affine transform to input image and labels.
-
-        Args:
-            results (dict): Result dict from loading pipeline.
-
-        Returns:
-            dict: Results after affine resize, 'affine_aug', 'trans_mat'
-            keys are added in the result dict.
-        """
-        # The results did not go through RandomShiftScale before AffineResize,
-        # so the center and size are computed from the input image
-        if 'center' not in results:
-            img = results['img']
-            height, width = img.shape[:2]
-            center = np.array([width / 2, height / 2], dtype=np.float32)
-            size = np.array([width, height], dtype=np.float32)
-            results['affine_aug'] = False
-        else:
-            # The results have gone through RandomShiftScale before
-            # AffineResize, which has recorded 'center' and 'size'
-            img = results['img']
-            center = results['center']
-            size = results['size']
-
-        trans_affine = self._get_transform_matrix(center, size, self.img_scale)
-
-        img = cv2.warpAffine(img, trans_affine[:2, :], self.img_scale)
-
-        if isinstance(self.down_ratio, tuple):
-            trans_mat = [
-                self._get_transform_matrix(
-                    center, size,
-                    (self.img_scale[0] // ratio, self.img_scale[1] // ratio))
-                for ratio in self.down_ratio
-            ]  # (3, 3)
-        else:
-            trans_mat = self._get_transform_matrix(
-                center, size, (self.img_scale[0] // self.down_ratio,
-                               self.img_scale[1] // self.down_ratio))
-
-        results['img'] = img
-        results['img_shape'] = img.shape
-        results['pad_shape'] = img.shape
-        results['trans_mat'] = trans_mat
-
-        if 'gt_bboxes' in results:
-            self._affine_bboxes(results, trans_affine)
-
-        if 'centers_2d' in results:
-            centers2d = self._affine_transform(results['centers_2d'],
-                                               trans_affine)
-            valid_index = (centers2d[:, 0] >
-                           0) & (centers2d[:, 0] <
-                                 self.img_scale[0]) & (centers2d[:, 1] > 0) & (
-                                     centers2d[:, 1] < self.img_scale[1])
-            results['centers_2d'] = centers2d[valid_index]
-
-            if 'gt_bboxes' in results:
-                results['gt_bboxes'] = results['gt_bboxes'][valid_index]
-                if 'gt_bboxes_labels' in results:
-                    results['gt_bboxes_labels'] = results['gt_bboxes_labels'][
-                        valid_index]
-                if 'gt_masks' in results:
-                    raise NotImplementedError(
-                        'AffineResize only supports bbox.')
-
-            if 'gt_bboxes_3d' in results:
-                results['gt_bboxes_3d'].tensor = results[
-                    'gt_bboxes_3d'].tensor[valid_index]
-                if 'gt_labels_3d' in results:
-                    results['gt_labels_3d'] = results['gt_labels_3d'][
-                        valid_index]
-
-            results['depths'] = results['depths'][valid_index]
-
-        return results
-
-    def _affine_bboxes(self, results: dict, matrix: np.ndarray) -> None:
-        """Affine transform bboxes to input image.
-
-        Args:
-            results (dict): Result dict from loading pipeline.
-            matrix (np.ndarray): Matrix transforming original
-                image to the network input image size.
-                shape: (3, 3)
-        """
-
-        bboxes = results['gt_bboxes']
-        bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix)
-        bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix)
-        if self.bbox_clip_border:
-            bboxes[:, [0, 2]] = bboxes[:, [0, 2]].clip(0,
-                                                       self.img_scale[0] - 1)
-            bboxes[:, [1, 3]] = bboxes[:, [1, 3]].clip(0,
-                                                       self.img_scale[1] - 1)
-        results['gt_bboxes'] = bboxes
-
-    def _affine_transform(self, points: np.ndarray,
-                          matrix: np.ndarray) -> np.ndarray:
-        """Affine transform bbox points to input image.
-
-        Args:
-            points (np.ndarray): Points to be transformed.
-                shape: (N, 2)
-            matrix (np.ndarray): Affine transform matrix.
-                shape: (3, 3)
-
-        Returns:
-            np.ndarray: Transformed points.
-        """
-        num_points = points.shape[0]
-        hom_points_2d = np.concatenate((points, np.ones((num_points, 1))),
-                                       axis=1)
-        hom_points_2d = hom_points_2d.T
-        affined_points = np.matmul(matrix, hom_points_2d).T
-        return affined_points[:, :2]
-
-    def _get_transform_matrix(self, center: Tuple, scale: Tuple,
-                              output_scale: Tuple[float]) -> np.ndarray:
-        """Get affine transform matrix.
-
-        Args:
-            center (tuple): Center of current image.
-            scale (tuple): Scale of current image.
-            output_scale (tuple[float]): The transform target image scales.
-
-        Returns:
-            np.ndarray: Affine transform matrix.
-        """
-        # TODO: further add rot and shift here.
-        src_w = scale[0]
-        dst_w = output_scale[0]
-        dst_h = output_scale[1]
-
-        src_dir = np.array([0, src_w * -0.5])
-        dst_dir = np.array([0, dst_w * -0.5])
-
-        src = np.zeros((3, 2), dtype=np.float32)
-        dst = np.zeros((3, 2), dtype=np.float32)
-        src[0, :] = center
-        src[1, :] = center + src_dir
-        dst[0, :] = np.array([dst_w * 0.5, dst_h * 0.5])
-        dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
-
-        src[2, :] = self._get_ref_point(src[0, :], src[1, :])
-        dst[2, :] = self._get_ref_point(dst[0, :], dst[1, :])
-
-        get_matrix = cv2.getAffineTransform(src, dst)
-
-        matrix = np.concatenate((get_matrix, [[0., 0., 1.]]))
-
-        return matrix.astype(np.float32)
-
-    def _get_ref_point(self, ref_point1: np.ndarray,
-                       ref_point2: np.ndarray) -> np.ndarray:
-        """Get reference point to calculate affine transform matrix.
-
-        While using OpenCV to calculate the affine matrix, we need at least
-        three corresponding points separately on the original image and the
-        target image. Here we use two points to get the third reference point.
-        """
-        d = ref_point1 - ref_point2
-        ref_point3 = ref_point2 + np.array([-d[1], d[0]])
-        return ref_point3
-
-    def __repr__(self) -> str:
-        """str: Return a string that describes the module."""
-        repr_str = self.__class__.__name__
-        repr_str += f'(img_scale={self.img_scale}, '
-        repr_str += f'down_ratio={self.down_ratio})'
-        return repr_str
-
-
-@TRANSFORMS.register_module()
-class RandomShiftScale(BaseTransform):
-    """Random shift scale.
-
-    Different from the normal shift and scale function, it doesn't
-    directly shift or scale the image. Instead, it records the shift and
-    scale information for the subsequent ``AffineResize`` transform to
-    consume. It is designed to be used together with ``AffineResize``.
-
-    Args:
-        shift_scale (tuple[float]): Shift and scale range.
-        aug_prob (float): The shifting and scaling probability.
-    """
-
-    def __init__(self, shift_scale: Tuple[float], aug_prob: float) -> None:
-
-        self.shift_scale = shift_scale
-        self.aug_prob = aug_prob
-
-    def transform(self, results: dict) -> dict:
-        """Call function to record random shift and scale infos.
-
-        Args:
-            results (dict): Result dict from loading pipeline.
-
-        Returns:
-            dict: Results after random shift and scale, 'center', 'size'
-            and 'affine_aug' keys are added in the result dict.
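-
-        A usage sketch showing the intended pairing with ``AffineResize``
-        (editor's illustration; the shift/scale values are made up):
-
-        Examples:
-            >>> import numpy as np
-            >>> shift_scale = RandomShiftScale(
-            ...     shift_scale=(0.2, 0.4), aug_prob=0.3)
-            >>> resize = AffineResize(img_scale=(1280, 384), down_ratio=4)
-            >>> results = dict(img=np.zeros((370, 1224, 3), dtype=np.uint8))
-            >>> results = resize(shift_scale(results))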
-        """
-        img = results['img']
-
-        height, width = img.shape[:2]
-
-        center = np.array([width / 2, height / 2], dtype=np.float32)
-        size = np.array([width, height], dtype=np.float32)
-
-        if random.random() < self.aug_prob:
-            shift, scale = self.shift_scale[0], self.shift_scale[1]
-            shift_ranges = np.arange(-shift, shift + 0.1, 0.1)
-            center[0] += size[0] * random.choice(shift_ranges)
-            center[1] += size[1] * random.choice(shift_ranges)
-            scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
-            size *= random.choice(scale_ranges)
-            results['affine_aug'] = True
-        else:
-            results['affine_aug'] = False
-
-        results['center'] = center
-        results['size'] = size
-
-        return results
-
-    def __repr__(self) -> str:
-        """str: Return a string that describes the module."""
-        repr_str = self.__class__.__name__
-        repr_str += f'(shift_scale={self.shift_scale}, '
-        repr_str += f'aug_prob={self.aug_prob})'
-        return repr_str
-
-
-@TRANSFORMS.register_module()
-class Resize3D(Resize):
-
-    def _resize_3d(self, results: dict) -> None:
-        """Resize centers_2d and modify camera intrinsic with
-        ``results['scale']``."""
-        if 'centers_2d' in results:
-            results['centers_2d'] *= results['scale_factor'][:2]
-        results['cam2img'][0] *= np.array(results['scale_factor'][0])
-        results['cam2img'][1] *= np.array(results['scale_factor'][1])
-
-    def transform(self, results: dict) -> dict:
-        """Transform function to resize images, bounding boxes, semantic
-        segmentation map and keypoints.
-
-        Args:
-            results (dict): Result dict from loading pipeline.
-
-        Returns:
-            dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map',
-            'gt_keypoints', 'scale', 'scale_factor', 'img_shape',
-            and 'keep_ratio' keys are updated in result dict.
-        """
-
-        super(Resize3D, self).transform(results)
-        self._resize_3d(results)
-        return results
-
-
-@TRANSFORMS.register_module()
-class RandomResize3D(RandomResize):
-    """The difference between RandomResize3D and RandomResize:
-
-    1. Compared to RandomResize, this class would further
-       check if scale is already set in results.
-    2. During resizing, this class would modify the centers_2d
-       and cam2img with ``results['scale']``.
-    """
-
-    def _resize_3d(self, results: dict) -> None:
-        """Resize centers_2d and modify camera intrinsic with
-        ``results['scale']``."""
-        if 'centers_2d' in results:
-            results['centers_2d'] *= results['scale_factor'][:2]
-        results['cam2img'][0] *= np.array(results['scale_factor'][0])
-        results['cam2img'][1] *= np.array(results['scale_factor'][1])
-
-    def transform(self, results: dict) -> dict:
-        """Transform function to resize images, bounding boxes, masks, semantic
-        segmentation map. Compared to RandomResize, this function would further
-        check if scale is already set in results.
-
-        Args:
-            results (dict): Result dict from loading pipeline.
-
-        Returns:
-            dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor',
-            'keep_ratio' keys are added into result dict.
-        """
-        if 'scale' not in results:
-            results['scale'] = self._random_scale()
-        self.resize.scale = results['scale']
-        results = self.resize(results)
-        self._resize_3d(results)
-
-        return results
-
-
-@TRANSFORMS.register_module()
-class RandomCrop3D(RandomCrop):
-    """3D version of RandomCrop. RandomCrop3D supports the modifications of
-    camera intrinsic matrix and using predefined randomness variable to do the
-    augmentation.
-
-    The absolute ``crop_size`` is sampled based on ``crop_type`` and
-    ``image_size``, then the cropped results are generated.
- - Required Keys: - - - img - - gt_bboxes (np.float32) (optional) - - gt_bboxes_labels (np.int64) (optional) - - gt_masks (BitmapMasks | PolygonMasks) (optional) - - gt_ignore_flags (np.bool) (optional) - - gt_seg_map (np.uint8) (optional) - - Modified Keys: - - - img - - img_shape - - gt_bboxes (optional) - - gt_bboxes_labels (optional) - - gt_masks (optional) - - gt_ignore_flags (optional) - - gt_seg_map (optional) - - Added Keys: - - - homography_matrix - - Args: - crop_size (tuple): The relative ratio or absolute pixels of - height and width. - crop_type (str): One of "relative_range", "relative", - "absolute", "absolute_range". "relative" randomly crops - (h * crop_size[0], w * crop_size[1]) part from an input of size - (h, w). "relative_range" uniformly samples relative crop size from - range [crop_size[0], 1] and [crop_size[1], 1] for height and width - respectively. "absolute" crops from an input with absolute size - (crop_size[0], crop_size[1]). "absolute_range" uniformly samples - crop_h in range [crop_size[0], min(h, crop_size[1])] and crop_w - in range [crop_size[0], min(w, crop_size[1])]. - Defaults to "absolute". - allow_negative_crop (bool): Whether to allow a crop that does - not contain any bbox area. Defaults to False. - recompute_bbox (bool): Whether to re-compute the boxes based - on cropped instance masks. Defaults to False. - bbox_clip_border (bool): Whether clip the objects outside - the border of the image. Defaults to True. - rel_offset_h (tuple): The cropping interval of image height. Defaults - to (0., 1.). - rel_offset_w (tuple): The cropping interval of image width. Defaults - to (0., 1.). - - Note: - - If the image is smaller than the absolute crop size, return the - original image. - - The keys for bboxes, labels and masks must be aligned. That is, - ``gt_bboxes`` corresponds to ``gt_labels`` and ``gt_masks``, and - ``gt_bboxes_ignore`` corresponds to ``gt_labels_ignore`` and - ``gt_masks_ignore``. - - If the crop does not contain any gt-bbox region and - ``allow_negative_crop`` is set to False, skip this image. - """ - - def __init__( - self, - crop_size: tuple, - crop_type: str = 'absolute', - allow_negative_crop: bool = False, - recompute_bbox: bool = False, - bbox_clip_border: bool = True, - rel_offset_h: tuple = (0., 1.), - rel_offset_w: tuple = (0., 1.) - ) -> None: - super().__init__( - crop_size=crop_size, - crop_type=crop_type, - allow_negative_crop=allow_negative_crop, - recompute_bbox=recompute_bbox, - bbox_clip_border=bbox_clip_border) - # rel_offset specifies the relative offset range of cropping origin - # [0., 1.] means starting from 0*margin to 1*margin + 1 - self.rel_offset_h = rel_offset_h - self.rel_offset_w = rel_offset_w - - def _crop_data(self, - results: dict, - crop_size: tuple, - allow_negative_crop: bool = False) -> dict: - """Function to randomly crop images, bounding boxes, masks, semantic - segmentation maps. - - Args: - results (dict): Result dict from loading pipeline. - crop_size (tuple): Expected absolute size after cropping, (h, w). - allow_negative_crop (bool): Whether to allow a crop that does not - contain any bbox area. Defaults to False. - - Returns: - dict: Randomly cropped results, 'img_shape' key in result dict is - updated according to crop size. 
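-
-        Note:
-            A worked example of the offset sampling (editor's
-            illustration): for an 800-pixel-high image cropped to height
-            700, ``margin_h = 100``; with ``rel_offset_h = (0.5, 1.0)``
-            the crop origin ``offset_h`` is drawn uniformly from
-            [50, 100].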
- """ - assert crop_size[0] > 0 and crop_size[1] > 0 - for key in results.get('img_fields', ['img']): - img = results[key] - if 'img_crop_offset' not in results: - margin_h = max(img.shape[0] - crop_size[0], 0) - margin_w = max(img.shape[1] - crop_size[1], 0) - # TOCHECK: a little different from LIGA implementation - offset_h = np.random.randint( - self.rel_offset_h[0] * margin_h, - self.rel_offset_h[1] * margin_h + 1) - offset_w = np.random.randint( - self.rel_offset_w[0] * margin_w, - self.rel_offset_w[1] * margin_w + 1) - else: - offset_w, offset_h = results['img_crop_offset'] - - crop_h = min(crop_size[0], img.shape[0]) - crop_w = min(crop_size[1], img.shape[1]) - crop_y1, crop_y2 = offset_h, offset_h + crop_h - crop_x1, crop_x2 = offset_w, offset_w + crop_w - - # crop the image - img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] - img_shape = img.shape - results[key] = img - results['img_shape'] = img_shape - - # crop bboxes accordingly and clip to the image boundary - for key in results.get('bbox_fields', []): - # e.g. gt_bboxes and gt_bboxes_ignore - bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h], - dtype=np.float32) - bboxes = results[key] - bbox_offset - if self.bbox_clip_border: - bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1]) - bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0]) - valid_inds = (bboxes[:, 2] > bboxes[:, 0]) & ( - bboxes[:, 3] > bboxes[:, 1]) - # If the crop does not contain any gt-bbox area and - # allow_negative_crop is False, skip this image. - if (key == 'gt_bboxes' and not valid_inds.any() - and not allow_negative_crop): - return None - results[key] = bboxes[valid_inds, :] - # label fields. e.g. gt_labels and gt_labels_ignore - label_key = self.bbox2label.get(key) - if label_key in results: - results[label_key] = results[label_key][valid_inds] - - # mask fields, e.g. gt_masks and gt_masks_ignore - mask_key = self.bbox2mask.get(key) - if mask_key in results: - results[mask_key] = results[mask_key][ - valid_inds.nonzero()[0]].crop( - np.asarray([crop_x1, crop_y1, crop_x2, crop_y2])) - if self.recompute_bbox: - results[key] = results[mask_key].get_bboxes() - - # crop semantic seg - for key in results.get('seg_fields', []): - results[key] = results[key][crop_y1:crop_y2, crop_x1:crop_x2] - - # manipulate camera intrinsic matrix - # needs to apply offset to K instead of P2 (on KITTI) - if isinstance(results['cam2img'], list): - # TODO ignore this, but should handle it in the future - pass - else: - K = results['cam2img'][:3, :3].copy() - inv_K = np.linalg.inv(K) - T = np.matmul(inv_K, results['cam2img'][:3]) - K[0, 2] -= crop_x1 - K[1, 2] -= crop_y1 - offset_cam2img = np.matmul(K, T) - results['cam2img'][:offset_cam2img.shape[0], :offset_cam2img. - shape[1]] = offset_cam2img - - results['img_crop_offset'] = [offset_w, offset_h] - - return results - - def transform(self, results: dict) -> dict: - """Transform function to randomly crop images, bounding boxes, masks, - semantic segmentation maps. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Randomly cropped results, 'img_shape' key in result dict is - updated according to crop size. 
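-
-        A usage sketch (editor's illustration; the image size, crop size
-        and intrinsics are made up, and ``crop_size`` is interpreted by
-        the parent class's ``crop_type`` logic):
-
-        Examples:
-            >>> import numpy as np
-            >>> transform = RandomCrop3D(crop_size=(384, 1280))
-            >>> results = dict(
-            ...     img=np.zeros((512, 1400, 3), dtype=np.uint8),
-            ...     cam2img=np.eye(4, dtype=np.float32))
-            >>> results = transform(results)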
-        """
-        image_size = results['img'].shape[:2]
-        if 'crop_size' not in results:
-            crop_size = self._get_crop_size(image_size)
-            results['crop_size'] = crop_size
-        else:
-            crop_size = results['crop_size']
-        results = self._crop_data(results, crop_size, self.allow_negative_crop)
-        return results
-
-    def __repr__(self) -> str:
-        """str: Return a string that describes the module."""
-        repr_str = self.__class__.__name__
-        repr_str += f'(crop_size={self.crop_size}, '
-        repr_str += f'crop_type={self.crop_type}, '
-        repr_str += f'allow_negative_crop={self.allow_negative_crop}, '
-        repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
-        repr_str += f'rel_offset_h={self.rel_offset_h}, '
-        repr_str += f'rel_offset_w={self.rel_offset_w})'
-        return repr_str
-
-
-@TRANSFORMS.register_module()
-class PhotoMetricDistortion3D(PhotoMetricDistortion):
-    """Apply photometric distortion to an image sequentially; every
-    transformation is applied with a probability of 0.5. Random contrast is
-    applied either second or second to last.
-
-    PhotoMetricDistortion3D further supports using a predefined randomness
-    variable to do the augmentation.
-
-    1. random brightness
-    2. random contrast (mode 0)
-    3. convert color from BGR to HSV
-    4. random saturation
-    5. random hue
-    6. convert color from HSV to BGR
-    7. random contrast (mode 1)
-    8. randomly swap channels
-
-    Required Keys:
-
-    - img (np.uint8)
-
-    Modified Keys:
-
-    - img (np.float32)
-
-    Args:
-        brightness_delta (int): Delta of brightness.
-        contrast_range (sequence): Range of contrast.
-        saturation_range (sequence): Range of saturation.
-        hue_delta (int): Delta of hue.
-    """
-
-    def transform(self, results: dict) -> dict:
-        """Transform function to perform photometric distortion on images.
-
-        Args:
-            results (dict): Result dict from loading pipeline.
-
-        Returns:
-            dict: Result dict with images distorted.
-        """
-        assert 'img' in results, '`img` is not found in results'
-        img = results['img']
-        img = img.astype(np.float32)
-        if 'photometric_param' not in results:
-            photometric_param = self._random_flags()
-            results['photometric_param'] = photometric_param
-        else:
-            photometric_param = results['photometric_param']
-
-        (mode, brightness_flag, contrast_flag, saturation_flag, hue_flag,
-         swap_flag, delta_value, alpha_value, saturation_value, hue_value,
-         swap_value) = photometric_param
-
-        # random brightness
-        if brightness_flag:
-            img += delta_value
-
-        # mode == 0 --> do random contrast first
-        # mode == 1 --> do random contrast last
-        if mode == 1:
-            if contrast_flag:
-                img *= alpha_value
-
-        # convert color from BGR to HSV
-        img = mmcv.bgr2hsv(img)
-
-        # random saturation
-        if saturation_flag:
-            img[..., 1] *= saturation_value
-
-        # random hue
-        if hue_flag:
-            img[..., 0] += hue_value
-            img[..., 0][img[..., 0] > 360] -= 360
-            img[..., 0][img[..., 0] < 0] += 360
-
-        # convert color from HSV to BGR
-        img = mmcv.hsv2bgr(img)
-
-        # random contrast
-        if mode == 0:
-            if contrast_flag:
-                img *= alpha_value
-
-        # randomly swap channels
-        if swap_flag:
-            img = img[..., swap_value]
-
-        results['img'] = img
-        return results
-
-
-@TRANSFORMS.register_module()
-class MultiViewWrapper(BaseTransform):
-    """Wrap transformation from single-view into multi-view.
-
-    The wrapper processes the images from multi-view one by one. For each
-    image, it constructs a pseudo dict according to the keys specified by the
-    'process_fields' parameter. After the transformation is finished, desired
-    information can be collected by specifying the keys in the 'collected_keys'
-    parameter. Multi-view images share the same transformation parameters
-    but do not share the same magnitude when a random transformation is
-    conducted.
-
-    Args:
-        transforms (list[dict]): A list of dict specifying the transformations
-            for the monocular situation.
-        override_aug_config (bool): Flag of whether to use the same
-            augmentation config for multi-view images. Defaults to True.
-        process_fields (list): Desired keys that the transformations should
-            be conducted on. Defaults to ['img', 'cam2img', 'lidar2cam'].
-        collected_keys (list): Collect information in transformation
-            like rotate angles, crop roi, and flip state. Defaults to
-            ['scale', 'scale_factor', 'crop',
-             'crop_offset', 'ori_shape',
-             'pad_shape', 'img_shape',
-             'pad_fixed_size', 'pad_size_divisor',
-             'flip', 'flip_direction', 'rotate'].
-        randomness_keys (list): The keys that relate to the randomness
-            in transformation. Defaults to
-            ['scale', 'scale_factor', 'crop_size', 'flip',
-             'flip_direction', 'photometric_param']
-    """
-
-    def __init__(
-        self,
-        transforms: list,
-        override_aug_config: bool = True,
-        process_fields: list = ['img', 'cam2img', 'lidar2cam'],
-        collected_keys: list = [
-            'scale', 'scale_factor', 'crop', 'img_crop_offset', 'ori_shape',
-            'pad_shape', 'img_shape', 'pad_fixed_size', 'pad_size_divisor',
-            'flip', 'flip_direction', 'rotate'
-        ],
-        randomness_keys: list = [
-            'scale', 'scale_factor', 'crop_size', 'img_crop_offset', 'flip',
-            'flip_direction', 'photometric_param'
-        ]
-    ) -> None:
-        self.transforms = Compose(transforms)
-        self.override_aug_config = override_aug_config
-        self.collected_keys = collected_keys
-        self.process_fields = process_fields
-        self.randomness_keys = randomness_keys
-
-    def transform(self, input_dict: dict) -> dict:
-        """Transform function to do the transform for multiview image.
-
-        Args:
-            input_dict (dict): Result dict from loading pipeline.
-
-        Returns:
-            dict: Output dict after transformation.
-        """
-        # store the augmentation related keys for each image.
-        for key in self.collected_keys:
-            if key not in input_dict or \
-                    not isinstance(input_dict[key], list):
-                input_dict[key] = []
-        prev_process_dict = {}
-        for img_id in range(len(input_dict['img'])):
-            process_dict = {}
-
-            # override the process dict (e.g. scale in random scale,
-            # crop_size in random crop, flip, flip_direction in
-            # random flip)
-            if img_id != 0 and self.override_aug_config:
-                for key in self.randomness_keys:
-                    if key in prev_process_dict:
-                        process_dict[key] = prev_process_dict[key]
-
-            for key in self.process_fields:
-                if key in input_dict:
-                    process_dict[key] = input_dict[key][img_id]
-            process_dict = self.transforms(process_dict)
-            # store the randomness variable in transformation.
-            prev_process_dict = process_dict
-
-            # store the related results back to input_dict
-            for key in self.process_fields:
-                if key in process_dict:
-                    input_dict[key][img_id] = process_dict[key]
-            # update the keys
-            for key in self.collected_keys:
-                if key in process_dict:
-                    if len(input_dict[key]) == img_id + 1:
-                        input_dict[key][img_id] = process_dict[key]
-                    else:
-                        input_dict[key].append(process_dict[key])
-
-        for key in self.collected_keys:
-            if len(input_dict[key]) == 0:
-                input_dict.pop(key)
-        return input_dict
-
-
-@TRANSFORMS.register_module()
-class PolarMix(BaseTransform):
-    """PolarMix data augmentation.
-
-    Required Keys:
-
-    - points (:obj:`BasePoints`)
-    - pts_semantic_mask (np.int64)
-    - mix_results (List[dict])
-
-    Modified Keys:
-
-    - points (:obj:`BasePoints`)
-    - pts_semantic_mask (np.int64)
-
-    Args:
-        instance_classes (List[int]): Semantic class indices which
-            represent the instance (thing) categories.
-        swap_ratio (float): Probability of swapping a sector between the
-            two point clouds. Defaults to 0.5.
-        rotate_paste_ratio (float): Probability of applying rotate-paste.
-            Defaults to 1.0.
-    """
-
-    def __init__(self,
-                 instance_classes: List[int],
-                 swap_ratio: float = 0.5,
-                 rotate_paste_ratio: float = 1.0) -> None:
-        assert is_list_of(instance_classes, int)
-        self.instance_classes = instance_classes
-        self.swap_ratio = swap_ratio
-        self.rotate_paste_ratio = rotate_paste_ratio
-
-    def get_indexes(self, dataset: BaseDataset) -> int:
-        """Call function to collect indexes.
-
-        Args:
-            dataset (:obj:`BaseDataset`): The dataset.
-
-        Returns:
-            int: Index of the sample to mix with.
-        """
-        index = random.randint(0, len(dataset) - 1)
-        return index
-
-    def transform(self, input_dict: dict) -> dict:
-        """PolarMix transform function.
-
-        Args:
-            input_dict (dict): Result dict from loading pipeline.
-
-        Returns:
-            dict: Output dict after transformation.
-        """
-
-        assert 'mix_results' in input_dict
-        assert len(input_dict['mix_results']) == 1, \
-            'PolarMix only supports mixing two point clouds for now!'
-
-        retrieve_results = input_dict['mix_results'][0]
-        retrieve_points = retrieve_results['points']
-        retrieve_pts_semantic_mask = retrieve_results['pts_semantic_mask']
-
-        points = input_dict['points']
-        pts_semantic_mask = input_dict['pts_semantic_mask']
-
-        # 1. swap point cloud
-        if np.random.random() < self.swap_ratio:
-            start_angle = (np.random.random() - 1) * np.pi  # in [-pi, 0)
-            end_angle = start_angle + np.pi
-            # calculate horizontal angle for each point
-            yaw = torch.atan2(points.coord[:, 1], points.coord[:, 0])
-            retrieve_yaw = torch.atan2(retrieve_points.coord[:, 1],
-                                       retrieve_points.coord[:, 0])
-
-            # keep the points outside the sector in the original scan
-            idx = (yaw <= start_angle) | (yaw >= end_angle)
-            # select the points inside the sector in the retrieved scan
-            retrieve_idx = (retrieve_yaw > start_angle) & (
-                retrieve_yaw < end_angle)
-
-            # swap
-            points = points[idx]
-            points = points.cat([points, retrieve_points[retrieve_idx]])
-            pts_semantic_mask = np.concatenate(
-                (pts_semantic_mask[idx.numpy()],
-                 retrieve_pts_semantic_mask[retrieve_idx.numpy()]),
-                axis=0)
-
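-        # Editor's note (not part of the original transform logic): `yaw`
-        # lies in (-pi, pi] and `start_angle` in [-pi, 0), so the kept
-        # mask (yaw <= start_angle) | (yaw >= end_angle) is exactly the
-        # complement of the sector (start_angle, end_angle) that gets
-        # refilled with points from the retrieved scan. For example, with
-        # start_angle = -pi/2, points whose yaw is in (-pi/2, pi/2) are
-        # swapped out and replaced.
-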
-        # 2. rotate-pasting
-        if np.random.random() < self.rotate_paste_ratio:
-            # extract instance points
-            instance_points, instance_pts_semantic_mask = [], []
-            for instance_class in self.instance_classes:
-                retrieve_idx = retrieve_pts_semantic_mask == instance_class
-                instance_points.append(retrieve_points[retrieve_idx])
-                instance_pts_semantic_mask.append(
-                    retrieve_pts_semantic_mask[retrieve_idx])
-            instance_points = retrieve_points.cat(instance_points)
-            instance_pts_semantic_mask = np.concatenate(
-                instance_pts_semantic_mask, axis=0)
-
-            # rotate-copy
-            copy_points = [instance_points]
-            copy_pts_semantic_mask = [instance_pts_semantic_mask]
-            angle_list = [
-                np.random.random() * np.pi * 2 / 3,
-                (np.random.random() + 1) * np.pi * 2 / 3
-            ]
-            for angle in angle_list:
-                new_points = instance_points.clone()
-                new_points.rotate(angle)
-                copy_points.append(new_points)
-                copy_pts_semantic_mask.append(instance_pts_semantic_mask)
-            copy_points = instance_points.cat(copy_points)
-            copy_pts_semantic_mask = np.concatenate(
-                copy_pts_semantic_mask, axis=0)
-
-            points = points.cat([points, copy_points])
-            pts_semantic_mask = np.concatenate(
-                (pts_semantic_mask, copy_pts_semantic_mask), axis=0)
-
-        input_dict['points'] = points
-        input_dict['pts_semantic_mask'] = pts_semantic_mask
-        return input_dict
-
-    def __repr__(self) -> str:
-        """str: Return a string that describes the module."""
-        repr_str = self.__class__.__name__
-        repr_str += f'(instance_classes={self.instance_classes}, '
-        repr_str += f'swap_ratio={self.swap_ratio}, '
-        repr_str += f'rotate_paste_ratio={self.rotate_paste_ratio})'
-        return repr_str
+from mmengine.testing import assert_allclose
+
+from mmdet3d.datasets import GlobalAlignment, RandomFlip3D
+from mmdet3d.datasets.transforms import GlobalRotScaleTrans, PolarMix
+from mmdet3d.structures import BasePoints
+from mmdet3d.testing import create_data_info_after_loading
+
+
+class TestGlobalRotScaleTrans(unittest.TestCase):
+
+    def test_globle_rotation_scale_trans(self):
+        rot_trans = GlobalRotScaleTrans(
+            rot_range=[-0.78, 0.78], scale_ratio_range=[1, 1])
+        scale_trans = GlobalRotScaleTrans(
+            rot_range=[0, 0], scale_ratio_range=[0.95, 1.05])
+
+        ori_data_info = create_data_info_after_loading()
+
+        data_info = copy.deepcopy(ori_data_info)
+        rot_data_info = rot_trans(data_info)
+        self.assertIn('pcd_rotation', rot_data_info)
+        self.assertIn('pcd_rotation_angle', rot_data_info)
+        self.assertIn('pcd_scale_factor', rot_data_info)
+        self.assertEqual(rot_data_info['pcd_scale_factor'], 1)
+        self.assertIs(-0.79 < rot_data_info['pcd_rotation_angle'] < 0.79, True)
+
+        # assert the rot angle should in rot_range
+        before_rot_gt_bbox_3d = ori_data_info['gt_bboxes_3d']
+        after_rot_gt_bbox_3d = rot_data_info['gt_bboxes_3d']
+        assert (after_rot_gt_bbox_3d.tensor[:, -1] -
+                before_rot_gt_bbox_3d.tensor[:, -1]).abs().max() < 0.79
+
+        data_info = copy.deepcopy(ori_data_info)
+        scale_data_info = scale_trans(data_info)
+        # assert the rot angle should in rot_range
+        before_scale_gt_bbox_3d = ori_data_info['gt_bboxes_3d'].tensor
+        after_scale_gt_bbox_3d = scale_data_info['gt_bboxes_3d'].tensor
+        before_scale_points = ori_data_info['points'].tensor
+        after_scale_points = scale_data_info['points'].tensor
+        self.assertEqual(scale_data_info['pcd_rotation_angle'], 0)
+        # assert scale_factor range
+        assert (0.94 < (after_scale_points / before_scale_points)).all()
+        assert (1.06 >
+                (after_scale_gt_bbox_3d / before_scale_gt_bbox_3d)).all()
+
+
+class TestRandomFlip3D(unittest.TestCase):
+
+    def test_random_flip3d(self):
+        ori_data_info = create_data_info_after_loading()
+        no_flip_transform = RandomFlip3D(flip_ratio_bev_horizontal=0.)
+        always_flip_transform = RandomFlip3D(flip_ratio_bev_horizontal=1.)
+        data_info = copy.deepcopy(ori_data_info)
+        data_info = no_flip_transform(data_info)
+        self.assertIn('pcd_horizontal_flip', data_info)
+        assert_allclose(data_info['points'].tensor,
+                        ori_data_info['points'].tensor)
+
+        assert_allclose(data_info['gt_bboxes_3d'].tensor,
+                        ori_data_info['gt_bboxes_3d'].tensor)
+        data_info = copy.deepcopy(ori_data_info)
+        data_info = always_flip_transform(data_info)
+        assert_allclose(data_info['points'].tensor[:, 0],
+                        ori_data_info['points'].tensor[:, 0])
+        assert_allclose(data_info['points'].tensor[:, 1],
+                        -ori_data_info['points'].tensor[:, 1])
+        assert_allclose(data_info['points'].tensor[:, 2],
+                        ori_data_info['points'].tensor[:, 2])
+
+        assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 0],
+                        ori_data_info['gt_bboxes_3d'].tensor[:, 0])
+        assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 1],
+                        -ori_data_info['gt_bboxes_3d'].tensor[:, 1])
+        assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 2],
+                        ori_data_info['gt_bboxes_3d'].tensor[:, 2])
+
+
+class TestGlobalAlignment(unittest.TestCase):
+
+    def test_global_alignment(self):
+        data_info = create_data_info_after_loading()
+        global_align_transform = GlobalAlignment(rotation_axis=2)
+        data_info['axis_align_matrix'] = np.array(
+            [[0.945519, 0.325568, 0., -5.38439],
+             [-0.325568, 0.945519, 0., -2.87178], [0., 0., 1., -0.06435],
+             [0., 0., 0., 1.]],
+            dtype=np.float32)
+        global_align_transform(data_info)
+
+        data_info['axis_align_matrix'] = np.array(
+            [[0.945519, 0.325568, 0., -5.38439], [0, 2, 0., -2.87178],
+             [0., 0., 1., -0.06435], [0., 0., 0., 1.]],
+            dtype=np.float32)
+        # an invalid (non-orthogonal) rotation part should raise an error
+        with self.assertRaises(AssertionError):
+            global_align_transform(data_info)
+
+
+class TestPolarMix(unittest.TestCase):
+
+    def setUp(self):
+        points = np.random.random((100, 4))
+        self.results = {
+            'points': BasePoints(points, points_dim=4),
+            'pts_semantic_mask': np.random.randint(0, 5, (100, ))
+        }
+
+    def test_transform(self):
+        # test assertion for invalid instance_classes
+        with self.assertRaises(AssertionError):
+            transform = PolarMix(instance_classes=1)
+
+        with self.assertRaises(AssertionError):
+            transform = PolarMix(instance_classes=[1.0, 2.0])
+
+        transform = PolarMix(instance_classes=[1, 2], swap_ratio=1.0)
+        # test assertion for invalid mix_results
+        with self.assertRaises(AssertionError):
+            results = transform(copy.deepcopy(self.results))
+
+        with self.assertRaises(AssertionError):
+            self.results['mix_results'] = [copy.deepcopy(self.results)] * 2
+            results = transform(copy.deepcopy(self.results))
+
+        self.results['mix_results'] = [copy.deepcopy(self.results)]
+        results = transform(copy.deepcopy(self.results))
+        self.assertTrue(results['points'].shape[0] ==
+                        results['pts_semantic_mask'].shape[0])

From 51a0ead0ad02f98ee73b0964595b32bbed431b88 Mon Sep 17 00:00:00 2001
From: Xiang Xu
Date: Mon, 13 Feb 2023 11:08:25 +0800
Subject: [PATCH 06/22] Update transforms_3d.py

---
 mmdet3d/datasets/transforms/transforms_3d.py | 2613 +++++++++++++++++-
 1 file changed, 2483 insertions(+), 130 deletions(-)

diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py
index 59fda6f834..a729f5739c 100644
--- a/mmdet3d/datasets/transforms/transforms_3d.py
+++ b/mmdet3d/datasets/transforms/transforms_3d.py
@@ -1,134 +1,2487 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import copy -import unittest +import random +import warnings +from typing import List, Optional, Tuple, Union +import cv2 +import mmcv import numpy as np import torch -from mmengine.testing import assert_allclose - -from mmdet3d.datasets import GlobalAlignment, RandomFlip3D -from mmdet3d.datasets.transforms import GlobalRotScaleTrans, PolarMix -from mmdet3d.structures import BasePoints -from mmdet3d.testing import create_data_info_after_loading - - -class TestGlobalRotScaleTrans(unittest.TestCase): - - def test_globle_rotation_scale_trans(self): - rot_trans = GlobalRotScaleTrans( - rot_range=[-0.78, 0.78], scale_ratio_range=[1, 1]) - scale_trans = GlobalRotScaleTrans( - rot_range=[0, 0], scale_ratio_range=[0.95, 1.05]) - - ori_data_info = create_data_info_after_loading() - - data_info = copy.deepcopy(ori_data_info) - rot_data_info = rot_trans(data_info) - self.assertIn('pcd_rotation', rot_data_info) - self.assertIn('pcd_rotation_angle', rot_data_info) - self.assertIn('pcd_scale_factor', rot_data_info) - self.assertEqual(rot_data_info['pcd_scale_factor'], 1) - self.assertIs(-0.79 < rot_data_info['pcd_rotation_angle'] < 0.79, True) - - # assert the rot angle should in rot_range - before_rot_gt_bbox_3d = ori_data_info['gt_bboxes_3d'] - after_rot_gt_bbox_3d = rot_data_info['gt_bboxes_3d'] - assert (after_rot_gt_bbox_3d.tensor[:, -1] - - before_rot_gt_bbox_3d.tensor[:, -1]).abs().max() < 0.79 - - data_info = copy.deepcopy(ori_data_info) - scale_data_info = scale_trans(data_info) - # assert the rot angle should in rot_range - before_scale_gt_bbox_3d = ori_data_info['gt_bboxes_3d'].tensor - after_scale_gt_bbox_3d = scale_data_info['gt_bboxes_3d'].tensor - before_scale_points = ori_data_info['points'].tensor - after_scale_points = scale_data_info['points'].tensor - self.assertEqual(scale_data_info['pcd_rotation_angle'], 0) - # assert scale_factor range - assert (0.94 < (after_scale_points / before_scale_points)).all() - assert (1.06 > - (after_scale_gt_bbox_3d / before_scale_gt_bbox_3d)).all() - - -class TestRandomFlip3D(unittest.TestCase): - - def test_random_flip3d(self): - ori_data_info = create_data_info_after_loading() - no_flip_transform = RandomFlip3D(flip_ratio_bev_horizontal=0.) - always_flip_transform = RandomFlip3D(flip_ratio_bev_horizontal=1.) 
-        data_info = copy.deepcopy(ori_data_info)
-        data_info = no_flip_transform(data_info)
-        self.assertIn('pcd_horizontal_flip', data_info)
-        assert_allclose(data_info['points'].tensor,
-                        ori_data_info['points'].tensor)
-
-        assert_allclose(data_info['gt_bboxes_3d'].tensor,
-                        ori_data_info['gt_bboxes_3d'].tensor)
-        data_info = copy.deepcopy(ori_data_info)
-        data_info = always_flip_transform(data_info)
-        assert_allclose(data_info['points'].tensor[:, 0],
-                        ori_data_info['points'].tensor[:, 0])
-        assert_allclose(data_info['points'].tensor[:, 1],
-                        -ori_data_info['points'].tensor[:, 1])
-        assert_allclose(data_info['points'].tensor[:, 2],
-                        ori_data_info['points'].tensor[:, 2])
-
-        assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 0],
-                        ori_data_info['gt_bboxes_3d'].tensor[:, 0])
-        assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 1],
-                        -ori_data_info['gt_bboxes_3d'].tensor[:, 1])
-        assert_allclose(data_info['gt_bboxes_3d'].tensor[:, 2],
-                        ori_data_info['gt_bboxes_3d'].tensor[:, 2])
-
-
-class TestGlobalAlignment(unittest.TestCase):
-
-    def test_global_alignment(self):
-        data_info = create_data_info_after_loading()
-        global_align_transform = GlobalAlignment(rotation_axis=2)
-        data_info['axis_align_matrix'] = np.array(
-            [[0.945519, 0.325568, 0., -5.38439],
-             [-0.325568, 0.945519, 0., -2.87178], [0., 0., 1., -0.06435],
-             [0., 0., 0., 1.]],
-            dtype=np.float32)
-        global_align_transform(data_info)
-
-        data_info['axis_align_matrix'] = np.array(
-            [[0.945519, 0.325568, 0., -5.38439], [0, 2, 0., -2.87178],
-             [0., 0., 1., -0.06435], [0., 0., 0., 1.]],
-            dtype=np.float32)
-        # an invalid (non-orthogonal) rotation part should raise an error
-        with self.assertRaises(AssertionError):
-            global_align_transform(data_info)
-
-
-class TestPolarMix(unittest.TestCase):
-
-    def setUp(self):
-        points = np.random.random((100, 4))
-        self.results = {
-            'points': BasePoints(points, points_dim=4),
-            'pts_semantic_mask': np.random.randint(0, 5, (100, ))
-        }
-
-    def test_transform(self):
-        # test assertion for invalid instance_classes
-        with self.assertRaises(AssertionError):
-            transform = PolarMix(instance_classes=1)
-
-        with self.assertRaises(AssertionError):
-            transform = PolarMix(instance_classes=[1.0, 2.0])
-
-        transform = PolarMix(instance_classes=[1, 2], swap_ratio=1.0)
-        # test assertion for invalid mix_results
-        with self.assertRaises(AssertionError):
-            results = transform(copy.deepcopy(self.results))
-
-        with self.assertRaises(AssertionError):
-            self.results['mix_results'] = [copy.deepcopy(self.results)] * 2
-            results = transform(copy.deepcopy(self.results))
-
-        self.results['mix_results'] = [copy.deepcopy(self.results)]
-        results = transform(copy.deepcopy(self.results))
-        self.assertTrue(results['points'].shape[0] ==
-                        results['pts_semantic_mask'].shape[0])
+from mmcv.transforms import BaseTransform, Compose, RandomResize, Resize
+from mmdet.datasets.transforms import (PhotoMetricDistortion, RandomCrop,
+                                       RandomFlip)
+from mmengine import is_list_of, is_tuple_of
+from mmengine.dataset import BaseDataset
+
+from mmdet3d.models.task_modules import VoxelGenerator
+from mmdet3d.registry import TRANSFORMS
+from mmdet3d.structures import (CameraInstance3DBoxes, DepthInstance3DBoxes,
+                                LiDARInstance3DBoxes)
+from mmdet3d.structures.ops import box_np_ops
+from mmdet3d.structures.points import BasePoints
+from .data_augment_utils import noise_per_object_v3_
+
+
+@TRANSFORMS.register_module()
+class RandomDropPointsColor(BaseTransform):
+    r"""Randomly set the color of points to all zeros.
+
+    Once this transform is executed, all the points' color will be dropped.
+    Refer to `PAConv `_ for more details.
+
+    Args:
+        drop_ratio (float): The probability of dropping point colors.
+            Defaults to 0.2.
+    """
+
+    def __init__(self, drop_ratio: float = 0.2) -> None:
+        assert isinstance(drop_ratio, (int, float)) and 0 <= drop_ratio <= 1, \
+            f'invalid drop_ratio value {drop_ratio}'
+        self.drop_ratio = drop_ratio
+
+    def transform(self, input_dict: dict) -> dict:
+        """Call function to drop point colors.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Results after color dropping, 'points' key is updated
+            in the result dict.
+        """
+        points = input_dict['points']
+        assert points.attribute_dims is not None and \
+            'color' in points.attribute_dims, \
+            'Expect points have color attribute'
+
+        # this if-expression is a bit strange
+        # `RandomDropPointsColor` is used in training 3D segmentor PAConv
+        # we discovered in our experiments that, using
+        # `if np.random.rand() > 1.0 - self.drop_ratio` consistently leads to
+        # better results than using `if np.random.rand() < self.drop_ratio`
+        # so we keep this hack in our codebase
+        if np.random.rand() > 1.0 - self.drop_ratio:
+            points.color = points.color * 0.0
+        return input_dict
+
+    def __repr__(self) -> str:
+        """str: Return a string that describes the module."""
+        repr_str = self.__class__.__name__
+        repr_str += f'(drop_ratio={self.drop_ratio})'
+        return repr_str
+
+
+@TRANSFORMS.register_module()
+class RandomFlip3D(RandomFlip):
+    """Flip the points & bbox.
+
+    If the input dict contains the key "flip", then the flag will be used,
+    otherwise it will be randomly decided by a ratio specified in the init
+    method.
+
+    Required Keys:
+
+    - points (np.float32)
+    - gt_bboxes_3d (np.float32)
+
+    Modified Keys:
+
+    - points (np.float32)
+    - gt_bboxes_3d (np.float32)
+
+    Added Keys:
+
+    - points (np.float32)
+    - pcd_trans (np.float32)
+    - pcd_rotation (np.float32)
+    - pcd_rotation_angle (np.float32)
+    - pcd_scale_factor (np.float32)
+
+    Args:
+        sync_2d (bool): Whether to apply flip according to the 2D
+            images. If True, it will apply the same flip as that to 2D images.
+            If False, it will decide whether to flip randomly and independently
+            to that of 2D images. Defaults to True.
+        flip_ratio_bev_horizontal (float): The flipping probability
+            in horizontal direction. Defaults to 0.0.
+        flip_ratio_bev_vertical (float): The flipping probability
+            in vertical direction. Defaults to 0.0.
+        flip_box3d (bool): Whether to flip the bounding box. In most cases,
+            the box should be flipped. In cam-based bev detection, this is set
+            to False, since the flip of 2D images does not influence the 3D
+            box. Defaults to True.
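+
+    A usage sketch (editor's illustration; the point values are made up,
+    and ``LiDARPoints`` is used so the BEV flip actually modifies the
+    coordinates):
+
+    Examples:
+        >>> import numpy as np
+        >>> from mmdet3d.structures.points import LiDARPoints
+        >>> transform = RandomFlip3D(flip_ratio_bev_horizontal=1.0)
+        >>> input_dict = dict(
+        ...     points=LiDARPoints(
+        ...         np.random.rand(10, 3).astype(np.float32), points_dim=3))
+        >>> input_dict = transform(input_dict)
+        >>> input_dict['pcd_horizontal_flip']
+        True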
+    """
+
+    def __init__(self,
+                 sync_2d: bool = True,
+                 flip_ratio_bev_horizontal: float = 0.0,
+                 flip_ratio_bev_vertical: float = 0.0,
+                 flip_box3d: bool = True,
+                 **kwargs) -> None:
+        # `flip_ratio_bev_horizontal` is equal to the flip prob of the
+        # 2d image when `sync_2d` is True
+        super(RandomFlip3D, self).__init__(
+            prob=flip_ratio_bev_horizontal, direction='horizontal', **kwargs)
+        self.sync_2d = sync_2d
+        self.flip_ratio_bev_horizontal = flip_ratio_bev_horizontal
+        self.flip_ratio_bev_vertical = flip_ratio_bev_vertical
+        self.flip_box3d = flip_box3d
+        if flip_ratio_bev_horizontal is not None:
+            assert isinstance(
+                flip_ratio_bev_horizontal,
+                (int, float)) and 0 <= flip_ratio_bev_horizontal <= 1
+        if flip_ratio_bev_vertical is not None:
+            assert isinstance(
+                flip_ratio_bev_vertical,
+                (int, float)) and 0 <= flip_ratio_bev_vertical <= 1
+
+    def random_flip_data_3d(self,
+                            input_dict: dict,
+                            direction: str = 'horizontal') -> None:
+        """Flip 3D data randomly.
+
+        `random_flip_data_3d` should take these situations into consideration:
+
+        - 1. LIDAR-based 3d detection
+        - 2. LIDAR-based 3d segmentation
+        - 3. vision-only detection
+        - 4. multi-modality 3d detection.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+            direction (str): Flip direction. Defaults to 'horizontal'.
+
+        Returns:
+            None: The input dict is modified in place, 'points' and
+            'gt_bboxes_3d' keys are updated in the result dict.
+        """
+        assert direction in ['horizontal', 'vertical']
+        if self.flip_box3d:
+            if 'gt_bboxes_3d' in input_dict:
+                if 'points' in input_dict:
+                    input_dict['points'] = input_dict['gt_bboxes_3d'].flip(
+                        direction, points=input_dict['points'])
+                else:
+                    # vision-only detection
+                    input_dict['gt_bboxes_3d'].flip(direction)
+            else:
+                input_dict['points'].flip(direction)
+
+        if 'centers_2d' in input_dict:
+            assert self.sync_2d is True and direction == 'horizontal', \
+                'Only support sync_2d=True and horizontal flip with images'
+            w = input_dict['img_shape'][1]
+            input_dict['centers_2d'][..., 0] = \
+                w - input_dict['centers_2d'][..., 0]
+            # need to modify the horizontal position of camera center
+            # along u-axis in the image (flip like centers2d)
+            # ['cam2img'][0][2] = c_u
+            # see more details and examples at
+            # https://github.com/open-mmlab/mmdetection3d/pull/744
+            input_dict['cam2img'][0][2] = w - input_dict['cam2img'][0][2]
+
+    def _flip_on_direction(self, results: dict) -> None:
+        """Function to flip images, bounding boxes, semantic segmentation map
+        and keypoints.
+
+        Add the override feature that if 'flip' is already in results, use it
+        to do the augmentation.
+        """
+        if 'flip' not in results:
+            cur_dir = self._choose_direction()
+        else:
+            cur_dir = results['flip_direction']
+        if cur_dir is None:
+            results['flip'] = False
+            results['flip_direction'] = None
+        else:
+            results['flip'] = True
+            results['flip_direction'] = cur_dir
+            self._flip(results)
+
+    def transform(self, input_dict: dict) -> dict:
+        """Call function to flip points, values in the ``bbox3d_fields`` and
+        also flip 2D image and its annotations.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Flipped results, 'flip', 'flip_direction',
+            'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added
+            into result dict.
+ """ + # flip 2D image and its annotations + if 'img' in input_dict: + super(RandomFlip3D, self).transform(input_dict) + + if self.sync_2d and 'img' in input_dict: + input_dict['pcd_horizontal_flip'] = input_dict['flip'] + input_dict['pcd_vertical_flip'] = False + else: + if 'pcd_horizontal_flip' not in input_dict: + flip_horizontal = True if np.random.rand( + ) < self.flip_ratio_bev_horizontal else False + input_dict['pcd_horizontal_flip'] = flip_horizontal + if 'pcd_vertical_flip' not in input_dict: + flip_vertical = True if np.random.rand( + ) < self.flip_ratio_bev_vertical else False + input_dict['pcd_vertical_flip'] = flip_vertical + + if 'transformation_3d_flow' not in input_dict: + input_dict['transformation_3d_flow'] = [] + + if input_dict['pcd_horizontal_flip']: + self.random_flip_data_3d(input_dict, 'horizontal') + input_dict['transformation_3d_flow'].extend(['HF']) + if input_dict['pcd_vertical_flip']: + self.random_flip_data_3d(input_dict, 'vertical') + input_dict['transformation_3d_flow'].extend(['VF']) + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(sync_2d={self.sync_2d},' + repr_str += f' flip_ratio_bev_vertical={self.flip_ratio_bev_vertical})' + return repr_str + + +@TRANSFORMS.register_module() +class RandomJitterPoints(BaseTransform): + """Randomly jitter point coordinates. + + Different from the global translation in ``GlobalRotScaleTrans``, here we + apply different noises to each point in a scene. + + Args: + jitter_std (list[float]): The standard deviation of jittering noise. + This applies random noise to all points in a 3D scene, which is + sampled from a gaussian distribution whose standard deviation is + set by ``jitter_std``. Defaults to [0.01, 0.01, 0.01] + clip_range (list[float]): Clip the randomly generated jitter + noise into this range. If None is given, don't perform clipping. + Defaults to [-0.05, 0.05] + + Note: + This transform should only be used in point cloud segmentation tasks + because we don't transform ground-truth bboxes accordingly. + For similar transform in detection task, please refer to `ObjectNoise`. + """ + + def __init__(self, + jitter_std: List[float] = [0.01, 0.01, 0.01], + clip_range: List[float] = [-0.05, 0.05]) -> None: + seq_types = (list, tuple, np.ndarray) + if not isinstance(jitter_std, seq_types): + assert isinstance(jitter_std, (int, float)), \ + f'unsupported jitter_std type {type(jitter_std)}' + jitter_std = [jitter_std, jitter_std, jitter_std] + self.jitter_std = jitter_std + + if clip_range is not None: + if not isinstance(clip_range, seq_types): + assert isinstance(clip_range, (int, float)), \ + f'unsupported clip_range type {type(clip_range)}' + clip_range = [-clip_range, clip_range] + self.clip_range = clip_range + + def transform(self, input_dict: dict) -> dict: + """Call function to jitter all the points in the scene. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after adding noise to each point, + 'points' key is updated in the result dict. 
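+
+        A usage sketch (editor's illustration; all-zero coordinates make
+        the clipping bound easy to check):
+
+        Examples:
+            >>> import numpy as np
+            >>> from mmdet3d.structures.points import LiDARPoints
+            >>> transform = RandomJitterPoints(
+            ...     jitter_std=[0.01, 0.01, 0.01], clip_range=[-0.05, 0.05])
+            >>> xyz = np.zeros((100, 3), dtype=np.float32)
+            >>> input_dict = dict(points=LiDARPoints(xyz, points_dim=3))
+            >>> input_dict = transform(input_dict)
+            >>> abs(input_dict['points'].coord.numpy()).max() <= 0.05
+            True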
+ """ + points = input_dict['points'] + jitter_std = np.array(self.jitter_std, dtype=np.float32) + jitter_noise = \ + np.random.randn(points.shape[0], 3) * jitter_std[None, :] + if self.clip_range is not None: + jitter_noise = np.clip(jitter_noise, self.clip_range[0], + self.clip_range[1]) + + points.translate(jitter_noise) + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(jitter_std={self.jitter_std},' + repr_str += f' clip_range={self.clip_range})' + return repr_str + + +@TRANSFORMS.register_module() +class ObjectSample(BaseTransform): + """Sample GT objects to the data. + + Required Keys: + + - points + - ann_info + - gt_bboxes_3d + - gt_labels_3d + - img (optional) + - gt_bboxes (optional) + + Modified Keys: + + - points + - gt_bboxes_3d + - gt_labels_3d + - img (optional) + - gt_bboxes (optional) + + Added Keys: + + - plane (optional) + + Args: + db_sampler (dict): Config dict of the database sampler. + sample_2d (bool): Whether to also paste 2D image patch to the images. + This should be true when applying multi-modality cut-and-paste. + Defaults to False. + use_ground_plane (bool): Whether to use ground plane to adjust the + 3D labels. Defaults to False. + """ + + def __init__(self, + db_sampler: dict, + sample_2d: bool = False, + use_ground_plane: bool = False) -> None: + self.sampler_cfg = db_sampler + self.sample_2d = sample_2d + if 'type' not in db_sampler.keys(): + db_sampler['type'] = 'DataBaseSampler' + self.db_sampler = TRANSFORMS.build(db_sampler) + self.use_ground_plane = use_ground_plane + self.disabled = False + + @staticmethod + def remove_points_in_boxes(points: BasePoints, + boxes: np.ndarray) -> np.ndarray: + """Remove the points in the sampled bounding boxes. + + Args: + points (:obj:`BasePoints`): Input point cloud array. + boxes (np.ndarray): Sampled ground truth boxes. + + Returns: + np.ndarray: Points with those in the boxes removed. + """ + masks = box_np_ops.points_in_rbbox(points.coord.numpy(), boxes) + points = points[np.logical_not(masks.any(-1))] + return points + + def transform(self, input_dict: dict) -> dict: + """Transform function to sample ground truth objects to the data. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after object sampling augmentation, + 'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated + in the result dict. 
+ """ + if self.disabled: + return input_dict + + gt_bboxes_3d = input_dict['gt_bboxes_3d'] + gt_labels_3d = input_dict['gt_labels_3d'] + + if self.use_ground_plane: + ground_plane = input_dict.get('plane', None) + assert ground_plane is not None, '`use_ground_plane` is True ' \ + 'but find plane is None' + else: + ground_plane = None + # change to float for blending operation + points = input_dict['points'] + if self.sample_2d: + img = input_dict['img'] + gt_bboxes_2d = input_dict['gt_bboxes'] + # Assume for now 3D & 2D bboxes are the same + sampled_dict = self.db_sampler.sample_all( + gt_bboxes_3d.tensor.numpy(), + gt_labels_3d, + gt_bboxes_2d=gt_bboxes_2d, + img=img) + else: + sampled_dict = self.db_sampler.sample_all( + gt_bboxes_3d.tensor.numpy(), + gt_labels_3d, + img=None, + ground_plane=ground_plane) + + if sampled_dict is not None: + sampled_gt_bboxes_3d = sampled_dict['gt_bboxes_3d'] + sampled_points = sampled_dict['points'] + sampled_gt_labels = sampled_dict['gt_labels_3d'] + + gt_labels_3d = np.concatenate([gt_labels_3d, sampled_gt_labels], + axis=0) + gt_bboxes_3d = gt_bboxes_3d.new_box( + np.concatenate( + [gt_bboxes_3d.tensor.numpy(), sampled_gt_bboxes_3d])) + + points = self.remove_points_in_boxes(points, sampled_gt_bboxes_3d) + # check the points dimension + points = points.cat([sampled_points, points]) + + if self.sample_2d: + sampled_gt_bboxes_2d = sampled_dict['gt_bboxes_2d'] + gt_bboxes_2d = np.concatenate( + [gt_bboxes_2d, sampled_gt_bboxes_2d]).astype(np.float32) + + input_dict['gt_bboxes'] = gt_bboxes_2d + input_dict['img'] = sampled_dict['img'] + + input_dict['gt_bboxes_3d'] = gt_bboxes_3d + input_dict['gt_labels_3d'] = gt_labels_3d.astype(np.int64) + input_dict['points'] = points + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(db_sampler={self.db_sampler},' + repr_str += f' sample_2d={self.sample_2d},' + repr_str += f' use_ground_plane={self.use_ground_plane})' + return repr_str + + +@TRANSFORMS.register_module() +class ObjectNoise(BaseTransform): + """Apply noise to each GT objects in the scene. + + Required Keys: + + - points + - gt_bboxes_3d + + Modified Keys: + + - points + - gt_bboxes_3d + + Args: + translation_std (list[float]): Standard deviation of the + distribution where translation noise are sampled from. + Defaults to [0.25, 0.25, 0.25]. + global_rot_range (list[float]): Global rotation to the scene. + Defaults to [0.0, 0.0]. + rot_range (list[float]): Object rotation range. + Defaults to [-0.15707963267, 0.15707963267]. + num_try (int): Number of times to try if the noise applied is invalid. + Defaults to 100. + """ + + def __init__(self, + translation_std: List[float] = [0.25, 0.25, 0.25], + global_rot_range: List[float] = [0.0, 0.0], + rot_range: List[float] = [-0.15707963267, 0.15707963267], + num_try: int = 100) -> None: + self.translation_std = translation_std + self.global_rot_range = global_rot_range + self.rot_range = rot_range + self.num_try = num_try + + def transform(self, input_dict: dict) -> dict: + """Transform function to apply noise to each ground truth in the scene. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after adding noise to each object, + 'points', 'gt_bboxes_3d' keys are updated in the result dict. 
+ """ + gt_bboxes_3d = input_dict['gt_bboxes_3d'] + points = input_dict['points'] + + # TODO: this is inplace operation + numpy_box = gt_bboxes_3d.tensor.numpy() + numpy_points = points.tensor.numpy() + + noise_per_object_v3_( + numpy_box, + numpy_points, + rotation_perturb=self.rot_range, + center_noise_std=self.translation_std, + global_random_rot_range=self.global_rot_range, + num_try=self.num_try) + + input_dict['gt_bboxes_3d'] = gt_bboxes_3d.new_box(numpy_box) + input_dict['points'] = points.new_point(numpy_points) + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(num_try={self.num_try},' + repr_str += f' translation_std={self.translation_std},' + repr_str += f' global_rot_range={self.global_rot_range},' + repr_str += f' rot_range={self.rot_range})' + return repr_str + + +@TRANSFORMS.register_module() +class GlobalAlignment(BaseTransform): + """Apply global alignment to 3D scene points by rotation and translation. + + Args: + rotation_axis (int): Rotation axis for points and bboxes rotation. + + Note: + We do not record the applied rotation and translation as in + GlobalRotScaleTrans. Because usually, we do not need to reverse + the alignment step. + For example, ScanNet 3D detection task uses aligned ground-truth + bounding boxes for evaluation. + """ + + def __init__(self, rotation_axis: int) -> None: + self.rotation_axis = rotation_axis + + def _trans_points(self, results: dict, trans_factor: np.ndarray) -> None: + """Private function to translate points. + + Args: + input_dict (dict): Result dict from loading pipeline. + trans_factor (np.ndarray): Translation vector to be applied. + + Returns: + dict: Results after translation, 'points' is updated in the dict. + """ + results['points'].translate(trans_factor) + + def _rot_points(self, results: dict, rot_mat: np.ndarray) -> None: + """Private function to rotate bounding boxes and points. + + Args: + input_dict (dict): Result dict from loading pipeline. + rot_mat (np.ndarray): Rotation matrix to be applied. + + Returns: + dict: Results after rotation, 'points' is updated in the dict. + """ + # input should be rot_mat_T so I transpose it here + results['points'].rotate(rot_mat.T) + + def _check_rot_mat(self, rot_mat: np.ndarray) -> None: + """Check if rotation matrix is valid for self.rotation_axis. + + Args: + rot_mat (np.ndarray): Rotation matrix to be applied. + """ + is_valid = np.allclose(np.linalg.det(rot_mat), 1.0) + valid_array = np.zeros(3) + valid_array[self.rotation_axis] = 1.0 + is_valid &= (rot_mat[self.rotation_axis, :] == valid_array).all() + is_valid &= (rot_mat[:, self.rotation_axis] == valid_array).all() + assert is_valid, f'invalid rotation matrix {rot_mat}' + + def transform(self, results: dict) -> dict: + """Call function to shuffle points. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after global alignment, 'points' and keys in + input_dict['bbox3d_fields'] are updated in the result dict. 
+ """ + assert 'axis_align_matrix' in results, \ + 'axis_align_matrix is not provided in GlobalAlignment' + + axis_align_matrix = results['axis_align_matrix'] + assert axis_align_matrix.shape == (4, 4), \ + f'invalid shape {axis_align_matrix.shape} for axis_align_matrix' + rot_mat = axis_align_matrix[:3, :3] + trans_vec = axis_align_matrix[:3, -1] + + self._check_rot_mat(rot_mat) + self._rot_points(results, rot_mat) + self._trans_points(results, trans_vec) + + return results + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(rotation_axis={self.rotation_axis})' + return repr_str + + +@TRANSFORMS.register_module() +class GlobalRotScaleTrans(BaseTransform): + """Apply global rotation, scaling and translation to a 3D scene. + + Required Keys: + + - points (np.float32) + - gt_bboxes_3d (np.float32) + + Modified Keys: + + - points (np.float32) + - gt_bboxes_3d (np.float32) + + Added Keys: + + - points (np.float32) + - pcd_trans (np.float32) + - pcd_rotation (np.float32) + - pcd_rotation_angle (np.float32) + - pcd_scale_factor (np.float32) + + Args: + rot_range (list[float]): Range of rotation angle. + Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]). + scale_ratio_range (list[float]): Range of scale ratio. + Defaults to [0.95, 1.05]. + translation_std (list[float]): The standard deviation of + translation noise applied to a scene, which + is sampled from a gaussian distribution whose standard deviation + is set by ``translation_std``. Defaults to [0, 0, 0]. + shift_height (bool): Whether to shift height. + (the fourth dimension of indoor points) when scaling. + Defaults to False. + """ + + def __init__(self, + rot_range: List[float] = [-0.78539816, 0.78539816], + scale_ratio_range: List[float] = [0.95, 1.05], + translation_std: List[int] = [0, 0, 0], + shift_height: bool = False) -> None: + seq_types = (list, tuple, np.ndarray) + if not isinstance(rot_range, seq_types): + assert isinstance(rot_range, (int, float)), \ + f'unsupported rot_range type {type(rot_range)}' + rot_range = [-rot_range, rot_range] + self.rot_range = rot_range + + assert isinstance(scale_ratio_range, seq_types), \ + f'unsupported scale_ratio_range type {type(scale_ratio_range)}' + + self.scale_ratio_range = scale_ratio_range + + if not isinstance(translation_std, seq_types): + assert isinstance(translation_std, (int, float)), \ + f'unsupported translation_std type {type(translation_std)}' + translation_std = [ + translation_std, translation_std, translation_std + ] + assert all([std >= 0 for std in translation_std]), \ + 'translation_std should be positive' + self.translation_std = translation_std + self.shift_height = shift_height + + def _trans_bbox_points(self, input_dict: dict) -> None: + """Private function to translate bounding boxes and points. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after translation, 'points', 'pcd_trans' + and `gt_bboxes_3d` is updated in the result dict. + """ + translation_std = np.array(self.translation_std, dtype=np.float32) + trans_factor = np.random.normal(scale=translation_std, size=3).T + + input_dict['points'].translate(trans_factor) + input_dict['pcd_trans'] = trans_factor + if 'gt_bboxes_3d' in input_dict: + input_dict['gt_bboxes_3d'].translate(trans_factor) + + def _rot_bbox_points(self, input_dict: dict) -> None: + """Private function to rotate bounding boxes and points. 
+ + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after rotation, 'points', 'pcd_rotation' + and `gt_bboxes_3d` is updated in the result dict. + """ + rotation = self.rot_range + noise_rotation = np.random.uniform(rotation[0], rotation[1]) + + if 'gt_bboxes_3d' in input_dict and \ + len(input_dict['gt_bboxes_3d'].tensor) != 0: + # rotate points with bboxes + points, rot_mat_T = input_dict['gt_bboxes_3d'].rotate( + noise_rotation, input_dict['points']) + input_dict['points'] = points + else: + # if no bbox in input_dict, only rotate points + rot_mat_T = input_dict['points'].rotate(noise_rotation) + + input_dict['pcd_rotation'] = rot_mat_T + input_dict['pcd_rotation_angle'] = noise_rotation + + def _scale_bbox_points(self, input_dict: dict) -> None: + """Private function to scale bounding boxes and points. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after scaling, 'points' and + `gt_bboxes_3d` is updated in the result dict. + """ + scale = input_dict['pcd_scale_factor'] + points = input_dict['points'] + points.scale(scale) + if self.shift_height: + assert 'height' in points.attribute_dims.keys(), \ + 'setting shift_height=True but points have no height attribute' + points.tensor[:, points.attribute_dims['height']] *= scale + input_dict['points'] = points + + if 'gt_bboxes_3d' in input_dict and \ + len(input_dict['gt_bboxes_3d'].tensor) != 0: + input_dict['gt_bboxes_3d'].scale(scale) + + def _random_scale(self, input_dict: dict) -> None: + """Private function to randomly set the scale factor. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after scaling, 'pcd_scale_factor' + are updated in the result dict. + """ + scale_factor = np.random.uniform(self.scale_ratio_range[0], + self.scale_ratio_range[1]) + input_dict['pcd_scale_factor'] = scale_factor + + def transform(self, input_dict: dict) -> dict: + """Private function to rotate, scale and translate bounding boxes and + points. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after scaling, 'points', 'pcd_rotation', + 'pcd_scale_factor', 'pcd_trans' and `gt_bboxes_3d` are updated + in the result dict. + """ + if 'transformation_3d_flow' not in input_dict: + input_dict['transformation_3d_flow'] = [] + + self._rot_bbox_points(input_dict) + + if 'pcd_scale_factor' not in input_dict: + self._random_scale(input_dict) + self._scale_bbox_points(input_dict) + + self._trans_bbox_points(input_dict) + + input_dict['transformation_3d_flow'].extend(['R', 'S', 'T']) + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(rot_range={self.rot_range},' + repr_str += f' scale_ratio_range={self.scale_ratio_range},' + repr_str += f' translation_std={self.translation_std},' + repr_str += f' shift_height={self.shift_height})' + return repr_str + + +@TRANSFORMS.register_module() +class PointShuffle(BaseTransform): + """Shuffle input points.""" + + def transform(self, input_dict: dict) -> dict: + """Call function to shuffle points. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after filtering, 'points', 'pts_instance_mask' + and 'pts_semantic_mask' keys are updated in the result dict. 
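A standalone numpy sketch of the R -> S -> T flow and the metadata GlobalRotScaleTrans records, using the default ranges; the z-rotation convention shown here (right-multiplication by a transposed rotation matrix) is illustrative:

    import numpy as np

    angle = np.random.uniform(-0.78539816, 0.78539816)   # rot_range default
    scale = np.random.uniform(0.95, 1.05)                 # scale_ratio_range default
    trans = np.random.normal(scale=np.array([0., 0., 0.]), size=3)  # std default

    rot_mat_T = np.array([[np.cos(angle),  np.sin(angle), 0],
                          [-np.sin(angle), np.cos(angle), 0],
                          [0,              0,             1]])
    xyz = np.random.rand(256, 3)
    xyz = (xyz @ rot_mat_T) * scale + trans

    meta = dict(pcd_rotation=rot_mat_T, pcd_rotation_angle=angle,
                pcd_scale_factor=scale, pcd_trans=trans,
                transformation_3d_flow=['R', 'S', 'T'])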
+ """ + idx = input_dict['points'].shuffle() + idx = idx.numpy() + + pts_instance_mask = input_dict.get('pts_instance_mask', None) + pts_semantic_mask = input_dict.get('pts_semantic_mask', None) + + if pts_instance_mask is not None: + input_dict['pts_instance_mask'] = pts_instance_mask[idx] + + if pts_semantic_mask is not None: + input_dict['pts_semantic_mask'] = pts_semantic_mask[idx] + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + return self.__class__.__name__ + + +@TRANSFORMS.register_module() +class ObjectRangeFilter(BaseTransform): + """Filter objects by the range. + + Required Keys: + + - gt_bboxes_3d + + Modified Keys: + + - gt_bboxes_3d + + Args: + point_cloud_range (list[float]): Point cloud range. + """ + + def __init__(self, point_cloud_range: List[float]) -> None: + self.pcd_range = np.array(point_cloud_range, dtype=np.float32) + + def transform(self, input_dict: dict) -> dict: + """Transform function to filter objects by the range. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' + keys are updated in the result dict. + """ + # Check points instance type and initialise bev_range + if isinstance(input_dict['gt_bboxes_3d'], + (LiDARInstance3DBoxes, DepthInstance3DBoxes)): + bev_range = self.pcd_range[[0, 1, 3, 4]] + elif isinstance(input_dict['gt_bboxes_3d'], CameraInstance3DBoxes): + bev_range = self.pcd_range[[0, 2, 3, 5]] + + gt_bboxes_3d = input_dict['gt_bboxes_3d'] + gt_labels_3d = input_dict['gt_labels_3d'] + mask = gt_bboxes_3d.in_range_bev(bev_range) + gt_bboxes_3d = gt_bboxes_3d[mask] + # mask is a torch tensor but gt_labels_3d is still numpy array + # using mask to index gt_labels_3d will cause bug when + # len(gt_labels_3d) == 1, where mask=1 will be interpreted + # as gt_labels_3d[1] and cause out of index error + gt_labels_3d = gt_labels_3d[mask.numpy().astype(np.bool)] + + # limit rad to [-pi, pi] + gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi) + input_dict['gt_bboxes_3d'] = gt_bboxes_3d + input_dict['gt_labels_3d'] = gt_labels_3d + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' + return repr_str + + +@TRANSFORMS.register_module() +class PointsRangeFilter(BaseTransform): + """Filter points by the range. + + Required Keys: + + - points + - pts_instance_mask (optional) + + Modified Keys: + + - points + - pts_instance_mask (optional) + + Args: + point_cloud_range (list[float]): Point cloud range. + """ + + def __init__(self, point_cloud_range: List[float]) -> None: + self.pcd_range = np.array(point_cloud_range, dtype=np.float32) + + def transform(self, input_dict: dict) -> dict: + """Transform function to filter points by the range. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after filtering, 'points', 'pts_instance_mask' + and 'pts_semantic_mask' keys are updated in the result dict. 
+ """ + points = input_dict['points'] + points_mask = points.in_range_3d(self.pcd_range) + clean_points = points[points_mask] + input_dict['points'] = clean_points + points_mask = points_mask.numpy() + + pts_instance_mask = input_dict.get('pts_instance_mask', None) + pts_semantic_mask = input_dict.get('pts_semantic_mask', None) + + if pts_instance_mask is not None: + input_dict['pts_instance_mask'] = pts_instance_mask[points_mask] + + if pts_semantic_mask is not None: + input_dict['pts_semantic_mask'] = pts_semantic_mask[points_mask] + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' + return repr_str + + +@TRANSFORMS.register_module() +class ObjectNameFilter(BaseTransform): + """Filter GT objects by their names. + + Required Keys: + + - gt_labels_3d + + Modified Keys: + + - gt_labels_3d + + Args: + classes (list[str]): List of class names to be kept for training. + """ + + def __init__(self, classes: List[str]) -> None: + self.classes = classes + self.labels = list(range(len(self.classes))) + + def transform(self, input_dict: dict) -> dict: + """Transform function to filter objects by their names. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' + keys are updated in the result dict. + """ + gt_labels_3d = input_dict['gt_labels_3d'] + gt_bboxes_mask = np.array([n in self.labels for n in gt_labels_3d], + dtype=np.bool_) + input_dict['gt_bboxes_3d'] = input_dict['gt_bboxes_3d'][gt_bboxes_mask] + input_dict['gt_labels_3d'] = input_dict['gt_labels_3d'][gt_bboxes_mask] + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(classes={self.classes})' + return repr_str + + +@TRANSFORMS.register_module() +class PointSample(BaseTransform): + """Point sample. + + Sampling data to a certain number. + + Required Keys: + + - points + - pts_instance_mask (optional) + - pts_semantic_mask (optional) + + Modified Keys: + + - points + - pts_instance_mask (optional) + - pts_semantic_mask (optional) + + Args: + num_points (int): Number of points to be sampled. + sample_range (float, optional): The range where to sample points. + If not None, the points with depth larger than `sample_range` are + prior to be sampled. Defaults to None. + replace (bool): Whether the sampling is with or without replacement. + Defaults to False. + """ + + def __init__(self, + num_points: int, + sample_range: Optional[float] = None, + replace: bool = False) -> None: + self.num_points = num_points + self.sample_range = sample_range + self.replace = replace + + def _points_random_sampling( + self, + points: BasePoints, + num_samples: int, + sample_range: Optional[float] = None, + replace: bool = False, + return_choices: bool = False + ) -> Union[Tuple[BasePoints, np.ndarray], BasePoints]: + """Points random sampling. + + Sample points to a certain number. + + Args: + points (:obj:`BasePoints`): 3D Points. + num_samples (int): Number of samples to be sampled. + sample_range (float, optional): Indicating the range where the + points will be sampled. Defaults to None. + replace (bool): Sampling with or without replacement. + Defaults to False. + return_choices (bool): Whether return choice. Defaults to False. 
+ + Returns: + tuple[:obj:`BasePoints`, np.ndarray] | :obj:`BasePoints`: + + - points (:obj:`BasePoints`): 3D Points. + - choices (np.ndarray, optional): The generated random samples. + """ + if not replace: + replace = (points.shape[0] < num_samples) + point_range = range(len(points)) + if sample_range is not None and not replace: + # Only sampling the near points when len(points) >= num_samples + dist = np.linalg.norm(points.coord.numpy(), axis=1) + far_inds = np.where(dist >= sample_range)[0] + near_inds = np.where(dist < sample_range)[0] + # in case there are too many far points + if len(far_inds) > num_samples: + far_inds = np.random.choice( + far_inds, num_samples, replace=False) + point_range = near_inds + num_samples -= len(far_inds) + choices = np.random.choice(point_range, num_samples, replace=replace) + if sample_range is not None and not replace: + choices = np.concatenate((far_inds, choices)) + # Shuffle points after sampling + np.random.shuffle(choices) + if return_choices: + return points[choices], choices + else: + return points[choices] + + def transform(self, input_dict: dict) -> dict: + """Transform function to sample points to in indoor scenes. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after sampling, 'points', 'pts_instance_mask' + and 'pts_semantic_mask' keys are updated in the result dict. + """ + points = input_dict['points'] + points, choices = self._points_random_sampling( + points, + self.num_points, + self.sample_range, + self.replace, + return_choices=True) + input_dict['points'] = points + + pts_instance_mask = input_dict.get('pts_instance_mask', None) + pts_semantic_mask = input_dict.get('pts_semantic_mask', None) + + if pts_instance_mask is not None: + pts_instance_mask = pts_instance_mask[choices] + input_dict['pts_instance_mask'] = pts_instance_mask + + if pts_semantic_mask is not None: + pts_semantic_mask = pts_semantic_mask[choices] + input_dict['pts_semantic_mask'] = pts_semantic_mask + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(num_points={self.num_points},' + repr_str += f' sample_range={self.sample_range},' + repr_str += f' replace={self.replace})' + + return repr_str + + +@TRANSFORMS.register_module() +class IndoorPointSample(PointSample): + """Indoor point sample. + + Sampling data to a certain number. + NOTE: IndoorPointSample is deprecated in favor of PointSample + + Args: + num_points (int): Number of points to be sampled. + """ + + def __init__(self, *args, **kwargs): + warnings.warn( + 'IndoorPointSample is deprecated in favor of PointSample') + super(IndoorPointSample, self).__init__(*args, **kwargs) + + +@TRANSFORMS.register_module() +class IndoorPatchPointSample(BaseTransform): + r"""Indoor point sample within a patch. Modified from `PointNet++ `_. + + Sampling data to a certain number for semantic segmentation. + + Args: + num_points (int): Number of points to be sampled. + block_size (float): Size of a block to sample points from. + Defaults to 1.5. + sample_rate (float, optional): Stride used in sliding patch generation. + This parameter is unused in `IndoorPatchPointSample` and thus has + been deprecated. We plan to remove it in the future. + Defaults to None. + ignore_index (int, optional): Label index that won't be used for the + segmentation task. This is set in PointSegClassMapping as neg_cls. + If not None, will be used as a patch selection criterion. 
+ Defaults to None. + use_normalized_coord (bool): Whether to use normalized xyz as + additional features. Defaults to False. + num_try (int): Number of times to try if the patch selected is invalid. + Defaults to 10. + enlarge_size (float): Enlarge the sampled patch to + [-block_size / 2 - enlarge_size, block_size / 2 + enlarge_size] as + an augmentation. If None, set it as 0. Defaults to 0.2. + min_unique_num (int, optional): Minimum number of unique points + the sampled patch should contain. If None, use PointNet++'s method + to judge uniqueness. Defaults to None. + eps (float): A value added to patch boundary to guarantee + points coverage. Defaults to 1e-2. + + Note: + This transform should only be used in the training process of point + cloud segmentation tasks. For the sliding patch generation and + inference process in testing, please refer to the `slide_inference` + function of `EncoderDecoder3D` class. + """ + + def __init__(self, + num_points: int, + block_size: float = 1.5, + sample_rate: Optional[float] = None, + ignore_index: Optional[int] = None, + use_normalized_coord: bool = False, + num_try: int = 10, + enlarge_size: float = 0.2, + min_unique_num: Optional[int] = None, + eps: float = 1e-2) -> None: + self.num_points = num_points + self.block_size = block_size + self.ignore_index = ignore_index + self.use_normalized_coord = use_normalized_coord + self.num_try = num_try + self.enlarge_size = enlarge_size if enlarge_size is not None else 0.0 + self.min_unique_num = min_unique_num + self.eps = eps + + if sample_rate is not None: + warnings.warn( + "'sample_rate' has been deprecated and will be removed in " + 'the future. Please remove them from your code.') + + def _input_generation(self, coords: np.ndarray, patch_center: np.ndarray, + coord_max: np.ndarray, attributes: np.ndarray, + attribute_dims: dict, + point_type: type) -> BasePoints: + """Generating model input. + + Generate input by subtracting patch center and adding additional + features. Currently support colors and normalized xyz as features. + + Args: + coords (np.ndarray): Sampled 3D Points. + patch_center (np.ndarray): Center coordinate of the selected patch. + coord_max (np.ndarray): Max coordinate of all 3D Points. + attributes (np.ndarray): features of input points. + attribute_dims (dict): Dictionary to indicate the meaning of extra + dimension. + point_type (type): class of input points inherited from BasePoints. + + Returns: + :obj:`BasePoints`: The generated input data. + """ + # subtract patch center, the z dimension is not centered + centered_coords = coords.copy() + centered_coords[:, 0] -= patch_center[0] + centered_coords[:, 1] -= patch_center[1] + + if self.use_normalized_coord: + normalized_coord = coords / coord_max + attributes = np.concatenate([attributes, normalized_coord], axis=1) + if attribute_dims is None: + attribute_dims = dict() + attribute_dims.update( + dict(normalized_coord=[ + attributes.shape[1], attributes.shape[1] + + 1, attributes.shape[1] + 2 + ])) + + points = np.concatenate([centered_coords, attributes], axis=1) + points = point_type( + points, points_dim=points.shape[1], attribute_dims=attribute_dims) + + return points + + def _patch_points_sampling( + self, points: BasePoints, + sem_mask: np.ndarray) -> Tuple[BasePoints, np.ndarray]: + """Patch points sampling. + + First sample a valid patch. + Then sample points within that patch to a certain number. + + Args: + points (:obj:`BasePoints`): 3D Points. + sem_mask (np.ndarray): semantic segmentation mask for input points. 
+ + Returns: + tuple[:obj:`BasePoints`, np.ndarray]: + + - points (:obj:`BasePoints`): 3D Points. + - choices (np.ndarray): The generated random samples. + """ + coords = points.coord.numpy() + attributes = points.tensor[:, 3:].numpy() + attribute_dims = points.attribute_dims + point_type = type(points) + + coord_max = np.amax(coords, axis=0) + coord_min = np.amin(coords, axis=0) + + for _ in range(self.num_try): + # random sample a point as patch center + cur_center = coords[np.random.choice(coords.shape[0])] + + # boundary of a patch, which would be enlarged by + # `self.enlarge_size` as an augmentation + cur_max = cur_center + np.array( + [self.block_size / 2.0, self.block_size / 2.0, 0.0]) + cur_min = cur_center - np.array( + [self.block_size / 2.0, self.block_size / 2.0, 0.0]) + cur_max[2] = coord_max[2] + cur_min[2] = coord_min[2] + cur_choice = np.sum( + (coords >= (cur_min - self.enlarge_size)) * + (coords <= (cur_max + self.enlarge_size)), + axis=1) == 3 + + if not cur_choice.any(): # no points in this patch + continue + + cur_coords = coords[cur_choice, :] + cur_sem_mask = sem_mask[cur_choice] + point_idxs = np.where(cur_choice)[0] + mask = np.sum( + (cur_coords >= (cur_min - self.eps)) * (cur_coords <= + (cur_max + self.eps)), + axis=1) == 3 + + # two criteria for patch sampling, adopted from PointNet++ + # 1. selected patch should contain enough unique points + if self.min_unique_num is None: + # use PointNet++'s method as default + # [31, 31, 62] are just some big values used to transform + # coords from 3d array to 1d and then check their uniqueness + # this is used in all the ScanNet code following PointNet++ + vidx = np.ceil( + (cur_coords[mask, :] - cur_min) / (cur_max - cur_min) * + np.array([31.0, 31.0, 62.0])) + vidx = np.unique(vidx[:, 0] * 31.0 * 62.0 + vidx[:, 1] * 62.0 + + vidx[:, 2]) + flag1 = len(vidx) / 31.0 / 31.0 / 62.0 >= 0.02 + else: + # if `min_unique_num` is provided, directly compare with it + flag1 = mask.sum() >= self.min_unique_num + + # 2. selected patch should contain enough annotated points + if self.ignore_index is None: + flag2 = True + else: + flag2 = np.sum(cur_sem_mask != self.ignore_index) / \ + len(cur_sem_mask) >= 0.7 + + if flag1 and flag2: + break + + # sample idx to `self.num_points` + if point_idxs.size >= self.num_points: + # no duplicate in sub-sampling + choices = np.random.choice( + point_idxs, self.num_points, replace=False) + else: + # do not use random choice here to avoid some points not counted + dup = np.random.choice(point_idxs.size, + self.num_points - point_idxs.size) + idx_dup = np.concatenate( + [np.arange(point_idxs.size), + np.array(dup)], 0) + choices = point_idxs[idx_dup] + + # construct model input + points = self._input_generation(coords[choices], cur_center, coord_max, + attributes[choices], attribute_dims, + point_type) + + return points, choices + + def transform(self, input_dict: dict) -> dict: + """Call function to sample points to in indoor scenes. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after sampling, 'points', 'pts_instance_mask' + and 'pts_semantic_mask' keys are updated in the result dict. 
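The PointNet++-style uniqueness criterion above can be read as: quantise the patch into a 31 x 31 x 62 grid and require at least 2% of the cells to be occupied. A standalone sketch on synthetic coordinates:

    import numpy as np

    cur_coords = np.random.rand(2000, 3)
    cur_min, cur_max = cur_coords.min(0), cur_coords.max(0)
    vidx = np.ceil((cur_coords - cur_min) / (cur_max - cur_min) *
                   np.array([31.0, 31.0, 62.0]))
    vidx = np.unique(vidx[:, 0] * 31.0 * 62.0 + vidx[:, 1] * 62.0 + vidx[:, 2])
    enough_unique = len(vidx) / (31.0 * 31.0 * 62.0) >= 0.02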
+ """ + points = input_dict['points'] + + assert 'pts_semantic_mask' in input_dict.keys(), \ + 'semantic mask should be provided in training and evaluation' + pts_semantic_mask = input_dict['pts_semantic_mask'] + + points, choices = self._patch_points_sampling(points, + pts_semantic_mask) + + input_dict['points'] = points + input_dict['pts_semantic_mask'] = pts_semantic_mask[choices] + + # 'eval_ann_info' will be passed to evaluator + if 'eval_ann_info' in input_dict: + input_dict['eval_ann_info']['pts_semantic_mask'] = \ + pts_semantic_mask[choices] + + pts_instance_mask = input_dict.get('pts_instance_mask', None) + + if pts_instance_mask is not None: + input_dict['pts_instance_mask'] = pts_instance_mask[choices] + # 'eval_ann_info' will be passed to evaluator + if 'eval_ann_info' in input_dict: + input_dict['eval_ann_info']['pts_instance_mask'] = \ + pts_instance_mask[choices] + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(num_points={self.num_points},' + repr_str += f' block_size={self.block_size},' + repr_str += f' ignore_index={self.ignore_index},' + repr_str += f' use_normalized_coord={self.use_normalized_coord},' + repr_str += f' num_try={self.num_try},' + repr_str += f' enlarge_size={self.enlarge_size},' + repr_str += f' min_unique_num={self.min_unique_num},' + repr_str += f' eps={self.eps})' + return repr_str + + +@TRANSFORMS.register_module() +class BackgroundPointsFilter(BaseTransform): + """Filter background points near the bounding box. + + Args: + bbox_enlarge_range (tuple[float] | float): Bbox enlarge range. + """ + + def __init__(self, bbox_enlarge_range: Union[Tuple[float], float]) -> None: + assert (is_tuple_of(bbox_enlarge_range, float) + and len(bbox_enlarge_range) == 3) \ + or isinstance(bbox_enlarge_range, float), \ + f'Invalid arguments bbox_enlarge_range {bbox_enlarge_range}' + + if isinstance(bbox_enlarge_range, float): + bbox_enlarge_range = [bbox_enlarge_range] * 3 + self.bbox_enlarge_range = np.array( + bbox_enlarge_range, dtype=np.float32)[np.newaxis, :] + + def transform(self, input_dict: dict) -> dict: + """Call function to filter points by the range. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after filtering, 'points', 'pts_instance_mask' + and 'pts_semantic_mask' keys are updated in the result dict. 
+ """ + points = input_dict['points'] + gt_bboxes_3d = input_dict['gt_bboxes_3d'] + + # avoid groundtruth being modified + gt_bboxes_3d_np = gt_bboxes_3d.tensor.clone().numpy() + gt_bboxes_3d_np[:, :3] = gt_bboxes_3d.gravity_center.clone().numpy() + + enlarged_gt_bboxes_3d = gt_bboxes_3d_np.copy() + enlarged_gt_bboxes_3d[:, 3:6] += self.bbox_enlarge_range + points_numpy = points.tensor.clone().numpy() + foreground_masks = box_np_ops.points_in_rbbox( + points_numpy, gt_bboxes_3d_np, origin=(0.5, 0.5, 0.5)) + enlarge_foreground_masks = box_np_ops.points_in_rbbox( + points_numpy, enlarged_gt_bboxes_3d, origin=(0.5, 0.5, 0.5)) + foreground_masks = foreground_masks.max(1) + enlarge_foreground_masks = enlarge_foreground_masks.max(1) + valid_masks = ~np.logical_and(~foreground_masks, + enlarge_foreground_masks) + + input_dict['points'] = points[valid_masks] + pts_instance_mask = input_dict.get('pts_instance_mask', None) + if pts_instance_mask is not None: + input_dict['pts_instance_mask'] = pts_instance_mask[valid_masks] + + pts_semantic_mask = input_dict.get('pts_semantic_mask', None) + if pts_semantic_mask is not None: + input_dict['pts_semantic_mask'] = pts_semantic_mask[valid_masks] + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(bbox_enlarge_range={self.bbox_enlarge_range.tolist()})' + return repr_str + + +@TRANSFORMS.register_module() +class VoxelBasedPointSampler(BaseTransform): + """Voxel based point sampler. + + Apply voxel sampling to multiple sweep points. + + Args: + cur_sweep_cfg (dict): Config for sampling current points. + prev_sweep_cfg (dict, optional): Config for sampling previous points. + Defaults to None. + time_dim (int): Index that indicate the time dimension + for input points. Defaults to 3. + """ + + def __init__(self, + cur_sweep_cfg: dict, + prev_sweep_cfg: Optional[dict] = None, + time_dim: int = 3) -> None: + self.cur_voxel_generator = VoxelGenerator(**cur_sweep_cfg) + self.cur_voxel_num = self.cur_voxel_generator._max_voxels + self.time_dim = time_dim + if prev_sweep_cfg is not None: + assert prev_sweep_cfg['max_num_points'] == \ + cur_sweep_cfg['max_num_points'] + self.prev_voxel_generator = VoxelGenerator(**prev_sweep_cfg) + self.prev_voxel_num = self.prev_voxel_generator._max_voxels + else: + self.prev_voxel_generator = None + self.prev_voxel_num = 0 + + def _sample_points(self, points: np.ndarray, sampler: VoxelGenerator, + point_dim: int) -> np.ndarray: + """Sample points for each points subset. + + Args: + points (np.ndarray): Points subset to be sampled. + sampler (VoxelGenerator): Voxel based sampler for + each points subset. + point_dim (int): The dimension of each points. + + Returns: + np.ndarray: Sampled points. + """ + voxels, coors, num_points_per_voxel = sampler.generate(points) + if voxels.shape[0] < sampler._max_voxels: + padding_points = np.zeros([ + sampler._max_voxels - voxels.shape[0], sampler._max_num_points, + point_dim + ], + dtype=points.dtype) + padding_points[:] = voxels[0] + sample_points = np.concatenate([voxels, padding_points], axis=0) + else: + sample_points = voxels + + return sample_points + + def transform(self, results: dict) -> dict: + """Call function to sample points from multiple sweeps. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: Results after sampling, 'points', 'pts_instance_mask' + and 'pts_semantic_mask' keys are updated in the result dict. 
+ """ + points = results['points'] + original_dim = points.shape[1] + + # TODO: process instance and semantic mask while _max_num_points + # is larger than 1 + # Extend points with seg and mask fields + map_fields2dim = [] + start_dim = original_dim + points_numpy = points.tensor.numpy() + extra_channel = [points_numpy] + for idx, key in enumerate(results['pts_mask_fields']): + map_fields2dim.append((key, idx + start_dim)) + extra_channel.append(results[key][..., None]) + + start_dim += len(results['pts_mask_fields']) + for idx, key in enumerate(results['pts_seg_fields']): + map_fields2dim.append((key, idx + start_dim)) + extra_channel.append(results[key][..., None]) + + points_numpy = np.concatenate(extra_channel, axis=-1) + + # Split points into two part, current sweep points and + # previous sweeps points. + # TODO: support different sampling methods for next sweeps points + # and previous sweeps points. + cur_points_flag = (points_numpy[:, self.time_dim] == 0) + cur_sweep_points = points_numpy[cur_points_flag] + prev_sweeps_points = points_numpy[~cur_points_flag] + if prev_sweeps_points.shape[0] == 0: + prev_sweeps_points = cur_sweep_points + + # Shuffle points before sampling + np.random.shuffle(cur_sweep_points) + np.random.shuffle(prev_sweeps_points) + + cur_sweep_points = self._sample_points(cur_sweep_points, + self.cur_voxel_generator, + points_numpy.shape[1]) + if self.prev_voxel_generator is not None: + prev_sweeps_points = self._sample_points(prev_sweeps_points, + self.prev_voxel_generator, + points_numpy.shape[1]) + + points_numpy = np.concatenate( + [cur_sweep_points, prev_sweeps_points], 0) + else: + points_numpy = cur_sweep_points + + if self.cur_voxel_generator._max_num_points == 1: + points_numpy = points_numpy.squeeze(1) + results['points'] = points.new_point(points_numpy[..., :original_dim]) + + # Restore the corresponding seg and mask fields + for key, dim_index in map_fields2dim: + results[key] = points_numpy[..., dim_index] + + return results + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + + def _auto_indent(repr_str, indent): + repr_str = repr_str.split('\n') + repr_str = [' ' * indent + t + '\n' for t in repr_str] + repr_str = ''.join(repr_str)[:-1] + return repr_str + + repr_str = self.__class__.__name__ + indent = 4 + repr_str += '(\n' + repr_str += ' ' * indent + f'num_cur_sweep={self.cur_voxel_num},\n' + repr_str += ' ' * indent + f'num_prev_sweep={self.prev_voxel_num},\n' + repr_str += ' ' * indent + f'time_dim={self.time_dim},\n' + repr_str += ' ' * indent + 'cur_voxel_generator=\n' + repr_str += f'{_auto_indent(repr(self.cur_voxel_generator), 8)},\n' + repr_str += ' ' * indent + 'prev_voxel_generator=\n' + repr_str += f'{_auto_indent(repr(self.prev_voxel_generator), 8)})' + return repr_str + + +@TRANSFORMS.register_module() +class AffineResize(BaseTransform): + """Get the affine transform matrices to the target size. + + Different from :class:`RandomAffine` in MMDetection, this class can + calculate the affine transform matrices while resizing the input image + to a fixed size. The affine transform matrices include: 1) matrix + transforming original image to the network input image size. 2) matrix + transforming original image to the network output feature map size. + + Args: + img_scale (tuple): Images scales for resizing. + down_ratio (int): The down ratio of feature map. + Actually the arg should be >= 1. + bbox_clip_border (bool): Whether clip the objects + outside the border of the image. Defaults to True. 
+ """ + + def __init__(self, + img_scale: Tuple, + down_ratio: int, + bbox_clip_border: bool = True) -> None: + + self.img_scale = img_scale + self.down_ratio = down_ratio + self.bbox_clip_border = bbox_clip_border + + def transform(self, results: dict) -> dict: + """Call function to do affine transform to input image and labels. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Results after affine resize, 'affine_aug', 'trans_mat' + keys are added in the result dict. + """ + # The results have gone through RandomShiftScale before AffineResize + if 'center' not in results: + img = results['img'] + height, width = img.shape[:2] + center = np.array([width / 2, height / 2], dtype=np.float32) + size = np.array([width, height], dtype=np.float32) + results['affine_aug'] = False + else: + # The results did not go through RandomShiftScale before + # AffineResize + img = results['img'] + center = results['center'] + size = results['size'] + + trans_affine = self._get_transform_matrix(center, size, self.img_scale) + + img = cv2.warpAffine(img, trans_affine[:2, :], self.img_scale) + + if isinstance(self.down_ratio, tuple): + trans_mat = [ + self._get_transform_matrix( + center, size, + (self.img_scale[0] // ratio, self.img_scale[1] // ratio)) + for ratio in self.down_ratio + ] # (3, 3) + else: + trans_mat = self._get_transform_matrix( + center, size, (self.img_scale[0] // self.down_ratio, + self.img_scale[1] // self.down_ratio)) + + results['img'] = img + results['img_shape'] = img.shape + results['pad_shape'] = img.shape + results['trans_mat'] = trans_mat + + if 'gt_bboxes' in results: + self._affine_bboxes(results, trans_affine) + + if 'centers_2d' in results: + centers2d = self._affine_transform(results['centers_2d'], + trans_affine) + valid_index = (centers2d[:, 0] > + 0) & (centers2d[:, 0] < + self.img_scale[0]) & (centers2d[:, 1] > 0) & ( + centers2d[:, 1] < self.img_scale[1]) + results['centers_2d'] = centers2d[valid_index] + + if 'gt_bboxes' in results: + results['gt_bboxes'] = results['gt_bboxes'][valid_index] + if 'gt_bboxes_labels' in results: + results['gt_bboxes_labels'] = results['gt_bboxes_labels'][ + valid_index] + if 'gt_masks' in results: + raise NotImplementedError( + 'AffineResize only supports bbox.') + + if 'gt_bboxes_3d' in results: + results['gt_bboxes_3d'].tensor = results[ + 'gt_bboxes_3d'].tensor[valid_index] + if 'gt_labels_3d' in results: + results['gt_labels_3d'] = results['gt_labels_3d'][ + valid_index] + + results['depths'] = results['depths'][valid_index] + + return results + + def _affine_bboxes(self, results: dict, matrix: np.ndarray) -> None: + """Affine transform bboxes to input image. + + Args: + results (dict): Result dict from loading pipeline. + matrix (np.ndarray): Matrix transforming original + image to the network input image size. + shape: (3, 3) + """ + + bboxes = results['gt_bboxes'] + bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix) + bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix) + if self.bbox_clip_border: + bboxes[:, [0, 2]] = bboxes[:, [0, 2]].clip(0, + self.img_scale[0] - 1) + bboxes[:, [1, 3]] = bboxes[:, [1, 3]].clip(0, + self.img_scale[1] - 1) + results['gt_bboxes'] = bboxes + + def _affine_transform(self, points: np.ndarray, + matrix: np.ndarray) -> np.ndarray: + """Affine transform bbox points to input image. + + Args: + points (np.ndarray): Points to be transformed. + shape: (N, 2) + matrix (np.ndarray): Affine transform matrix. 
+ shape: (3, 3)
+
+ Returns:
+ np.ndarray: Transformed points.
+ """
+ num_points = points.shape[0]
+ hom_points_2d = np.concatenate((points, np.ones((num_points, 1))),
+ axis=1)
+ hom_points_2d = hom_points_2d.T
+ affined_points = np.matmul(matrix, hom_points_2d).T
+ return affined_points[:, :2]
+
+ def _get_transform_matrix(self, center: Tuple, scale: Tuple,
+ output_scale: Tuple[float]) -> np.ndarray:
+ """Get affine transform matrix.
+
+ Args:
+ center (tuple): Center of current image.
+ scale (tuple): Scale of current image.
+ output_scale (tuple[float]): The transform target image scales.
+
+ Returns:
+ np.ndarray: Affine transform matrix.
+ """
+ # TODO: further add rot and shift here.
+ src_w = scale[0]
+ dst_w = output_scale[0]
+ dst_h = output_scale[1]
+
+ src_dir = np.array([0, src_w * -0.5])
+ dst_dir = np.array([0, dst_w * -0.5])
+
+ src = np.zeros((3, 2), dtype=np.float32)
+ dst = np.zeros((3, 2), dtype=np.float32)
+ src[0, :] = center
+ src[1, :] = center + src_dir
+ dst[0, :] = np.array([dst_w * 0.5, dst_h * 0.5])
+ dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
+
+ src[2, :] = self._get_ref_point(src[0, :], src[1, :])
+ dst[2, :] = self._get_ref_point(dst[0, :], dst[1, :])
+
+ get_matrix = cv2.getAffineTransform(src, dst)
+
+ matrix = np.concatenate((get_matrix, [[0., 0., 1.]]))
+
+ return matrix.astype(np.float32)
+
+ def _get_ref_point(self, ref_point1: np.ndarray,
+ ref_point2: np.ndarray) -> np.ndarray:
+ """Get reference point to calculate affine transform matrix.
+
+ OpenCV needs at least three corresponding points on the original
+ and target images to calculate the affine matrix. Here we use two
+ points to derive the third reference point.
+ """
+ d = ref_point1 - ref_point2
+ ref_point3 = ref_point2 + np.array([-d[1], d[0]])
+ return ref_point3
+
+ def __repr__(self) -> str:
+ """str: Return a string that describes the module."""
+ repr_str = self.__class__.__name__
+ repr_str += f'(img_scale={self.img_scale}, '
+ repr_str += f'down_ratio={self.down_ratio}) '
+ return repr_str
+
+
+@TRANSFORMS.register_module()
+class RandomShiftScale(BaseTransform):
+ """Random shift scale.
+
+ Different from the normal shift and scale function, it does not
+ directly shift or scale the image. Instead, it records the shift and
+ scale infos in the results dict for later transforms to consume. It is
+ designed to be used together with AffineResize.
+
+ Args:
+ shift_scale (tuple[float]): Shift and scale range.
+ aug_prob (float): The shifting and scaling probability.
+ """
+
+ def __init__(self, shift_scale: Tuple[float], aug_prob: float) -> None:
+
+ self.shift_scale = shift_scale
+ self.aug_prob = aug_prob
+
+ def transform(self, results: dict) -> dict:
+ """Call function to record random shift and scale infos.
+
+ Args:
+ results (dict): Result dict from loading pipeline.
+
+ Returns:
+ dict: Results after random shift and scale, 'center', 'size'
+ and 'affine_aug' keys are added in the result dict.
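cv2.getAffineTransform needs three non-collinear correspondences, and `_get_ref_point` manufactures the third one by rotating the direction between the first two points by 90 degrees. A standalone check of that construction:

    import numpy as np

    def get_ref_point(p1: np.ndarray, p2: np.ndarray) -> np.ndarray:
        d = p1 - p2
        return p2 + np.array([-d[1], d[0]])  # perpendicular offset around p2

    p1, p2 = np.array([10., 4.]), np.array([10., 0.])
    p3 = get_ref_point(p1, p2)   # -> [6., 0.], i.e. p1 rotated 90 deg about p2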
+ """ + img = results['img'] + + height, width = img.shape[:2] + + center = np.array([width / 2, height / 2], dtype=np.float32) + size = np.array([width, height], dtype=np.float32) + + if random.random() < self.aug_prob: + shift, scale = self.shift_scale[0], self.shift_scale[1] + shift_ranges = np.arange(-shift, shift + 0.1, 0.1) + center[0] += size[0] * random.choice(shift_ranges) + center[1] += size[1] * random.choice(shift_ranges) + scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1) + size *= random.choice(scale_ranges) + results['affine_aug'] = True + else: + results['affine_aug'] = False + + results['center'] = center + results['size'] = size + + return results + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(shift_scale={self.shift_scale}, ' + repr_str += f'aug_prob={self.aug_prob}) ' + return repr_str + + +@TRANSFORMS.register_module() +class Resize3D(Resize): + + def _resize_3d(self, results: dict) -> None: + """Resize centers_2d and modify camera intrinisc with + ``results['scale']``.""" + if 'centers_2d' in results: + results['centers_2d'] *= results['scale_factor'][:2] + results['cam2img'][0] *= np.array(results['scale_factor'][0]) + results['cam2img'][1] *= np.array(results['scale_factor'][1]) + + def transform(self, results: dict) -> dict: + """Transform function to resize images, bounding boxes, semantic + segmentation map and keypoints. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map', + 'gt_keypoints', 'scale', 'scale_factor', 'img_shape', + and 'keep_ratio' keys are updated in result dict. + """ + + super(Resize3D, self).transform(results) + self._resize_3d(results) + return results + + +@TRANSFORMS.register_module() +class RandomResize3D(RandomResize): + """The difference between RandomResize3D and RandomResize: + + 1. Compared to RandomResize, this class would further + check if scale is already set in results. + 2. During resizing, this class would modify the centers_2d + and cam2img with ``results['scale']``. + """ + + def _resize_3d(self, results: dict) -> None: + """Resize centers_2d and modify camera intrinisc with + ``results['scale']``.""" + if 'centers_2d' in results: + results['centers_2d'] *= results['scale_factor'][:2] + results['cam2img'][0] *= np.array(results['scale_factor'][0]) + results['cam2img'][1] *= np.array(results['scale_factor'][1]) + + def transform(self, results: dict) -> dict: + """Transform function to resize images, bounding boxes, masks, semantic + segmentation map. Compared to RandomResize, this function would further + check if scale is already set in results. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', + 'keep_ratio' keys are added into result dict. + """ + if 'scale' not in results: + results['scale'] = self._random_scale() + self.resize.scale = results['scale'] + results = self.resize(results) + self._resize_3d(results) + + return results + + +@TRANSFORMS.register_module() +class RandomCrop3D(RandomCrop): + """3D version of RandomCrop. RamdomCrop3D supports the modifications of + camera intrinsic matrix and using predefined randomness variable to do the + augmentation. + + The absolute ``crop_size`` is sampled based on ``crop_type`` and + ``image_size``, then the cropped results are generated. 
+ + Required Keys: + + - img + - gt_bboxes (np.float32) (optional) + - gt_bboxes_labels (np.int64) (optional) + - gt_masks (BitmapMasks | PolygonMasks) (optional) + - gt_ignore_flags (np.bool) (optional) + - gt_seg_map (np.uint8) (optional) + + Modified Keys: + + - img + - img_shape + - gt_bboxes (optional) + - gt_bboxes_labels (optional) + - gt_masks (optional) + - gt_ignore_flags (optional) + - gt_seg_map (optional) + + Added Keys: + + - homography_matrix + + Args: + crop_size (tuple): The relative ratio or absolute pixels of + height and width. + crop_type (str): One of "relative_range", "relative", + "absolute", "absolute_range". "relative" randomly crops + (h * crop_size[0], w * crop_size[1]) part from an input of size + (h, w). "relative_range" uniformly samples relative crop size from + range [crop_size[0], 1] and [crop_size[1], 1] for height and width + respectively. "absolute" crops from an input with absolute size + (crop_size[0], crop_size[1]). "absolute_range" uniformly samples + crop_h in range [crop_size[0], min(h, crop_size[1])] and crop_w + in range [crop_size[0], min(w, crop_size[1])]. + Defaults to "absolute". + allow_negative_crop (bool): Whether to allow a crop that does + not contain any bbox area. Defaults to False. + recompute_bbox (bool): Whether to re-compute the boxes based + on cropped instance masks. Defaults to False. + bbox_clip_border (bool): Whether clip the objects outside + the border of the image. Defaults to True. + rel_offset_h (tuple): The cropping interval of image height. Defaults + to (0., 1.). + rel_offset_w (tuple): The cropping interval of image width. Defaults + to (0., 1.). + + Note: + - If the image is smaller than the absolute crop size, return the + original image. + - The keys for bboxes, labels and masks must be aligned. That is, + ``gt_bboxes`` corresponds to ``gt_labels`` and ``gt_masks``, and + ``gt_bboxes_ignore`` corresponds to ``gt_labels_ignore`` and + ``gt_masks_ignore``. + - If the crop does not contain any gt-bbox region and + ``allow_negative_crop`` is set to False, skip this image. + """ + + def __init__( + self, + crop_size: tuple, + crop_type: str = 'absolute', + allow_negative_crop: bool = False, + recompute_bbox: bool = False, + bbox_clip_border: bool = True, + rel_offset_h: tuple = (0., 1.), + rel_offset_w: tuple = (0., 1.) + ) -> None: + super().__init__( + crop_size=crop_size, + crop_type=crop_type, + allow_negative_crop=allow_negative_crop, + recompute_bbox=recompute_bbox, + bbox_clip_border=bbox_clip_border) + # rel_offset specifies the relative offset range of cropping origin + # [0., 1.] means starting from 0*margin to 1*margin + 1 + self.rel_offset_h = rel_offset_h + self.rel_offset_w = rel_offset_w + + def _crop_data(self, + results: dict, + crop_size: tuple, + allow_negative_crop: bool = False) -> dict: + """Function to randomly crop images, bounding boxes, masks, semantic + segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + crop_size (tuple): Expected absolute size after cropping, (h, w). + allow_negative_crop (bool): Whether to allow a crop that does not + contain any bbox area. Defaults to False. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. 
+ """ + assert crop_size[0] > 0 and crop_size[1] > 0 + for key in results.get('img_fields', ['img']): + img = results[key] + if 'img_crop_offset' not in results: + margin_h = max(img.shape[0] - crop_size[0], 0) + margin_w = max(img.shape[1] - crop_size[1], 0) + # TOCHECK: a little different from LIGA implementation + offset_h = np.random.randint( + self.rel_offset_h[0] * margin_h, + self.rel_offset_h[1] * margin_h + 1) + offset_w = np.random.randint( + self.rel_offset_w[0] * margin_w, + self.rel_offset_w[1] * margin_w + 1) + else: + offset_w, offset_h = results['img_crop_offset'] + + crop_h = min(crop_size[0], img.shape[0]) + crop_w = min(crop_size[1], img.shape[1]) + crop_y1, crop_y2 = offset_h, offset_h + crop_h + crop_x1, crop_x2 = offset_w, offset_w + crop_w + + # crop the image + img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] + img_shape = img.shape + results[key] = img + results['img_shape'] = img_shape + + # crop bboxes accordingly and clip to the image boundary + for key in results.get('bbox_fields', []): + # e.g. gt_bboxes and gt_bboxes_ignore + bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h], + dtype=np.float32) + bboxes = results[key] - bbox_offset + if self.bbox_clip_border: + bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1]) + bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0]) + valid_inds = (bboxes[:, 2] > bboxes[:, 0]) & ( + bboxes[:, 3] > bboxes[:, 1]) + # If the crop does not contain any gt-bbox area and + # allow_negative_crop is False, skip this image. + if (key == 'gt_bboxes' and not valid_inds.any() + and not allow_negative_crop): + return None + results[key] = bboxes[valid_inds, :] + # label fields. e.g. gt_labels and gt_labels_ignore + label_key = self.bbox2label.get(key) + if label_key in results: + results[label_key] = results[label_key][valid_inds] + + # mask fields, e.g. gt_masks and gt_masks_ignore + mask_key = self.bbox2mask.get(key) + if mask_key in results: + results[mask_key] = results[mask_key][ + valid_inds.nonzero()[0]].crop( + np.asarray([crop_x1, crop_y1, crop_x2, crop_y2])) + if self.recompute_bbox: + results[key] = results[mask_key].get_bboxes() + + # crop semantic seg + for key in results.get('seg_fields', []): + results[key] = results[key][crop_y1:crop_y2, crop_x1:crop_x2] + + # manipulate camera intrinsic matrix + # needs to apply offset to K instead of P2 (on KITTI) + if isinstance(results['cam2img'], list): + # TODO ignore this, but should handle it in the future + pass + else: + K = results['cam2img'][:3, :3].copy() + inv_K = np.linalg.inv(K) + T = np.matmul(inv_K, results['cam2img'][:3]) + K[0, 2] -= crop_x1 + K[1, 2] -= crop_y1 + offset_cam2img = np.matmul(K, T) + results['cam2img'][:offset_cam2img.shape[0], :offset_cam2img. + shape[1]] = offset_cam2img + + results['img_crop_offset'] = [offset_w, offset_h] + + return results + + def transform(self, results: dict) -> dict: + """Transform function to randomly crop images, bounding boxes, masks, + semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. 
+        """
+        image_size = results['img'].shape[:2]
+        if 'crop_size' not in results:
+            crop_size = self._get_crop_size(image_size)
+            results['crop_size'] = crop_size
+        else:
+            crop_size = results['crop_size']
+        results = self._crop_data(results, crop_size,
+                                  self.allow_negative_crop)
+        return results
+
+    def __repr__(self) -> str:
+        """str: Return a string that describes the module."""
+        repr_str = self.__class__.__name__
+        repr_str += f'(crop_size={self.crop_size}, '
+        repr_str += f'crop_type={self.crop_type}, '
+        repr_str += f'allow_negative_crop={self.allow_negative_crop}, '
+        repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
+        repr_str += f'rel_offset_h={self.rel_offset_h}, '
+        repr_str += f'rel_offset_w={self.rel_offset_w})'
+        return repr_str
+
+
+@TRANSFORMS.register_module()
+class PhotoMetricDistortion3D(PhotoMetricDistortion):
+    """Apply photometric distortion to an image sequentially; every
+    transformation is applied with a probability of 0.5. Random contrast is
+    applied either second or second to last.
+
+    PhotoMetricDistortion3D further supports reusing a predefined randomness
+    variable (stored under the ``photometric_param`` key) to apply exactly
+    the same distortion again, e.g. across multiple views.
+
+    1. random brightness
+    2. random contrast (mode 0)
+    3. convert color from BGR to HSV
+    4. random saturation
+    5. random hue
+    6. convert color from HSV to BGR
+    7. random contrast (mode 1)
+    8. randomly swap channels
+
+    Required Keys:
+
+    - img (np.uint8)
+
+    Modified Keys:
+
+    - img (np.float32)
+
+    Args:
+        brightness_delta (int): Delta of brightness.
+        contrast_range (sequence): Range of contrast.
+        saturation_range (sequence): Range of saturation.
+        hue_delta (int): Delta of hue.
+    """
+
+    def transform(self, results: dict) -> dict:
+        """Transform function to perform photometric distortion on images.
+
+        Args:
+            results (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Result dict with images distorted.
+        """
+        assert 'img' in results, '`img` is not found in results'
+        img = results['img']
+        img = img.astype(np.float32)
+        if 'photometric_param' not in results:
+            photometric_param = self._random_flags()
+            results['photometric_param'] = photometric_param
+        else:
+            photometric_param = results['photometric_param']
+
+        (mode, brightness_flag, contrast_flag, saturation_flag, hue_flag,
+         swap_flag, delta_value, alpha_value, saturation_value, hue_value,
+         swap_value) = photometric_param
+
+        # random brightness
+        if brightness_flag:
+            img += delta_value
+
+        # mode == 0 --> do random contrast first
+        # mode == 1 --> do random contrast last
+        if mode == 1:
+            if contrast_flag:
+                img *= alpha_value
+
+        # convert color from BGR to HSV
+        img = mmcv.bgr2hsv(img)
+
+        # random saturation
+        if saturation_flag:
+            img[..., 1] *= saturation_value
+
+        # random hue
+        if hue_flag:
+            img[..., 0] += hue_value
+            img[..., 0][img[..., 0] > 360] -= 360
+            img[..., 0][img[..., 0] < 0] += 360
+
+        # convert color from HSV to BGR
+        img = mmcv.hsv2bgr(img)
+
+        # random contrast
+        if mode == 0:
+            if contrast_flag:
+                img *= alpha_value
+
+        # randomly swap channels
+        if swap_flag:
+            img = img[..., swap_value]
+
+        results['img'] = img
+        return results
+
+
+@TRANSFORMS.register_module()
+class MultiViewWrapper(BaseTransform):
+    """Wrap transformation from single-view into multi-view.
+
+    The wrapper processes the images from multi-view one by one. For each
+    image, it constructs a pseudo dict according to the keys specified by the
+    ``process_fields`` parameter.
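+    (Schematically, the pseudo dict for view ``i`` is
+    ``{key: input_dict[key][i] for key in process_fields}``, plus any
+    randomness keys reused from the previous view.)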
+    After the transformation is finished, the desired information can be
+    collected by specifying the keys in the ``collected_keys`` parameter.
+    When ``override_aug_config`` is True, all views share the random
+    transformation parameters drawn for the first view (the keys listed in
+    ``randomness_keys``); otherwise each view draws its own randomness.
+
+    Args:
+        transforms (list[dict]): A list of dict specifying the transformations
+            for the monocular situation.
+        override_aug_config (bool): Whether to reuse the same random
+            augmentation parameters for every view. Defaults to True.
+        process_fields (list): Desired keys that the transformations should
+            be conducted on. Defaults to ['img', 'cam2img', 'lidar2cam'].
+        collected_keys (list): Information collected during the
+            transformations, such as rotate angles, crop roi and flip state.
+            Defaults to ['scale', 'scale_factor', 'crop',
+            'img_crop_offset', 'ori_shape', 'pad_shape', 'img_shape',
+            'pad_fixed_size', 'pad_size_divisor', 'flip', 'flip_direction',
+            'rotate'].
+        randomness_keys (list): The keys that are related to the randomness
+            in the transformations. Defaults to
+            ['scale', 'scale_factor', 'crop_size', 'img_crop_offset',
+            'flip', 'flip_direction', 'photometric_param'].
+    """
+
+    def __init__(
+        self,
+        transforms: list,
+        override_aug_config: bool = True,
+        process_fields: list = ['img', 'cam2img', 'lidar2cam'],
+        collected_keys: list = [
+            'scale', 'scale_factor', 'crop', 'img_crop_offset', 'ori_shape',
+            'pad_shape', 'img_shape', 'pad_fixed_size', 'pad_size_divisor',
+            'flip', 'flip_direction', 'rotate'
+        ],
+        randomness_keys: list = [
+            'scale', 'scale_factor', 'crop_size', 'img_crop_offset', 'flip',
+            'flip_direction', 'photometric_param'
+        ]
+    ) -> None:
+        self.transforms = Compose(transforms)
+        self.override_aug_config = override_aug_config
+        self.collected_keys = collected_keys
+        self.process_fields = process_fields
+        self.randomness_keys = randomness_keys
+
+    def transform(self, input_dict: dict) -> dict:
+        """Transform function to apply the wrapped transforms to each view.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Output dict after transformation.
+        """
+        # store the augmentation related keys for each image.
+        for key in self.collected_keys:
+            if key not in input_dict or \
+                    not isinstance(input_dict[key], list):
+                input_dict[key] = []
+        prev_process_dict = {}
+        for img_id in range(len(input_dict['img'])):
+            process_dict = {}
+
+            # override the process dict (e.g. scale in random scale,
+            # crop_size in random crop, flip, flip_direction in
+            # random flip)
+            if img_id != 0 and self.override_aug_config:
+                for key in self.randomness_keys:
+                    if key in prev_process_dict:
+                        process_dict[key] = prev_process_dict[key]
+
+            for key in self.process_fields:
+                if key in input_dict:
+                    process_dict[key] = input_dict[key][img_id]
+            process_dict = self.transforms(process_dict)
+            # store the randomness variables used for this view so that the
+            # next view can reuse them when override_aug_config is True
+            prev_process_dict = process_dict
+
+            # write the processed fields back to input_dict
+            for key in self.process_fields:
+                if key in process_dict:
+                    input_dict[key][img_id] = process_dict[key]
+            # update the collected keys
+            for key in self.collected_keys:
+                if key in process_dict:
+                    if len(input_dict[key]) == img_id + 1:
+                        input_dict[key][img_id] = process_dict[key]
+                    else:
+                        input_dict[key].append(process_dict[key])
+
+        for key in self.collected_keys:
+            if len(input_dict[key]) == 0:
+                input_dict.pop(key)
+        return input_dict
+
+
+@TRANSFORMS.register_module()
+class PolarMix(BaseTransform):
+    """PolarMix data augmentation.
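+
+    (Implementation of PolarMix, Xiao et al., NeurIPS 2022.)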
+ + Required Keys: + + - points (:obj:`BasePoints`) + - pts_semantic_mask (np.int64) + - mix_results (List[dict]) + + Modified Keys: + + - points (:obj:`BasePoints`) + - pts_semantic_mask (np.int64) + + Args: + instance_classes (List[int]): Semantic masks which represent the + instance. + swap_ratio (float): Swap ratio of two point cloud. Defaults to 0.5. + rotate_paste_ratio (float): Rotate paste ratio. Defaults to 1.0. + """ + + def __init__(self, + instance_classes: List[int], + swap_ratio: float = 0.5, + rotate_paste_ratio: float = 1.0) -> None: + assert is_list_of(instance_classes, int) + self.instance_classes = instance_classes + self.swap_ratio = swap_ratio + self.rotate_paste_ratio = rotate_paste_ratio + + def get_indexes(self, dataset: BaseDataset) -> int: + """Call function to collect indexes. + + Args: + dataset (:obj:`BaseDataset`): The dataset. + + Returns: + int: Index. + """ + index = random.randint(0, len(dataset)) + return index + + def transform(self, input_dict: dict) -> dict: + """PolarMix transform function. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: output dict after transformtaion + """ + + assert 'mix_results' in input_dict + assert len(input_dict['mix_results']) == 1, \ + 'MixUp only support 2 point cloud now!' + + retrieve_results = input_dict['mix_results'][0] + retrieve_points = retrieve_results['points'] + retrieve_pts_semantic_mask = retrieve_results['pts_semantic_mask'] + + points = input_dict['points'] + pts_semantic_mask = input_dict['pts_semantic_mask'] + + # 1. swap point cloud + if np.random.random() < self.swap_ratio: + start_angle = (np.random.random() - 1) * np.pi # -pi~pi + end_angle = start_angle + np.pi + # calculate horizontal angle for each point + yaw = torch.atan2(points.coord[:, 1], points.coord[:, 0]) + retrieve_yaw = torch.atan2(retrieve_points.coord[:, 1], + retrieve_points.coord[:, 0]) + + # select points in sector + idx = (yaw <= start_angle) | (yaw >= end_angle) + retrieve_idx = (retrieve_yaw > start_angle) & ( + retrieve_yaw < end_angle) + + # swap + points = points[idx] + points = points.cat([points, retrieve_points[retrieve_idx]]) + pts_semantic_mask = np.concatenate( + (pts_semantic_mask[idx.numpy()], + retrieve_pts_semantic_mask[retrieve_idx.numpy()]), + axis=0) + + # 2. 
rotate-pasting + if np.random.random() < self.rotate_paste_ratio: + # extract instance points + instance_points, instance_pts_semantic_mask = [], [] + for instance_class in self.instance_classes: + retrieve_idx = retrieve_pts_semantic_mask == instance_class + instance_points.append(retrieve_points[retrieve_idx]) + instance_pts_semantic_mask.append( + retrieve_pts_semantic_mask[retrieve_idx]) + instance_points = retrieve_points.cat(instance_points) + instance_pts_semantic_mask = np.concatenate( + instance_pts_semantic_mask, axis=0) + + # rotate-copy + copy_points = [instance_points] + copy_pts_semantic_mask = [instance_pts_semantic_mask] + angle_list = [ + np.random.random() * np.pi * 2 / 3, + (np.random.random() + 1) * np.pi * 2 / 3 + ] + for angle in angle_list: + new_points = instance_points.clone() + new_points.rotate(angle) + copy_points.append(new_points) + copy_pts_semantic_mask.append(instance_pts_semantic_mask) + copy_points = instance_points.cat(copy_points) + copy_pts_semantic_mask = np.concatenate( + copy_pts_semantic_mask, axis=0) + + points = points.cat([points, copy_points]) + pts_semantic_mask = np.concatenate( + (pts_semantic_mask, copy_pts_semantic_mask), axis=0) + + input_dict['points'] = points + input_dict['pts_semantic_mask'] = pts_semantic_mask + return input_dict + + def __repr__(self) -> dict: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(instance_classes={self.instance_classes}, ' + repr_str += f'swap_ratio={self.swap_ratio})' + return repr_str From d74d163af363f99d9d562a0b5c283e493f4fb6e1 Mon Sep 17 00:00:00 2001 From: Xiang Xu Date: Mon, 13 Feb 2023 11:08:54 +0800 Subject: [PATCH 07/22] Update test_transforms_3d.py --- tests/test_datasets/test_transforms/test_transforms_3d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_datasets/test_transforms/test_transforms_3d.py b/tests/test_datasets/test_transforms/test_transforms_3d.py index 175451c3ee..59fda6f834 100644 --- a/tests/test_datasets/test_transforms/test_transforms_3d.py +++ b/tests/test_datasets/test_transforms/test_transforms_3d.py @@ -119,7 +119,7 @@ def test_transform(self): with self.assertRaises(AssertionError): transform = PolarMix(instance_classes=[1.0, 2.0]) - transform = PolarMix(instance_classes=[1, 2]) + transform = PolarMix(instance_classes=[1, 2], swap_ratio=1.0) # test assertion for invalid mix_results with self.assertRaises(AssertionError): results = transform(copy.deepcopy(self.results)) From 4eb8d54e402bbad5a788af685b485a647a182efd Mon Sep 17 00:00:00 2001 From: Xiang Xu Date: Mon, 13 Feb 2023 11:18:27 +0800 Subject: [PATCH 08/22] update docs --- mmdet3d/datasets/transforms/transforms_3d.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py index a729f5739c..e38fb87b7c 100644 --- a/mmdet3d/datasets/transforms/transforms_3d.py +++ b/mmdet3d/datasets/transforms/transforms_3d.py @@ -2435,8 +2435,7 @@ def transform(self, input_dict: dict) -> dict: retrieve_yaw < end_angle) # swap - points = points[idx] - points = points.cat([points, retrieve_points[retrieve_idx]]) + points = points.cat([points[idx], retrieve_points[retrieve_idx]]) pts_semantic_mask = np.concatenate( (pts_semantic_mask[idx.numpy()], retrieve_pts_semantic_mask[retrieve_idx.numpy()]), @@ -2483,5 +2482,6 @@ def __repr__(self) -> dict: """str: Return a string that describes the module.""" repr_str = self.__class__.__name__ 
repr_str += f'(instance_classes={self.instance_classes}, ' - repr_str += f'swap_ratio={self.swap_ratio})' + repr_str += f'swap_ratio={self.swap_ratio}, ' + repr_str += f'rotate_paste_ratio={self.rotate_paste_ratio})' return repr_str From 34ad2a9d2838cb7070f3dcb59e2eb2b8099923ca Mon Sep 17 00:00:00 2001 From: Xiangxu-0103 Date: Wed, 15 Feb 2023 12:55:43 +0000 Subject: [PATCH 09/22] update polarmix without MultiImageMixDataset --- mmdet3d/datasets/seg3d_dataset.py | 18 ++++- mmdet3d/datasets/transforms/transforms_3d.py | 62 +++++++++++++-- .../test_transforms/test_transforms_3d.py | 79 +++++++++++++++---- 3 files changed, 136 insertions(+), 23 deletions(-) diff --git a/mmdet3d/datasets/seg3d_dataset.py b/mmdet3d/datasets/seg3d_dataset.py index 9a268b7359..2f13f70bc5 100644 --- a/mmdet3d/datasets/seg3d_dataset.py +++ b/mmdet3d/datasets/seg3d_dataset.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. from os import path as osp -from typing import Callable, List, Optional, Sequence, Union +from typing import Any, Callable, List, Optional, Sequence, Union import mmengine import numpy as np @@ -267,6 +267,22 @@ def parse_data_info(self, info: dict) -> dict: return info + def prepare_data(self, idx) -> Any: + """Get data processed by ``self.pipeline``. + + Args: + idx (int): The index of ``data_info``. + + Returns: + Any: Depends on ``self.pipeline``. + """ + if self.test_mode is False: + data_info = self.get_data_info(idx) + data_info['dataset'] = self + return self.pipeline(data_info) + else: + return super().prepare_data(idx) + def get_scene_idxs(self, scene_idxs: Union[None, str, np.ndarray]) -> np.ndarray: """Compute scene_idxs for data sampling. diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py index e38fb87b7c..457feee8c5 100644 --- a/mmdet3d/datasets/transforms/transforms_3d.py +++ b/mmdet3d/datasets/transforms/transforms_3d.py @@ -1,7 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. +import copy import random import warnings -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Sequence, Tuple, Union import cv2 import mmcv @@ -2376,17 +2377,25 @@ class PolarMix(BaseTransform): instance. swap_ratio (float): Swap ratio of two point cloud. Defaults to 0.5. rotate_paste_ratio (float): Rotate paste ratio. Defaults to 1.0. + pre_transform (Sequence[dict]): Sequence of transform object or config + dict to be composed. """ def __init__(self, instance_classes: List[int], swap_ratio: float = 0.5, - rotate_paste_ratio: float = 1.0) -> None: + rotate_paste_ratio: float = 1.0, + pre_transform: Optional[Sequence[dict]] = None) -> None: assert is_list_of(instance_classes, int) self.instance_classes = instance_classes self.swap_ratio = swap_ratio self.rotate_paste_ratio = rotate_paste_ratio + if pre_transform is None: + self.pre_transform = None + else: + self.pre_transform = Compose(pre_transform) + def get_indexes(self, dataset: BaseDataset) -> int: """Call function to collect indexes. @@ -2396,17 +2405,17 @@ def get_indexes(self, dataset: BaseDataset) -> int: Returns: int: Index. """ - index = random.randint(0, len(dataset)) + index = np.random.randint(0, len(dataset)) return index - def transform(self, input_dict: dict) -> dict: + def polar_mix_transform(self, input_dict: dict) -> dict: """PolarMix transform function. Args: input_dict (dict): Result dict from loading pipeline. Returns: - dict: output dict after transformtaion + dict: output dict after transformtaion. 
""" assert 'mix_results' in input_dict @@ -2422,7 +2431,7 @@ def transform(self, input_dict: dict) -> dict: # 1. swap point cloud if np.random.random() < self.swap_ratio: - start_angle = (np.random.random() - 1) * np.pi # -pi~pi + start_angle = (np.random.random() - 1) * np.pi # -pi~0 end_angle = start_angle + np.pi # calculate horizontal angle for each point yaw = torch.atan2(points.coord[:, 1], points.coord[:, 0]) @@ -2478,10 +2487,49 @@ def transform(self, input_dict: dict) -> dict: input_dict['pts_semantic_mask'] = pts_semantic_mask return input_dict + def transform(self, input_dict: dict) -> dict: + """PolarMix transform function. + + Args: + input_dict (dict): Result dict from loading pipeline. + + Returns: + dict: output dict after transformtaion. + """ + + assert 'dataset' in input_dict + dataset = input_dict.pop('dataset', None) + + # get index of other images + index = self.get_indexes(dataset) + + mix_results = [copy.deepcopy(dataset.get_data_info(index))] + + if self.pre_transform is not None: + for i, data in enumerate(mix_results): + # pre_transform may also require dataset + data.update({'dataset': dataset}) + # before polarmix need to go through + # the necessary pre_transform + _results = self.pre_transform(data) + _results.pop('dataset') + mix_results[i] = _results + + input_dict['mix_results'] = mix_results + + input_dict = self.polar_mix_transform(input_dict) + + if 'mix_results' in input_dict: + input_dict.pop('mix_results') + input_dict['dataset'] = dataset + + return input_dict + def __repr__(self) -> dict: """str: Return a string that describes the module.""" repr_str = self.__class__.__name__ repr_str += f'(instance_classes={self.instance_classes}, ' repr_str += f'swap_ratio={self.swap_ratio}, ' - repr_str += f'rotate_paste_ratio={self.rotate_paste_ratio})' + repr_str += f'rotate_paste_ratio={self.rotate_paste_ratio}, ' + repr_str += f'pre_transform={self.pre_transform})' return repr_str diff --git a/tests/test_datasets/test_transforms/test_transforms_3d.py b/tests/test_datasets/test_transforms/test_transforms_3d.py index 59fda6f834..91df312646 100644 --- a/tests/test_datasets/test_transforms/test_transforms_3d.py +++ b/tests/test_datasets/test_transforms/test_transforms_3d.py @@ -6,10 +6,14 @@ import torch from mmengine.testing import assert_allclose -from mmdet3d.datasets import GlobalAlignment, RandomFlip3D +from mmdet3d.datasets import (GlobalAlignment, RandomFlip3D, + SemanticKITTIDataset) from mmdet3d.datasets.transforms import GlobalRotScaleTrans, PolarMix -from mmdet3d.structures import BasePoints +from mmdet3d.structures import LiDARPoints from mmdet3d.testing import create_data_info_after_loading +from mmdet3d.utils import register_all_modules + +register_all_modules() class TestGlobalRotScaleTrans(unittest.TestCase): @@ -105,10 +109,61 @@ def test_global_alignment(self): class TestPolarMix(unittest.TestCase): def setUp(self): + self.pre_transform = [ + dict( + type='LoadPointsFromFile', + coord_type='LIDAR', + shift_height=True, + load_dim=4, + use_dim=[0, 1, 2]), + dict( + type='LoadAnnotations3D', + with_bbox_3d=False, + with_label_3d=False, + with_mask_3d=False, + with_seg_3d=True, + seg_3d_dtype=np.int32), + ] + classes = ('unlabeled', 'car', 'bicycle', 'motorcycle', 'truck', 'bus', + 'person', 'bicyclist', 'motorcyclist', 'road', 'parking', + 'sidewalk', 'other-ground', 'building', 'fence', + 'vegetation', 'trunck', 'terrian', 'pole', 'traffic-sign') + palette = [ + [174, 199, 232], + [152, 223, 138], + [31, 119, 180], + [255, 187, 120], + [188, 
189, 34], + [140, 86, 75], + [255, 152, 150], + [214, 39, 40], + [197, 176, 213], + [148, 103, 189], + [196, 156, 148], + [23, 190, 207], + [247, 182, 210], + [219, 219, 141], + [255, 127, 14], + [158, 218, 229], + [44, 160, 44], + [112, 128, 144], + [227, 119, 194], + [82, 84, 163], + ] + self.dataset = SemanticKITTIDataset( + './tests/data/semantickitti/', + 'semantickitti_infos.pkl', + metainfo=dict(classes=classes, palette=palette), + data_prefix=dict( + pts='sequences/00/velodyne', + pts_semantic_mask='sequences/00/labels'), + pipeline=[], + modality=dict(use_lidar=True, use_camera=False)) points = np.random.random((100, 4)) self.results = { - 'points': BasePoints(points, points_dim=4), - 'pts_semantic_mask': np.random.randint(0, 5, (100, )) + 'points': LiDARPoints(points, points_dim=4), + 'pts_semantic_mask': np.random.randint(0, 20, (100, )), + 'dataset': self.dataset } def test_transform(self): @@ -119,16 +174,10 @@ def test_transform(self): with self.assertRaises(AssertionError): transform = PolarMix(instance_classes=[1.0, 2.0]) - transform = PolarMix(instance_classes=[1, 2], swap_ratio=1.0) - # test assertion for invalid mix_results - with self.assertRaises(AssertionError): - results = transform(copy.deepcopy(self.results)) - - with self.assertRaises(AssertionError): - self.results['mix_results'] = [copy.deepcopy(self.results)] * 2 - results = transform(copy.deepcopy(self.results)) - - self.results['mix_results'] = [copy.deepcopy(self.results)] - results = transform(copy.deepcopy(self.results)) + transform = PolarMix( + instance_classes=[1, 2], + swap_ratio=1.0, + pre_transform=self.pre_transform) + results = transform.transform(copy.deepcopy(self.results)) self.assertTrue(results['points'].shape[0] == results['pts_semantic_mask'].shape[0]) From b50fc17b0ae78a6317c7762bc08c10f8c218be17 Mon Sep 17 00:00:00 2001 From: Xiangxu-0103 Date: Thu, 16 Feb 2023 12:04:57 +0000 Subject: [PATCH 10/22] add comments --- mmdet3d/datasets/seg3d_dataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mmdet3d/datasets/seg3d_dataset.py b/mmdet3d/datasets/seg3d_dataset.py index 2f13f70bc5..dffd4ed6f4 100644 --- a/mmdet3d/datasets/seg3d_dataset.py +++ b/mmdet3d/datasets/seg3d_dataset.py @@ -278,6 +278,8 @@ def prepare_data(self, idx) -> Any: """ if self.test_mode is False: data_info = self.get_data_info(idx) + # Pass the dataset to the pipeline during training to support mixed + # data augmentation, such as polarmix. 
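+            # Mix-style transforms can then fetch a second raw sample on
+            # the fly via `results['dataset'].get_data_info(index)`.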
data_info['dataset'] = self return self.pipeline(data_info) else: From cadc5644495519c4125d7a20fef828007346f0fc Mon Sep 17 00:00:00 2001 From: Xiangxu-0103 Date: Fri, 17 Feb 2023 01:36:42 +0000 Subject: [PATCH 11/22] fix UT --- .../test_transforms/test_transforms_3d.py | 44 ++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/tests/test_datasets/test_transforms/test_transforms_3d.py b/tests/test_datasets/test_transforms/test_transforms_3d.py index 91df312646..8eeac78890 100644 --- a/tests/test_datasets/test_transforms/test_transforms_3d.py +++ b/tests/test_datasets/test_transforms/test_transforms_3d.py @@ -123,6 +123,7 @@ def setUp(self): with_mask_3d=False, with_seg_3d=True, seg_3d_dtype=np.int32), + dict(type='PointSegClassMapping'), ] classes = ('unlabeled', 'car', 'bicycle', 'motorcycle', 'truck', 'bus', 'person', 'bicyclist', 'motorcyclist', 'road', 'parking', @@ -150,10 +151,51 @@ def setUp(self): [227, 119, 194], [82, 84, 163], ] + seg_label_mapping = { + 0: 0, # "unlabeled" + 1: 0, # "outlier" mapped to "unlabeled" --------------mapped + 10: 1, # "car" + 11: 2, # "bicycle" + 13: 5, # "bus" mapped to "other-vehicle" --------------mapped + 15: 3, # "motorcycle" + 16: 5, # "on-rails" mapped to "other-vehicle" ---------mapped + 18: 4, # "truck" + 20: 5, # "other-vehicle" + 30: 6, # "person" + 31: 7, # "bicyclist" + 32: 8, # "motorcyclist" + 40: 9, # "road" + 44: 10, # "parking" + 48: 11, # "sidewalk" + 49: 12, # "other-ground" + 50: 13, # "building" + 51: 14, # "fence" + 52: 0, # "other-structure" mapped to "unlabeled" ------mapped + 60: 9, # "lane-marking" to "road" ---------------------mapped + 70: 15, # "vegetation" + 71: 16, # "trunk" + 72: 17, # "terrain" + 80: 18, # "pole" + 81: 19, # "traffic-sign" + 99: 0, # "other-object" to "unlabeled" ----------------mapped + 252: 1, # "moving-car" to "car" ------------------------mapped + 253: 7, # "moving-bicyclist" to "bicyclist" ------------mapped + 254: 6, # "moving-person" to "person" ------------------mapped + 255: 8, # "moving-motorcyclist" to "motorcyclist" ------mapped + 256: 5, # "moving-on-rails" mapped to "other-vehic------mapped + 257: 5, # "moving-bus" mapped to "other-vehicle" -------mapped + 258: 4, # "moving-truck" to "truck" --------------------mapped + 259: 5 # "moving-other"-vehicle to "other-vehicle"-----mapped + } + max_label = 259 self.dataset = SemanticKITTIDataset( './tests/data/semantickitti/', 'semantickitti_infos.pkl', - metainfo=dict(classes=classes, palette=palette), + metainfo=dict( + classes=classes, + palette=palette, + seg_label_mapping=seg_label_mapping, + max_label=max_label), data_prefix=dict( pts='sequences/00/velodyne', pts_semantic_mask='sequences/00/labels'), From 07cdbc1bca8cfbd423226b139089c7e34646dd42 Mon Sep 17 00:00:00 2001 From: Xiangxu-0103 Date: Mon, 20 Feb 2023 06:25:18 +0000 Subject: [PATCH 12/22] update docstring --- mmdet3d/datasets/seg3d_dataset.py | 2 +- mmdet3d/datasets/transforms/transforms_3d.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/mmdet3d/datasets/seg3d_dataset.py b/mmdet3d/datasets/seg3d_dataset.py index 9582beeb3e..874cff1fee 100644 --- a/mmdet3d/datasets/seg3d_dataset.py +++ b/mmdet3d/datasets/seg3d_dataset.py @@ -292,7 +292,7 @@ def prepare_data(self, idx) -> Any: Returns: Any: Depends on ``self.pipeline``. """ - if self.test_mode is False: + if not self.test_mode: data_info = self.get_data_info(idx) # Pass the dataset to the pipeline during training to support mixed # data augmentation, such as polarmix. 
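
The contract introduced above is easy to miss in the diff: during training, ``prepare_data`` injects the dataset object into every result dict, and a mix-style transform may pull a second raw sample through that handle. Below is a minimal runnable sketch of that flow; the names ``ToyDataset`` and ``ToyMix`` are hypothetical stand-ins for ``BaseDataset`` and ``PolarMix``, which is why no mmdet3d imports appear.

import numpy as np


class ToyDataset:
    """Hypothetical stand-in for BaseDataset: raw info dicts by index."""

    def __init__(self, infos):
        self._infos = infos

    def __len__(self):
        return len(self._infos)

    def get_data_info(self, idx):
        # return a shallow copy, like a freshly loaded sample
        return dict(self._infos[idx])


class ToyMix:
    """Hypothetical mix transform using the results['dataset'] contract."""

    def transform(self, results):
        assert 'dataset' in results, \
            '`dataset` must be injected by prepare_data during training.'
        dataset = results['dataset']
        # draw a second sample the same way PolarMix does in this series
        index = np.random.randint(0, len(dataset))
        mix_info = dataset.get_data_info(index)
        results['mixed_with'] = mix_info['sample_id']
        return results


dataset = ToyDataset([{'sample_id': i} for i in range(4)])
sample = dataset.get_data_info(0)
sample['dataset'] = dataset  # what prepare_data now does in training mode
print(ToyMix().transform(sample)['mixed_with'])

Keeping the handle inside the result dict is what lets this series drop the earlier ``MultiImageMixDataset``-style wrapper (see the subject of [PATCH 09/22]).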
diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py
index 457feee8c5..9f90bb1bea 100644
--- a/mmdet3d/datasets/transforms/transforms_3d.py
+++ b/mmdet3d/datasets/transforms/transforms_3d.py
@@ -2361,6 +2361,14 @@ class PolarMix(BaseTransform):
     """PolarMix data augmentation.
 
+    The PolarMix transform steps are as follows:
+
+    1. Another random point cloud is picked from the dataset.
+    2. Exchange sectors of the two point clouds that are cut at certain
+       azimuth angles.
+    3. Cut point instances from the picked point cloud, rotate them by
+       multiple azimuth angles, and paste the cut and rotated instances.
+
     Required Keys:
 
     - points (:obj:`BasePoints`)
     - pts_semantic_mask (np.int64)

From 31382cd99bbe39949326246524942ebacb6b86fa Mon Sep 17 00:00:00 2001
From: Xiangxu-0103
Date: Mon, 20 Feb 2023 07:56:55 +0000
Subject: [PATCH 13/22] fix yaw calculation

---
 mmdet3d/datasets/transforms/transforms_3d.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py
index 9f90bb1bea..febd2cb9f7 100644
--- a/mmdet3d/datasets/transforms/transforms_3d.py
+++ b/mmdet3d/datasets/transforms/transforms_3d.py
@@ -2442,9 +2442,9 @@ def polar_mix_transform(self, input_dict: dict) -> dict:
             start_angle = (np.random.random() - 1) * np.pi  # -pi~0
             end_angle = start_angle + np.pi
             # calculate horizontal angle for each point
-            yaw = torch.atan2(points.coord[:, 1], points.coord[:, 0])
-            retrieve_yaw = torch.atan2(retrieve_points.coord[:, 1],
-                                       retrieve_points.coord[:, 0])
+            yaw = -torch.atan2(points.coord[:, 1], points.coord[:, 0])
+            retrieve_yaw = -torch.atan2(retrieve_points.coord[:, 1],
+                                        retrieve_points.coord[:, 0])
 
             # select points in sector
             idx = (yaw <= start_angle) | (yaw >= end_angle)

From f5d9f32155618dc4e359aac356c93bfb609ac8af Mon Sep 17 00:00:00 2001
From: Xiangxu-0103
Date: Mon, 20 Feb 2023 08:27:14 +0000
Subject: [PATCH 14/22] fix UT

---
 tests/test_datasets/test_transforms/test_transforms_3d.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_datasets/test_transforms/test_transforms_3d.py b/tests/test_datasets/test_transforms/test_transforms_3d.py
index 8eeac78890..8b3380169d 100644
--- a/tests/test_datasets/test_transforms/test_transforms_3d.py
+++ b/tests/test_datasets/test_transforms/test_transforms_3d.py
@@ -122,7 +122,7 @@ def setUp(self):
                 with_label_3d=False,
                 with_mask_3d=False,
                 with_seg_3d=True,
-                seg_3d_dtype=np.int32),
+                seg_3d_dtype='np.int32'),
             dict(type='PointSegClassMapping'),
         ]
         classes = ('unlabeled', 'car', 'bicycle', 'motorcycle', 'truck', 'bus',

From d3579109ad399dc918bdd4e91ac64bf5e607c2c9 Mon Sep 17 00:00:00 2001
From: Xiangxu-0103
Date: Tue, 21 Feb 2023 02:34:08 +0000
Subject: [PATCH 15/22] refactor

---
 mmdet3d/datasets/transforms/transforms_3d.py | 77 +++++++-------------
 1 file changed, 25 insertions(+), 52 deletions(-)

diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py
index febd2cb9f7..0314ea8556 100644
--- a/mmdet3d/datasets/transforms/transforms_3d.py
+++ b/mmdet3d/datasets/transforms/transforms_3d.py
@@ -12,7 +12,6 @@
 from mmdet.datasets.transforms import (PhotoMetricDistortion, RandomCrop,
                                        RandomFlip)
 from mmengine import is_list_of, is_tuple_of
-from mmengine.dataset import BaseDataset
 
 from mmdet3d.models.task_modules import VoxelGenerator
 from mmdet3d.registry import TRANSFORMS
@@ -2373,7 +2372,7 @@ class PolarMix(BaseTransform):
 
     - points
(:obj:`BasePoints`) - pts_semantic_mask (np.int64) - - mix_results (List[dict]) + - dataset (:obj:`BaseDataset`) Modified Keys: @@ -2404,35 +2403,18 @@ def __init__(self, else: self.pre_transform = Compose(pre_transform) - def get_indexes(self, dataset: BaseDataset) -> int: - """Call function to collect indexes. - - Args: - dataset (:obj:`BaseDataset`): The dataset. - - Returns: - int: Index. - """ - index = np.random.randint(0, len(dataset)) - return index - - def polar_mix_transform(self, input_dict: dict) -> dict: + def polar_mix_transform(self, input_dict: dict, mix_results: dict) -> dict: """PolarMix transform function. Args: input_dict (dict): Result dict from loading pipeline. + mix_results (dict): Mixed dict picked from dataset. Returns: dict: output dict after transformtaion. """ - - assert 'mix_results' in input_dict - assert len(input_dict['mix_results']) == 1, \ - 'MixUp only support 2 point cloud now!' - - retrieve_results = input_dict['mix_results'][0] - retrieve_points = retrieve_results['points'] - retrieve_pts_semantic_mask = retrieve_results['pts_semantic_mask'] + mix_points = mix_results['points'] + mix_pts_semantic_mask = mix_results['pts_semantic_mask'] points = input_dict['points'] pts_semantic_mask = input_dict['pts_semantic_mask'] @@ -2443,19 +2425,18 @@ def polar_mix_transform(self, input_dict: dict) -> dict: end_angle = start_angle + np.pi # calculate horizontal angle for each point yaw = -torch.atan2(points.coord[:, 1], points.coord[:, 0]) - retrieve_yaw = -torch.atan2(retrieve_points.coord[:, 1], - retrieve_points.coord[:, 0]) + mix_yaw = -torch.atan2(mix_points.coord[:, 1], mix_points.coord[:, + 0]) # select points in sector idx = (yaw <= start_angle) | (yaw >= end_angle) - retrieve_idx = (retrieve_yaw > start_angle) & ( - retrieve_yaw < end_angle) + mix_idx = (mix_yaw > start_angle) & (mix_yaw < end_angle) # swap - points = points.cat([points[idx], retrieve_points[retrieve_idx]]) + points = points.cat([points[idx], mix_points[mix_idx]]) pts_semantic_mask = np.concatenate( (pts_semantic_mask[idx.numpy()], - retrieve_pts_semantic_mask[retrieve_idx.numpy()]), + mix_pts_semantic_mask[mix_idx.numpy()]), axis=0) # 2. 
rotate-pasting @@ -2463,11 +2444,11 @@ def polar_mix_transform(self, input_dict: dict) -> dict: # extract instance points instance_points, instance_pts_semantic_mask = [], [] for instance_class in self.instance_classes: - retrieve_idx = retrieve_pts_semantic_mask == instance_class - instance_points.append(retrieve_points[retrieve_idx]) + mix_idx = mix_pts_semantic_mask == instance_class + instance_points.append(mix_points[mix_idx]) instance_pts_semantic_mask.append( - retrieve_pts_semantic_mask[retrieve_idx]) - instance_points = retrieve_points.cat(instance_points) + mix_pts_semantic_mask[mix_idx]) + instance_points = mix_points.cat(instance_points) instance_pts_semantic_mask = np.concatenate( instance_pts_semantic_mask, axis=0) @@ -2506,30 +2487,22 @@ def transform(self, input_dict: dict) -> dict: """ assert 'dataset' in input_dict - dataset = input_dict.pop('dataset', None) + dataset = input_dict['dataset'] # get index of other images - index = self.get_indexes(dataset) + index = np.random.randint(0, len(dataset)) - mix_results = [copy.deepcopy(dataset.get_data_info(index))] + mix_results = copy.deepcopy(dataset.get_data_info(index)) if self.pre_transform is not None: - for i, data in enumerate(mix_results): - # pre_transform may also require dataset - data.update({'dataset': dataset}) - # before polarmix need to go through - # the necessary pre_transform - _results = self.pre_transform(data) - _results.pop('dataset') - mix_results[i] = _results - - input_dict['mix_results'] = mix_results - - input_dict = self.polar_mix_transform(input_dict) - - if 'mix_results' in input_dict: - input_dict.pop('mix_results') - input_dict['dataset'] = dataset + # pre_transform may also require dataset + mix_results.update({'dataset': dataset}) + # before polarmix need to go through + # the necessary pre_transform + mix_results = self.pre_transform(mix_results) + mix_results.pop('dataset') + + input_dict = self.polar_mix_transform(input_dict, mix_results) return input_dict From 51c5b8593557c6c8d35c1121a87481f2e555ac7f Mon Sep 17 00:00:00 2001 From: Xiangxu-0103 Date: Tue, 21 Feb 2023 06:12:23 +0000 Subject: [PATCH 16/22] update --- mmdet3d/datasets/seg3d_dataset.py | 6 +++--- mmdet3d/datasets/transforms/transforms_3d.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/mmdet3d/datasets/seg3d_dataset.py b/mmdet3d/datasets/seg3d_dataset.py index 874cff1fee..42025dee49 100644 --- a/mmdet3d/datasets/seg3d_dataset.py +++ b/mmdet3d/datasets/seg3d_dataset.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. from os import path as osp -from typing import Any, Callable, List, Optional, Sequence, Union +from typing import Callable, List, Optional, Sequence, Union import mmengine import numpy as np @@ -283,14 +283,14 @@ def parse_data_info(self, info: dict) -> dict: return info - def prepare_data(self, idx) -> Any: + def prepare_data(self, idx: int) -> dict: """Get data processed by ``self.pipeline``. Args: idx (int): The index of ``data_info``. Returns: - Any: Depends on ``self.pipeline``. + dict: Results passed through ``self.pipeline``. """ if not self.test_mode: data_info = self.get_data_info(idx) diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py index 0314ea8556..44a07a6f3c 100644 --- a/mmdet3d/datasets/transforms/transforms_3d.py +++ b/mmdet3d/datasets/transforms/transforms_3d.py @@ -1,5 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-import copy import random import warnings from typing import List, Optional, Sequence, Tuple, Union @@ -2486,13 +2485,14 @@ def transform(self, input_dict: dict) -> dict: dict: output dict after transformtaion. """ - assert 'dataset' in input_dict + assert 'dataset' in input_dict, \ + '`dataset` is needed to pass through PolarMix, while not found.' dataset = input_dict['dataset'] # get index of other images index = np.random.randint(0, len(dataset)) - mix_results = copy.deepcopy(dataset.get_data_info(index)) + mix_results = dataset.get_data_info(index) if self.pre_transform is not None: # pre_transform may also require dataset From 9e5ee9f426d623926ef0243bd1efe373318e11ca Mon Sep 17 00:00:00 2001 From: Xiangxu-0103 Date: Tue, 21 Feb 2023 06:42:29 +0000 Subject: [PATCH 17/22] update docs --- mmdet3d/datasets/transforms/transforms_3d.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py index 44a07a6f3c..8d5de5fe27 100644 --- a/mmdet3d/datasets/transforms/transforms_3d.py +++ b/mmdet3d/datasets/transforms/transforms_3d.py @@ -2383,8 +2383,8 @@ class PolarMix(BaseTransform): instance. swap_ratio (float): Swap ratio of two point cloud. Defaults to 0.5. rotate_paste_ratio (float): Rotate paste ratio. Defaults to 1.0. - pre_transform (Sequence[dict]): Sequence of transform object or config - dict to be composed. + pre_transform (Sequence[dict], optional): Sequence of transform object + or config dict to be composed. Defaults to None. """ def __init__(self, @@ -2392,7 +2392,8 @@ def __init__(self, swap_ratio: float = 0.5, rotate_paste_ratio: float = 1.0, pre_transform: Optional[Sequence[dict]] = None) -> None: - assert is_list_of(instance_classes, int) + assert is_list_of(instance_classes, int), \ + 'instance_classes should be a list of int' self.instance_classes = instance_classes self.swap_ratio = swap_ratio self.rotate_paste_ratio = rotate_paste_ratio From f985fd6edbec8c01839dadfe81c84f18da524900 Mon Sep 17 00:00:00 2001 From: Xiang Xu Date: Tue, 21 Feb 2023 14:55:53 +0800 Subject: [PATCH 18/22] fix typo --- mmdet3d/datasets/transforms/transforms_3d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py index 8d5de5fe27..354a7999fa 100644 --- a/mmdet3d/datasets/transforms/transforms_3d.py +++ b/mmdet3d/datasets/transforms/transforms_3d.py @@ -2411,7 +2411,7 @@ def polar_mix_transform(self, input_dict: dict, mix_results: dict) -> dict: mix_results (dict): Mixed dict picked from dataset. Returns: - dict: output dict after transformtaion. + dict: output dict after transformation. """ mix_points = mix_results['points'] mix_pts_semantic_mask = mix_results['pts_semantic_mask'] @@ -2483,7 +2483,7 @@ def transform(self, input_dict: dict) -> dict: input_dict (dict): Result dict from loading pipeline. Returns: - dict: output dict after transformtaion. + dict: output dict after transformation. 
""" assert 'dataset' in input_dict, \ From 388e74c75f4646fbbc7b5584d0066681cc1866f7 Mon Sep 17 00:00:00 2001 From: Xiang Xu Date: Tue, 21 Feb 2023 15:26:36 +0800 Subject: [PATCH 19/22] Update transforms_3d.py --- mmdet3d/datasets/transforms/transforms_3d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py index 354a7999fa..ac3667d74d 100644 --- a/mmdet3d/datasets/transforms/transforms_3d.py +++ b/mmdet3d/datasets/transforms/transforms_3d.py @@ -2490,7 +2490,7 @@ def transform(self, input_dict: dict) -> dict: '`dataset` is needed to pass through PolarMix, while not found.' dataset = input_dict['dataset'] - # get index of other images + # get index of other point cloud index = np.random.randint(0, len(dataset)) mix_results = dataset.get_data_info(index) From 249ef53d1fb0f8390704e64b1b0b4cfbff5b882d Mon Sep 17 00:00:00 2001 From: Xiang Xu Date: Wed, 22 Feb 2023 14:06:10 +0800 Subject: [PATCH 20/22] update ut --- tests/test_datasets/test_transforms/test_transforms_3d.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_datasets/test_transforms/test_transforms_3d.py b/tests/test_datasets/test_transforms/test_transforms_3d.py index 8b3380169d..b66c3bb3c1 100644 --- a/tests/test_datasets/test_transforms/test_transforms_3d.py +++ b/tests/test_datasets/test_transforms/test_transforms_3d.py @@ -113,9 +113,8 @@ def setUp(self): dict( type='LoadPointsFromFile', coord_type='LIDAR', - shift_height=True, load_dim=4, - use_dim=[0, 1, 2]), + use_dim=4), dict( type='LoadAnnotations3D', with_bbox_3d=False, From 194a0be16079d966b4c469d3f5358f30dcf6dcdd Mon Sep 17 00:00:00 2001 From: Xiangxu-0103 Date: Wed, 22 Feb 2023 06:48:54 +0000 Subject: [PATCH 21/22] fix typehint --- mmdet3d/datasets/transforms/transforms_3d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py index ac3667d74d..87cc950466 100644 --- a/mmdet3d/datasets/transforms/transforms_3d.py +++ b/mmdet3d/datasets/transforms/transforms_3d.py @@ -2507,7 +2507,7 @@ def transform(self, input_dict: dict) -> dict: return input_dict - def __repr__(self) -> dict: + def __repr__(self) -> str: """str: Return a string that describes the module.""" repr_str = self.__class__.__name__ repr_str += f'(instance_classes={self.instance_classes}, ' From 6a80afde2fc5e847acd021e31087a698a91407da Mon Sep 17 00:00:00 2001 From: Xiangxu-0103 Date: Wed, 22 Feb 2023 07:54:41 +0000 Subject: [PATCH 22/22] add prob argument --- mmdet3d/datasets/transforms/transforms_3d.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mmdet3d/datasets/transforms/transforms_3d.py b/mmdet3d/datasets/transforms/transforms_3d.py index 87cc950466..dbdbf2a45c 100644 --- a/mmdet3d/datasets/transforms/transforms_3d.py +++ b/mmdet3d/datasets/transforms/transforms_3d.py @@ -2385,19 +2385,22 @@ class PolarMix(BaseTransform): rotate_paste_ratio (float): Rotate paste ratio. Defaults to 1.0. pre_transform (Sequence[dict], optional): Sequence of transform object or config dict to be composed. Defaults to None. + prob (float): The transformation probability. Defaults to 1.0. 
""" def __init__(self, instance_classes: List[int], swap_ratio: float = 0.5, rotate_paste_ratio: float = 1.0, - pre_transform: Optional[Sequence[dict]] = None) -> None: + pre_transform: Optional[Sequence[dict]] = None, + prob: float = 1.0) -> None: assert is_list_of(instance_classes, int), \ 'instance_classes should be a list of int' self.instance_classes = instance_classes self.swap_ratio = swap_ratio self.rotate_paste_ratio = rotate_paste_ratio + self.prob = prob if pre_transform is None: self.pre_transform = None else: @@ -2485,6 +2488,8 @@ def transform(self, input_dict: dict) -> dict: Returns: dict: output dict after transformation. """ + if np.random.rand() > self.prob: + return input_dict assert 'dataset' in input_dict, \ '`dataset` is needed to pass through PolarMix, while not found.' @@ -2513,5 +2518,6 @@ def __repr__(self) -> str: repr_str += f'(instance_classes={self.instance_classes}, ' repr_str += f'swap_ratio={self.swap_ratio}, ' repr_str += f'rotate_paste_ratio={self.rotate_paste_ratio}, ' - repr_str += f'pre_transform={self.pre_transform})' + repr_str += f'pre_transform={self.pre_transform}, ' + repr_str += f'prob={self.prob})' return repr_str