From 83a16d2447422ea82c909f78f9f46811ae27dede Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Wed, 4 Jan 2023 18:50:31 +0800
Subject: [PATCH 01/13] add mono3d inferencer

---
 mmdet3d/apis/inferencers/__init__.py          |   1 +
 .../apis/inferencers/mono3d_det_inferencer.py | 344 ++++++++++++++++++
 mmdet3d/datasets/transforms/loading.py        |  49 +++
 3 files changed, 394 insertions(+)
 create mode 100644 mmdet3d/apis/inferencers/__init__.py
 create mode 100644 mmdet3d/apis/inferencers/mono3d_det_inferencer.py

diff --git a/mmdet3d/apis/inferencers/__init__.py b/mmdet3d/apis/inferencers/__init__.py
new file mode 100644
index 0000000000..ef101fec61
--- /dev/null
+++ b/mmdet3d/apis/inferencers/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) OpenMMLab. All rights reserved.
diff --git a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py b/mmdet3d/apis/inferencers/mono3d_det_inferencer.py
new file mode 100644
index 0000000000..de5bd81983
--- /dev/null
+++ b/mmdet3d/apis/inferencers/mono3d_det_inferencer.py
@@ -0,0 +1,344 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+import os.path as osp
+import warnings
+from typing import Dict, List, Optional, Sequence, Tuple, Union
+
+import mmcv
+import mmengine
+import numpy as np
+import torch.nn as nn
+from mmdet.registry import DATASETS, MODELS
+from mmdet.utils import ConfigType, register_all_modules
+from mmengine.dataset import Compose
+from mmengine.infer.infer import BaseInferencer, ModelType
+from mmengine.runner import load_checkpoint
+from mmengine.structures import InstanceData
+from mmengine.visualization import Visualizer
+
+from ..evaluation import get_classes
+
+InstanceList = List[InstanceData]
+InputType = Union[str, np.ndarray]
+InputsType = Union[InputType, Sequence[InputType]]
+PredType = Union[InstanceData, InstanceList]
+ImgType = Union[np.ndarray, Sequence[np.ndarray]]
+ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]]
+
+
+class Mono3DDetInferencer(BaseInferencer):
+    """MMDet inferencer.
+
+    Args:
+        model (str, optional): Path to the config file or the model name
+            defined in metafile. For example, it could be
+            "yolox-s" or "configs/yolox/yolox_s_8xb8-300e_coco.py".
+        weights (str, optional): Path to the checkpoint. If it is not specified
+            and model is a model name of metafile, the weights will be loaded
+            from metafile. Defaults to None.
+        device (str, optional): Device to run inference. If None, the available
+            device will be automatically used. Defaults to None.
+    """
+
+    preprocess_kwargs: set = set()
+    forward_kwargs: set = set()
+    visualize_kwargs: set = {
+        'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr',
+        'img_out_dir'
+    }
+    postprocess_kwargs: set = {
+        'print_result', 'pred_out_file', 'return_datasample'
+    }
+
+    def __init__(self,
+                 model: Union[ModelType, str],
+                 weights: Optional[str] = None,
+                 device: Optional[str] = None,
+                 scope: Optional[str] = 'mmdet',
+                 palette: str = 'none') -> None:
+        # A global counter tracking the number of images processed, for
+        # naming of the output images
+        self.num_visualized_imgs = 0
+        self.palette = palette
+        register_all_modules()
+        super().__init__(
+            model=model, weights=weights, device=device, scope=scope)
+
+    def _init_model(
+        self,
+        cfg: ConfigType,
+        weights: str,
+        device: str = 'cpu',
+    ) -> nn.Module:
+        if 'init_cfg' in cfg.model.backbone:
+            cfg.model.backbone.init_cfg = None
+        model = MODELS.build(cfg.model)
+
+        checkpoint = load_checkpoint(model, weights, map_location='cpu')
+        checkpoint_meta = checkpoint.get('meta', {})
+        # save the dataset_meta in the model for convenience
+        if 'dataset_meta' in checkpoint_meta:
+            # mmdet 3.x, all keys should be lowercase
+            model.dataset_meta = {
+                k.lower(): v
+                for k, v in checkpoint_meta['dataset_meta'].items()
+            }
+        elif 'CLASSES' in checkpoint_meta:
+            # < mmdet 3.x
+            classes = checkpoint_meta['CLASSES']
+            model.dataset_meta = {'classes': classes}
+        else:
+            warnings.simplefilter('once')
+            warnings.warn(
+                'dataset_meta or class names are not saved in the '
+                'checkpoint\'s meta data, use COCO classes by default.')
+            model.dataset_meta = {'classes': get_classes('coco')}
+
+        # Priority:  args.palette -> config -> checkpoint
+        if self.palette != 'none':
+            model.dataset_meta['palette'] = self.palette
+        else:
+            test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset)
+            # lazy init. We only need the metainfo.
+            test_dataset_cfg['lazy_init'] = True
+            metainfo = DATASETS.build(test_dataset_cfg).metainfo
+            cfg_palette = metainfo.get('palette', None)
+            if cfg_palette is not None:
+                model.dataset_meta['palette'] = cfg_palette
+            else:
+                if 'palette' not in model.dataset_meta:
+                    warnings.warn(
+                        'palette does not exist, random is used by default. '
+                        'You can also set the palette to customize.')
+                    model.dataset_meta['palette'] = 'random'
+
+        model.cfg = cfg  # save the config in the model for convenience
+        model.to(device)
+        model.eval()
+        return model
+
+    def _init_pipeline(self, cfg: ConfigType) -> Compose:
+        """Initialize the test pipeline."""
+        pipeline_cfg = cfg.test_dataloader.dataset.pipeline
+
+        # For inference, the key of ``img_id`` is not used.
+        if 'meta_keys' in pipeline_cfg[-1]:
+            pipeline_cfg[-1]['meta_keys'] = tuple(
+                meta_key for meta_key in pipeline_cfg[-1]['meta_keys']
+                if meta_key != 'img_id')
+
+        load_img_idx = self._get_transform_idx(pipeline_cfg,
+                                               'LoadImageFromFileMono3D')
+        if load_img_idx == -1:
+            raise ValueError(
+                'LoadImageFromFileMono3D is not found in the test pipeline')
+        pipeline_cfg[load_img_idx]['type'] = 'Mono3DInferencerLoader'
+        return Compose(pipeline_cfg)
+
+    def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int:
+        """Returns the index of the transform in a pipeline.
+
+        If the transform is not found, returns -1.
+        """
+        for i, transform in enumerate(pipeline_cfg):
+            if transform['type'] == name:
+                return i
+        return -1
+
+    def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]:
+        visualizer = super()._init_visualizer(cfg)
+        visualizer.dataset_meta = self.model.dataset_meta
+        return visualizer
+
+    def __call__(self,
+                 inputs: InputsType,
+                 return_datasamples: bool = False,
+                 batch_size: int = 1,
+                 return_vis: bool = False,
+                 show: bool = False,
+                 wait_time: int = 0,
+                 draw_pred: bool = True,
+                 pred_score_thr: float = 0.3,
+                 img_out_dir: str = '',
+                 print_result: bool = False,
+                 pred_out_file: str = '',
+                 **kwargs) -> dict:
+        """Call the inferencer.
+        Args:
+            inputs (InputsType): Inputs for the inferencer.
+            return_datasamples (bool): Whether to return results as
+                :obj:`BaseDataElement`. Defaults to False.
+            batch_size (int): Inference batch size. Defaults to 1.
+            show (bool): Whether to display the visualization results in a
+                popup window. Defaults to False.
+            wait_time (float): The interval of show (s). Defaults to 0.
+            draw_pred (bool): Whether to draw predicted bounding boxes.
+                Defaults to True.
+            pred_score_thr (float): Minimum score of bboxes to draw.
+                Defaults to 0.3.
+            img_out_dir (str): Output directory of visualization results.
+                If left as empty, no file will be saved. Defaults to ''.
+            print_result (bool): Whether to print the inference result w/o
+                visualization to the console. Defaults to False.
+            pred_out_file: File to save the inference results w/o
+                visualization. If left as empty, no file will be saved.
+                Defaults to ''.
+            **kwargs: Other keyword arguments passed to :meth:`preprocess`,
+                :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.
+                Each key in kwargs should be in the corresponding set of
+                ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs``
+                and ``postprocess_kwargs``.
+        Returns:
+            dict: Inference and visualization results.
+        """
+        return super().__call__(
+            inputs,
+            return_datasamples,
+            batch_size,
+            return_vis=return_vis,
+            show=show,
+            wait_time=wait_time,
+            draw_pred=draw_pred,
+            pred_score_thr=pred_score_thr,
+            img_out_dir=img_out_dir,
+            print_result=print_result,
+            pred_out_file=pred_out_file,
+            **kwargs)
+
+    def visualize(self,
+                  inputs: InputsType,
+                  preds: PredType,
+                  return_vis: bool = False,
+                  show: bool = False,
+                  wait_time: int = 0,
+                  draw_pred: bool = True,
+                  pred_score_thr: float = 0.3,
+                  img_out_dir: str = '') -> Union[List[np.ndarray], None]:
+        """Visualize predictions.
+
+        Args:
+            inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
+            preds (List[Dict]): Predictions of the model.
+            return_vis (bool): Whether to return the visualization result.
+                Defaults to False.
+            show (bool): Whether to display the image in a popup window.
+                Defaults to False.
+            wait_time (float): The interval of show (s). Defaults to 0.
+            draw_pred (bool): Whether to draw predicted bounding boxes.
+                Defaults to True.
+            pred_score_thr (float): Minimum score of bboxes to draw.
+                Defaults to 0.3.
+            img_out_dir (str): Output directory of visualization results.
+                If left as empty, no file will be saved. Defaults to ''.
+        Returns:
+            List[np.ndarray] or None: Returns visualization results only if
+            applicable.
+        """
+        if self.visualizer is None or (not show and img_out_dir == ''
+                                       and not return_vis):
+            return None
+
+        if getattr(self, 'visualizer') is None:
+            raise ValueError('Visualization needs the "visualizer" term'
+                             'defined in the config, but got None.')
+
+        results = []
+
+        for single_input, pred in zip(inputs, preds):
+            if isinstance(single_input, str):
+                img_bytes = mmengine.fileio.get(single_input)
+                img = mmcv.imfrombytes(img_bytes)
+                img = img[:, :, ::-1]
+                img_name = osp.basename(single_input)
+            elif isinstance(single_input, np.ndarray):
+                img = single_input.copy()
+                img_num = str(self.num_visualized_imgs).zfill(8)
+                img_name = f'{img_num}.jpg'
+            else:
+                raise ValueError('Unsupported input type: '
+                                 f'{type(single_input)}')
+
+            out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \
+                else None
+
+            self.visualizer.add_datasample(
+                img_name,
+                img,
+                pred,
+                show=show,
+                wait_time=wait_time,
+                draw_gt=False,
+                draw_pred=draw_pred,
+                pred_score_thr=pred_score_thr,
+                out_file=out_file,
+            )
+            results.append(img)
+            self.num_visualized_imgs += 1
+
+        return results
+
+    def postprocess(
+        self,
+        preds: PredType,
+        visualization: Optional[List[np.ndarray]] = None,
+        return_datasample: bool = False,
+        print_result: bool = False,
+        pred_out_file: str = '',
+    ) -> Union[ResType, Tuple[ResType, np.ndarray]]:
+        """Process the predictions and visualization results from ``forward``
+        and ``visualize``.
+        This method should be responsible for the following tasks:
+        1. Convert datasamples into a json-serializable dict if needed.
+        2. Pack the predictions and visualization results and return them.
+        3. Dump or log the predictions.
+        Args:
+            preds (List[Dict]): Predictions of the model.
+            visualization (Optional[np.ndarray]): Visualized predictions.
+            return_datasample (bool): Whether to use Datasample to store
+                inference results. If False, dict will be used.
+            print_result (bool): Whether to print the inference result w/o
+                visualization to the console. Defaults to False.
+            pred_out_file: File to save the inference results w/o
+                visualization. If left as empty, no file will be saved.
+                Defaults to ''.
+        Returns:
+            dict: Inference and visualization results with key ``predictions``
+            and ``visualization``.
+            - ``visualization`` (Any): Returned by :meth:`visualize`.
+            - ``predictions`` (dict or DataSample): Returned by
+                :meth:`forward` and processed in :meth:`postprocess`.
+                If ``return_datasample=False``, it usually should be a
+                json-serializable dict containing only basic data elements such
+                as strings and numbers.
+        """
+        result_dict = {}
+        results = preds
+        if not return_datasample:
+            results = []
+            for pred in preds:
+                result = self.pred2dict(pred)
+                results.append(result)
+        # Add img to the results after printing and dumping
+        result_dict['predictions'] = results
+        if print_result:
+            print(result_dict)
+        if pred_out_file != '':
+            mmengine.dump(result_dict, pred_out_file)
+        result_dict['visualization'] = visualization
+        return result_dict
+
+    def pred2dict(self, data_sample: InstanceData) -> Dict:
+        """Extract elements necessary to represent a prediction into a
+        dictionary.
+
+        It's better to contain only basic data elements such as strings and
+        numbers in order to guarantee it's json-serializable.
+        """
+        pred_instances = data_sample.pred_instances.numpy()
+        result = {
+            'bboxes': pred_instances.bboxes.tolist(),
+            'labels': pred_instances.labels.tolist(),
+            'scores': pred_instances.scores.tolist()
+        }
+
+        return result
diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py
index 718ee9eb78..eb80592ff3 100644
--- a/mmdet3d/datasets/transforms/loading.py
+++ b/mmdet3d/datasets/transforms/loading.py
@@ -931,3 +931,52 @@ def __repr__(self) -> str:
         repr_str += f'{indent_str}with_bbox_depth={self.with_bbox_depth}, '
         repr_str += f'{indent_str}poly2mask={self.poly2mask})'
         return repr_str
+
+
+@TRANSFORMS.register_module()
+class Mono3DInferencerLoader(BaseTransform):
+    """Load an image from ``results['img']``. Similar with
+    :obj:`LoadImageFromFileMono3D`, but the image has been loaded as
+    :obj:`np.ndarray` in ``results['img']``. Can be used when loading image
+    from webcam. Required Keys:
+
+    - img
+    Modified Keys:
+    - img
+    - img_path
+    - img_shape
+    - ori_shape
+    Args:
+        to_float32 (bool): Whether to convert the loaded image to a float32
+            numpy array. If set to False, the loaded image is an uint8 array.
+            Defaults to False.
+    """
+
+    def __init__(self, **kwargs) -> None:
+        super().__init__()
+        self.from_file = TRANSFORMS.build(
+            dict(type='LoadImageFromFileMono3D', **kwargs))
+        self.from_ndarray = TRANSFORMS.build(
+            dict(type='LoadImageFromNDArray', **kwargs))
+
+    def transform(self, single_input: Union[str, np.ndarray, dict]) -> dict:
+        """Transform function to add image meta information.
+
+        Args:
+            single_input (dict): Result dict with Webcam read image in
+                ``results['img']``.
+        Returns:
+            dict: The dict contains loaded image and meta information.
+        """
+        if isinstance(single_input, str):
+            inputs = dict(img_path=single_input)
+        elif isinstance(single_input, np.ndarray):
+            inputs = dict(img=single_input)
+        elif isinstance(single_input, dict):
+            inputs = single_input
+        else:
+            raise NotImplementedError
+
+        if 'img' in inputs:
+            return self.from_ndarray(inputs)
+        return self.from_file(inputs)

From f031d55db2f7a2c1bb1ab89acdf18e8a1620bd82 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Fri, 6 Jan 2023 17:45:59 +0800
Subject: [PATCH 02/13] update mono3d inferencer

---
 .../apis/inferencers/mono3d_det_inferencer.py | 102 ++++++++----------
 1 file changed, 47 insertions(+), 55 deletions(-)

diff --git a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py b/mmdet3d/apis/inferencers/mono3d_det_inferencer.py
index de5bd81983..31bad32ee5 100644
--- a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py
+++ b/mmdet3d/apis/inferencers/mono3d_det_inferencer.py
@@ -1,22 +1,19 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import copy
 import os.path as osp
-import warnings
 from typing import Dict, List, Optional, Sequence, Tuple, Union
 
 import mmcv
 import mmengine
 import numpy as np
 import torch.nn as nn
-from mmdet.registry import DATASETS, MODELS
-from mmdet.utils import ConfigType, register_all_modules
 from mmengine.dataset import Compose
 from mmengine.infer.infer import BaseInferencer, ModelType
 from mmengine.runner import load_checkpoint
 from mmengine.structures import InstanceData
 from mmengine.visualization import Visualizer
 
-from ..evaluation import get_classes
+from mmdet3d.registry import MODELS
+from mmdet3d.utils import ConfigType, register_all_modules
 
 InstanceList = List[InstanceData]
 InputType = Union[str, np.ndarray]
@@ -26,13 +23,30 @@
 ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]]
 
 
+def convert_SyncBN(config):
+    """Convert config's naiveSyncBN to BN.
+
+    Args:
+         config (str or :obj:`mmengine.Config`): Config file path or the config
+            object.
+    """
+    if isinstance(config, dict):
+        for item in config:
+            if item == 'norm_cfg':
+                config[item]['type'] = config[item]['type']. \
+                                    replace('naiveSyncBN', 'BN')
+            else:
+                convert_SyncBN(config[item])
+
+
 class Mono3DDetInferencer(BaseInferencer):
-    """MMDet inferencer.
+    """MMDet3D Mono3D inferencer.
 
     Args:
         model (str, optional): Path to the config file or the model name
             defined in metafile. For example, it could be
-            "yolox-s" or "configs/yolox/yolox_s_8xb8-300e_coco.py".
+            "pgd-kitti" or
+            "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py".
         weights (str, optional): Path to the checkpoint. If it is not specified
             and model is a model name of metafile, the weights will be loaded
             from metafile. Defaults to None.
@@ -54,7 +68,7 @@ def __init__(self,
                  model: Union[ModelType, str],
                  weights: Optional[str] = None,
                  device: Optional[str] = None,
-                 scope: Optional[str] = 'mmdet',
+                 scope: Optional[str] = 'mmdet3d',
                  palette: str = 'none') -> None:
         # A global counter tracking the number of images processed, for
         # naming of the output images
@@ -70,47 +84,29 @@ def _init_model(
         weights: str,
         device: str = 'cpu',
     ) -> nn.Module:
-        if 'init_cfg' in cfg.model.backbone:
-            cfg.model.backbone.init_cfg = None
+        convert_SyncBN(cfg.model)
+        cfg.model.train_cfg = None
         model = MODELS.build(cfg.model)
 
         checkpoint = load_checkpoint(model, weights, map_location='cpu')
-        checkpoint_meta = checkpoint.get('meta', {})
+        dataset_meta = checkpoint['meta'].get('dataset_meta', None)
         # save the dataset_meta in the model for convenience
-        if 'dataset_meta' in checkpoint_meta:
-            # mmdet 3.x, all keys should be lowercase
-            model.dataset_meta = {
-                k.lower(): v
-                for k, v in checkpoint_meta['dataset_meta'].items()
-            }
-        elif 'CLASSES' in checkpoint_meta:
-            # < mmdet 3.x
-            classes = checkpoint_meta['CLASSES']
-            model.dataset_meta = {'classes': classes}
-        else:
-            warnings.simplefilter('once')
-            warnings.warn(
-                'dataset_meta or class names are not saved in the '
-                'checkpoint\'s meta data, use COCO classes by default.')
-            model.dataset_meta = {'classes': get_classes('coco')}
-
-        # Priority:  args.palette -> config -> checkpoint
-        if self.palette != 'none':
-            model.dataset_meta['palette'] = self.palette
+        if 'dataset_meta' in checkpoint.get('meta', {}):
+            # mmdet3d 1.x
+            model.dataset_meta = dataset_meta
+        elif 'CLASSES' in checkpoint.get('meta', {}):
+            # < mmdet3d 1.x
+            classes = checkpoint['meta']['CLASSES']
+            model.dataset_meta = {'CLASSES': classes}
+
+            if 'PALETTE' in checkpoint.get('meta', {}):  # 3D Segmentor
+                model.dataset_meta['PALETTE'] = checkpoint['meta']['PALETTE']
         else:
-            test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset)
-            # lazy init. We only need the metainfo.
-            test_dataset_cfg['lazy_init'] = True
-            metainfo = DATASETS.build(test_dataset_cfg).metainfo
-            cfg_palette = metainfo.get('palette', None)
-            if cfg_palette is not None:
-                model.dataset_meta['palette'] = cfg_palette
-            else:
-                if 'palette' not in model.dataset_meta:
-                    warnings.warn(
-                        'palette does not exist, random is used by default. '
-                        'You can also set the palette to customize.')
-                    model.dataset_meta['palette'] = 'random'
+            # < mmdet3d 1.x
+            model.dataset_meta = {'CLASSES': cfg.class_names}
+
+            if 'PALETTE' in checkpoint.get('meta', {}):  # 3D Segmentor
+                model.dataset_meta['PALETTE'] = checkpoint['meta']['PALETTE']
 
         model.cfg = cfg  # save the config in the model for convenience
         model.to(device)
@@ -121,12 +117,6 @@ def _init_pipeline(self, cfg: ConfigType) -> Compose:
         """Initialize the test pipeline."""
         pipeline_cfg = cfg.test_dataloader.dataset.pipeline
 
-        # For inference, the key of ``img_id`` is not used.
-        if 'meta_keys' in pipeline_cfg[-1]:
-            pipeline_cfg[-1]['meta_keys'] = tuple(
-                meta_key for meta_key in pipeline_cfg[-1]['meta_keys']
-                if meta_key != 'img_id')
-
         load_img_idx = self._get_transform_idx(pipeline_cfg,
                                                'LoadImageFromFileMono3D')
         if load_img_idx == -1:
@@ -261,9 +251,10 @@ def visualize(self,
             out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \
                 else None
 
+            data_input = dict(img=img)
             self.visualizer.add_datasample(
                 img_name,
-                img,
+                data_input,
                 pred,
                 show=show,
                 wait_time=wait_time,
@@ -271,6 +262,7 @@ def visualize(self,
                 draw_pred=draw_pred,
                 pred_score_thr=pred_score_thr,
                 out_file=out_file,
+                vis_task='mono_det',
             )
             results.append(img)
             self.num_visualized_imgs += 1
@@ -334,11 +326,11 @@ def pred2dict(self, data_sample: InstanceData) -> Dict:
         It's better to contain only basic data elements such as strings and
         numbers in order to guarantee it's json-serializable.
         """
-        pred_instances = data_sample.pred_instances.numpy()
+        pred_instances = data_sample.pred_instances_3d.numpy()
         result = {
-            'bboxes': pred_instances.bboxes.tolist(),
-            'labels': pred_instances.labels.tolist(),
-            'scores': pred_instances.scores.tolist()
+            'bboxes_3d': pred_instances.bboxes_3d.tolist(),
+            'labels_3d': pred_instances.labels_3d.tolist(),
+            'scores_3d': pred_instances.scores_3d.tolist()
         }
 
         return result

From 8f1ad746b806ea69917485344a261feff7dd1d66 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 9 Jan 2023 10:55:26 +0800
Subject: [PATCH 03/13] update init file

---
 mmdet3d/apis/inferencers/__init__.py    | 3 +++
 mmdet3d/datasets/transforms/__init__.py | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/mmdet3d/apis/inferencers/__init__.py b/mmdet3d/apis/inferencers/__init__.py
index ef101fec61..dc024e102e 100644
--- a/mmdet3d/apis/inferencers/__init__.py
+++ b/mmdet3d/apis/inferencers/__init__.py
@@ -1 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from .mono3d_det_inferencer import Mono3DDetInferencer
+
+__all__ = ['Mono3DDetInferencer']
diff --git a/mmdet3d/datasets/transforms/__init__.py b/mmdet3d/datasets/transforms/__init__.py
index 1a21814be2..86e79307ae 100644
--- a/mmdet3d/datasets/transforms/__init__.py
+++ b/mmdet3d/datasets/transforms/__init__.py
@@ -4,7 +4,8 @@
 from .loading import (LoadAnnotations3D, LoadImageFromFileMono3D,
                       LoadMultiViewImageFromFiles, LoadPointsFromDict,
                       LoadPointsFromFile, LoadPointsFromMultiSweeps,
-                      NormalizePointsColor, PointSegClassMapping)
+                      Mono3DInferencerLoader, NormalizePointsColor,
+                      PointSegClassMapping)
 from .test_time_aug import MultiScaleFlipAug3D
 # yapf: disable
 from .transforms_3d import (AffineResize, BackgroundPointsFilter,
@@ -28,5 +29,5 @@
     'IndoorPatchPointSample', 'LoadImageFromFileMono3D', 'ObjectNameFilter',
     'RandomDropPointsColor', 'RandomJitterPoints', 'AffineResize',
     'RandomShiftScale', 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
-    'MultiViewWrapper', 'PhotoMetricDistortion3D'
+    'MultiViewWrapper', 'PhotoMetricDistortion3D', 'Mono3DInferencerLoader'
 ]

From 4116433551c3c3b0c6dd514ec3e0560b527539d3 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 9 Jan 2023 11:36:51 +0800
Subject: [PATCH 04/13] update unit test

---
 configs/pgd/metafile.yml                      |   2 +
 mmdet3d/apis/__init__.py                      |  10 +-
 .../test_mono3d_det_inferencer.py             | 102 ++++++++++++++++++
 3 files changed, 108 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py

diff --git a/configs/pgd/metafile.yml b/configs/pgd/metafile.yml
index d48b3b6a93..8870ed426e 100644
--- a/configs/pgd/metafile.yml
+++ b/configs/pgd/metafile.yml
@@ -17,6 +17,8 @@ Collections:
 
 Models:
   - Name: pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d
+    Alias:
+       - pgd-kitti
     In Collection: PGD
     Config: configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py
     Metadata:
diff --git a/mmdet3d/apis/__init__.py b/mmdet3d/apis/__init__.py
index 2926178c85..d1ed517444 100644
--- a/mmdet3d/apis/__init__.py
+++ b/mmdet3d/apis/__init__.py
@@ -3,12 +3,10 @@
                         inference_mono_3d_detector,
                         inference_multi_modality_detector, inference_segmentor,
                         init_model)
+from .inferencers import Mono3DDetInferencer
 
 __all__ = [
-    'inference_detector',
-    'init_model',
-    'inference_mono_3d_detector',
-    'convert_SyncBN',
-    'inference_multi_modality_detector',
-    'inference_segmentor',
+    'inference_detector', 'init_model', 'inference_mono_3d_detector',
+    'convert_SyncBN', 'inference_multi_modality_detector',
+    'inference_segmentor', 'Mono3DDetInferencer'
 ]
diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
new file mode 100644
index 0000000000..cd67b7240e
--- /dev/null
+++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
@@ -0,0 +1,102 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+from unittest import TestCase
+
+import mmcv
+import mmengine
+import numpy as np
+from mmengine.utils import is_list_of
+
+from mmdet3d.apis import Mono3DDetInferencer
+from mmdet3d.structures import Det3DDataSample
+
+
+class TestDetInferencer(TestCase):
+
+    def test_init(self):
+        # init from metafile
+        Mono3DDetInferencer('pgd-kitti')
+        # init from cfg
+        Mono3DDetInferencer(
+            'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py')
+
+    def assert_predictions_equal(self, preds1, preds2):
+        for pred1, pred2 in zip(preds1, preds2):
+            if 'bboxes_3d' in pred1:
+                self.assertTrue(
+                    np.allclose(pred1['bboxes_3d'], pred2['bboxes_3d'], 0.1))
+            if 'scores_3d' in pred1:
+                self.assertTrue(
+                    np.allclose(pred1['scores_3d'], pred2['scores_3d'], 0.1))
+            if 'labels_3d' in pred1:
+                self.assertTrue(
+                    np.allclose(pred1['labels_3d'], pred2['labels_3d']))
+
+    def test_call(self, model):
+        # single img
+        img_path = 'tests/data/kitti/training/image_2/000007.png'
+        inferencer = Mono3DDetInferencer(model)
+        res_path = inferencer(img_path, return_vis=True)
+        # ndarray
+        img = mmcv.imread(img_path)
+        res_ndarray = inferencer(img, return_vis=True)
+        self.assert_predictions_equal(res_path['predictions'],
+                                      res_ndarray['predictions'])
+        self.assertIn('visualization', res_path)
+        self.assertIn('visualization', res_ndarray)
+
+        # multiple images
+        img_paths = [
+            'tests/data/kitti/training/image_2/000007.png',
+            'tests/data/kitti/training/image_2/000000.png'
+        ]
+        res_path = inferencer(img_paths, return_vis=True)
+        # list of ndarray
+        imgs = [mmcv.imread(p) for p in img_paths]
+        res_ndarray = inferencer(imgs, return_vis=True)
+        self.assert_predictions_equal(res_path['predictions'],
+                                      res_ndarray['predictions'])
+        self.assertIn('visualization', res_path)
+        self.assertIn('visualization', res_ndarray)
+
+        # img dir, test different batch sizes
+        img_dir = 'tests/data/kitti/training/image_2/'
+        res_bs1 = inferencer(img_dir, batch_size=1, return_vis=True)
+        res_bs3 = inferencer(img_dir, batch_size=2, return_vis=True)
+        self.assert_predictions_equal(res_bs1['predictions'],
+                                      res_bs3['predictions'])
+        if model == 'pgd-kitti':
+            # There is a jitter operation when the mask is drawn,
+            # so it cannot be asserted.
+            for res_bs1_vis, res_bs3_vis in zip(res_bs1['visualization'],
+                                                res_bs3['visualization']):
+                self.assertTrue(np.allclose(res_bs1_vis, res_bs3_vis))
+
+    def test_visualize(self, model):
+        img_paths = [
+            'tests/data/kitti/training/image_2/000007.png',
+            'tests/data/kitti/training/image_2/000000.png'
+        ]
+        inferencer = Mono3DDetInferencer(model)
+        # img_out_dir
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            inferencer(img_paths, img_out_dir=tmp_dir)
+            for img_dir in ['000007.png', '000000.png']:
+                self.assertTrue(osp.exists(osp.join(tmp_dir, img_dir)))
+
+    def test_postprocess(self, model):
+        # return_datasample
+        img_path = 'tests/data/kitti/training/image_2/000007.png'
+        inferencer = Mono3DDetInferencer(model)
+        res = inferencer(img_path, return_datasamples=True)
+        self.assertTrue(is_list_of(res['predictions'], Det3DDataSample))
+
+        # pred_out_file
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            pred_out_file = osp.join(tmp_dir, 'tmp.json')
+            res = inferencer(
+                img_path, print_result=True, pred_out_file=pred_out_file)
+            dumped_res = mmengine.load(pred_out_file)
+            self.assert_predictions_equal(res['predictions'],
+                                          dumped_res['predictions'])

From 09ddf1eddf12e2fbb05443f96243acc635e7c797 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 9 Jan 2023 11:37:53 +0800
Subject: [PATCH 05/13] fix name

---
 tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
index cd67b7240e..49650c51e2 100644
--- a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
+++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
@@ -12,7 +12,7 @@
 from mmdet3d.structures import Det3DDataSample
 
 
-class TestDetInferencer(TestCase):
+class TestMono3DDetInferencer(TestCase):
 
     def test_init(self):
         # init from metafile

From af078eb88f5f0829cdce39803a78e6619b090260 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 9 Jan 2023 14:43:08 +0800
Subject: [PATCH 06/13] add base_det3d_inferencer

---
 mmdet3d/apis/__init__.py                      |   4 +-
 mmdet3d/apis/inferencers/__init__.py          |   5 +-
 ...inferencer.py => base_det3d_inferencer.py} |  96 +-----------
 .../apis/inferencers/mono_det3d_inferencer.py | 148 ++++++++++++++++++
 .../test_mono3d_det_inferencer.py             |  14 +-
 5 files changed, 162 insertions(+), 105 deletions(-)
 rename mmdet3d/apis/inferencers/{mono3d_det_inferencer.py => base_det3d_inferencer.py} (72%)
 create mode 100644 mmdet3d/apis/inferencers/mono_det3d_inferencer.py

diff --git a/mmdet3d/apis/__init__.py b/mmdet3d/apis/__init__.py
index d1ed517444..d1b52e4c7c 100644
--- a/mmdet3d/apis/__init__.py
+++ b/mmdet3d/apis/__init__.py
@@ -3,10 +3,10 @@
                         inference_mono_3d_detector,
                         inference_multi_modality_detector, inference_segmentor,
                         init_model)
-from .inferencers import Mono3DDetInferencer
+from .inferencers import BaseDet3DInferencer, MonoDet3DInferencer
 
 __all__ = [
     'inference_detector', 'init_model', 'inference_mono_3d_detector',
     'convert_SyncBN', 'inference_multi_modality_detector',
-    'inference_segmentor', 'Mono3DDetInferencer'
+    'inference_segmentor', 'BaseDet3DInferencer', 'MonoDet3DInferencer'
 ]
diff --git a/mmdet3d/apis/inferencers/__init__.py b/mmdet3d/apis/inferencers/__init__.py
index dc024e102e..0aaf0b2984 100644
--- a/mmdet3d/apis/inferencers/__init__.py
+++ b/mmdet3d/apis/inferencers/__init__.py
@@ -1,4 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .mono3d_det_inferencer import Mono3DDetInferencer
+from .base_det3d_inferencer import BaseDet3DInferencer
+from .mono_det3d_inferencer import MonoDet3DInferencer
 
-__all__ = ['Mono3DDetInferencer']
+__all__ = ['BaseDet3DInferencer', 'MonoDet3DInferencer']
diff --git a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py b/mmdet3d/apis/inferencers/base_det3d_inferencer.py
similarity index 72%
rename from mmdet3d/apis/inferencers/mono3d_det_inferencer.py
rename to mmdet3d/apis/inferencers/base_det3d_inferencer.py
index 31bad32ee5..03e70662ac 100644
--- a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py
+++ b/mmdet3d/apis/inferencers/base_det3d_inferencer.py
@@ -1,12 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import os.path as osp
 from typing import Dict, List, Optional, Sequence, Tuple, Union
 
-import mmcv
 import mmengine
 import numpy as np
 import torch.nn as nn
-from mmengine.dataset import Compose
 from mmengine.infer.infer import BaseInferencer, ModelType
 from mmengine.runner import load_checkpoint
 from mmengine.structures import InstanceData
@@ -39,8 +36,8 @@ def convert_SyncBN(config):
                 convert_SyncBN(config[item])
 
 
-class Mono3DDetInferencer(BaseInferencer):
-    """MMDet3D Mono3D inferencer.
+class BaseDet3DInferencer(BaseInferencer):
+    """Base 3D object detection inferencer.
 
     Args:
         model (str, optional): Path to the config file or the model name
@@ -70,9 +67,6 @@ def __init__(self,
                  device: Optional[str] = None,
                  scope: Optional[str] = 'mmdet3d',
                  palette: str = 'none') -> None:
-        # A global counter tracking the number of images processed, for
-        # naming of the output images
-        self.num_visualized_imgs = 0
         self.palette = palette
         register_all_modules()
         super().__init__(
@@ -113,18 +107,6 @@ def _init_model(
         model.eval()
         return model
 
-    def _init_pipeline(self, cfg: ConfigType) -> Compose:
-        """Initialize the test pipeline."""
-        pipeline_cfg = cfg.test_dataloader.dataset.pipeline
-
-        load_img_idx = self._get_transform_idx(pipeline_cfg,
-                                               'LoadImageFromFileMono3D')
-        if load_img_idx == -1:
-            raise ValueError(
-                'LoadImageFromFileMono3D is not found in the test pipeline')
-        pipeline_cfg[load_img_idx]['type'] = 'Mono3DInferencerLoader'
-        return Compose(pipeline_cfg)
-
     def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int:
         """Returns the index of the transform in a pipeline.
 
@@ -195,80 +177,6 @@ def __call__(self,
             pred_out_file=pred_out_file,
             **kwargs)
 
-    def visualize(self,
-                  inputs: InputsType,
-                  preds: PredType,
-                  return_vis: bool = False,
-                  show: bool = False,
-                  wait_time: int = 0,
-                  draw_pred: bool = True,
-                  pred_score_thr: float = 0.3,
-                  img_out_dir: str = '') -> Union[List[np.ndarray], None]:
-        """Visualize predictions.
-
-        Args:
-            inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
-            preds (List[Dict]): Predictions of the model.
-            return_vis (bool): Whether to return the visualization result.
-                Defaults to False.
-            show (bool): Whether to display the image in a popup window.
-                Defaults to False.
-            wait_time (float): The interval of show (s). Defaults to 0.
-            draw_pred (bool): Whether to draw predicted bounding boxes.
-                Defaults to True.
-            pred_score_thr (float): Minimum score of bboxes to draw.
-                Defaults to 0.3.
-            img_out_dir (str): Output directory of visualization results.
-                If left as empty, no file will be saved. Defaults to ''.
-        Returns:
-            List[np.ndarray] or None: Returns visualization results only if
-            applicable.
-        """
-        if self.visualizer is None or (not show and img_out_dir == ''
-                                       and not return_vis):
-            return None
-
-        if getattr(self, 'visualizer') is None:
-            raise ValueError('Visualization needs the "visualizer" term'
-                             'defined in the config, but got None.')
-
-        results = []
-
-        for single_input, pred in zip(inputs, preds):
-            if isinstance(single_input, str):
-                img_bytes = mmengine.fileio.get(single_input)
-                img = mmcv.imfrombytes(img_bytes)
-                img = img[:, :, ::-1]
-                img_name = osp.basename(single_input)
-            elif isinstance(single_input, np.ndarray):
-                img = single_input.copy()
-                img_num = str(self.num_visualized_imgs).zfill(8)
-                img_name = f'{img_num}.jpg'
-            else:
-                raise ValueError('Unsupported input type: '
-                                 f'{type(single_input)}')
-
-            out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \
-                else None
-
-            data_input = dict(img=img)
-            self.visualizer.add_datasample(
-                img_name,
-                data_input,
-                pred,
-                show=show,
-                wait_time=wait_time,
-                draw_gt=False,
-                draw_pred=draw_pred,
-                pred_score_thr=pred_score_thr,
-                out_file=out_file,
-                vis_task='mono_det',
-            )
-            results.append(img)
-            self.num_visualized_imgs += 1
-
-        return results
-
     def postprocess(
         self,
         preds: PredType,
diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
new file mode 100644
index 0000000000..ec7401bbb3
--- /dev/null
+++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
@@ -0,0 +1,148 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+from typing import Dict, List, Optional, Sequence, Union
+
+import mmcv
+import mmengine
+import numpy as np
+from mmengine.dataset import Compose
+from mmengine.infer.infer import ModelType
+from mmengine.structures import InstanceData
+
+from mmdet3d.utils import ConfigType
+from .base_det3d_inferencer import BaseDet3DInferencer
+
+InstanceList = List[InstanceData]
+InputType = Union[str, np.ndarray]
+InputsType = Union[InputType, Sequence[InputType]]
+PredType = Union[InstanceData, InstanceList]
+ImgType = Union[np.ndarray, Sequence[np.ndarray]]
+ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]]
+
+
+class MonoDet3DInferencer(BaseDet3DInferencer):
+    """MMDet3D Mono3D inferencer.
+
+    Args:
+        model (str, optional): Path to the config file or the model name
+            defined in metafile. For example, it could be
+            "pgd-kitti" or
+            "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py".
+        weights (str, optional): Path to the checkpoint. If it is not specified
+            and model is a model name of metafile, the weights will be loaded
+            from metafile. Defaults to None.
+        device (str, optional): Device to run inference. If None, the available
+            device will be automatically used. Defaults to None.
+    """
+
+    preprocess_kwargs: set = set()
+    forward_kwargs: set = set()
+    visualize_kwargs: set = {
+        'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr',
+        'img_out_dir'
+    }
+    postprocess_kwargs: set = {
+        'print_result', 'pred_out_file', 'return_datasample'
+    }
+
+    def __init__(self,
+                 model: Union[ModelType, str],
+                 weights: Optional[str] = None,
+                 device: Optional[str] = None,
+                 scope: Optional[str] = 'mmdet3d',
+                 palette: str = 'none') -> None:
+        # A global counter tracking the number of images processed, for
+        # naming of the output images
+        self.num_visualized_imgs = 0
+        super().__init__(
+            model=model,
+            weights=weights,
+            device=device,
+            scope=scope,
+            palette=palette)
+
+    def _init_pipeline(self, cfg: ConfigType) -> Compose:
+        """Initialize the test pipeline."""
+        pipeline_cfg = cfg.test_dataloader.dataset.pipeline
+
+        load_img_idx = self._get_transform_idx(pipeline_cfg,
+                                               'LoadImageFromFileMono3D')
+        if load_img_idx == -1:
+            raise ValueError(
+                'LoadImageFromFileMono3D is not found in the test pipeline')
+        pipeline_cfg[load_img_idx]['type'] = 'Mono3DInferencerLoader'
+        return Compose(pipeline_cfg)
+
+    def visualize(self,
+                  inputs: InputsType,
+                  preds: PredType,
+                  return_vis: bool = False,
+                  show: bool = False,
+                  wait_time: int = 0,
+                  draw_pred: bool = True,
+                  pred_score_thr: float = 0.3,
+                  img_out_dir: str = '') -> Union[List[np.ndarray], None]:
+        """Visualize predictions.
+
+        Args:
+            inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
+            preds (List[Dict]): Predictions of the model.
+            return_vis (bool): Whether to return the visualization result.
+                Defaults to False.
+            show (bool): Whether to display the image in a popup window.
+                Defaults to False.
+            wait_time (float): The interval of show (s). Defaults to 0.
+            draw_pred (bool): Whether to draw predicted bounding boxes.
+                Defaults to True.
+            pred_score_thr (float): Minimum score of bboxes to draw.
+                Defaults to 0.3.
+            img_out_dir (str): Output directory of visualization results.
+                If left as empty, no file will be saved. Defaults to ''.
+        Returns:
+            List[np.ndarray] or None: Returns visualization results only if
+            applicable.
+        """
+        if self.visualizer is None or (not show and img_out_dir == ''
+                                       and not return_vis):
+            return None
+
+        if getattr(self, 'visualizer') is None:
+            raise ValueError('Visualization needs the "visualizer" term'
+                             'defined in the config, but got None.')
+
+        results = []
+
+        for single_input, pred in zip(inputs, preds):
+            if isinstance(single_input, str):
+                img_bytes = mmengine.fileio.get(single_input)
+                img = mmcv.imfrombytes(img_bytes)
+                img = img[:, :, ::-1]
+                img_name = osp.basename(single_input)
+            elif isinstance(single_input, np.ndarray):
+                img = single_input.copy()
+                img_num = str(self.num_visualized_imgs).zfill(8)
+                img_name = f'{img_num}.jpg'
+            else:
+                raise ValueError('Unsupported input type: '
+                                 f'{type(single_input)}')
+
+            out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \
+                else None
+
+            data_input = dict(img=img)
+            self.visualizer.add_datasample(
+                img_name,
+                data_input,
+                pred,
+                show=show,
+                wait_time=wait_time,
+                draw_gt=False,
+                draw_pred=draw_pred,
+                pred_score_thr=pred_score_thr,
+                out_file=out_file,
+                vis_task='mono_det',
+            )
+            results.append(img)
+            self.num_visualized_imgs += 1
+
+        return results
diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
index 49650c51e2..f9b25a2b2b 100644
--- a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
+++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
@@ -8,17 +8,17 @@
 import numpy as np
 from mmengine.utils import is_list_of
 
-from mmdet3d.apis import Mono3DDetInferencer
+from mmdet3d.apis import MonoDet3DInferencer
 from mmdet3d.structures import Det3DDataSample
 
 
-class TestMono3DDetInferencer(TestCase):
+class TestMonoDet3DInferencer(TestCase):
 
     def test_init(self):
         # init from metafile
-        Mono3DDetInferencer('pgd-kitti')
+        MonoDet3DInferencer('pgd-kitti')
         # init from cfg
-        Mono3DDetInferencer(
+        MonoDet3DInferencer(
             'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py')
 
     def assert_predictions_equal(self, preds1, preds2):
@@ -36,7 +36,7 @@ def assert_predictions_equal(self, preds1, preds2):
     def test_call(self, model):
         # single img
         img_path = 'tests/data/kitti/training/image_2/000007.png'
-        inferencer = Mono3DDetInferencer(model)
+        inferencer = MonoDet3DInferencer(model)
         res_path = inferencer(img_path, return_vis=True)
         # ndarray
         img = mmcv.imread(img_path)
@@ -78,7 +78,7 @@ def test_visualize(self, model):
             'tests/data/kitti/training/image_2/000007.png',
             'tests/data/kitti/training/image_2/000000.png'
         ]
-        inferencer = Mono3DDetInferencer(model)
+        inferencer = MonoDet3DInferencer(model)
         # img_out_dir
         with tempfile.TemporaryDirectory() as tmp_dir:
             inferencer(img_paths, img_out_dir=tmp_dir)
@@ -88,7 +88,7 @@ def test_visualize(self, model):
     def test_postprocess(self, model):
         # return_datasample
         img_path = 'tests/data/kitti/training/image_2/000007.png'
-        inferencer = Mono3DDetInferencer(model)
+        inferencer = MonoDet3DInferencer(model)
         res = inferencer(img_path, return_datasamples=True)
         self.assertTrue(is_list_of(res['predictions'], Det3DDataSample))
 

From 31418db956042fa26ef3d6a9c9882d782bf8eb63 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Tue, 10 Jan 2023 11:42:37 +0800
Subject: [PATCH 07/13] fix comments

---
 .../apis/inferencers/base_det3d_inferencer.py |  6 +-
 mmdet3d/datasets/transforms/loading.py        | 71 +++++++++++++------
 2 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/mmdet3d/apis/inferencers/base_det3d_inferencer.py b/mmdet3d/apis/inferencers/base_det3d_inferencer.py
index 03e70662ac..2a13122046 100644
--- a/mmdet3d/apis/inferencers/base_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/base_det3d_inferencer.py
@@ -83,11 +83,9 @@ def _init_model(
         model = MODELS.build(cfg.model)
 
         checkpoint = load_checkpoint(model, weights, map_location='cpu')
-        dataset_meta = checkpoint['meta'].get('dataset_meta', None)
-        # save the dataset_meta in the model for convenience
         if 'dataset_meta' in checkpoint.get('meta', {}):
             # mmdet3d 1.x
-            model.dataset_meta = dataset_meta
+            model.dataset_meta = checkpoint['meta']['dataset_meta']
         elif 'CLASSES' in checkpoint.get('meta', {}):
             # < mmdet3d 1.x
             classes = checkpoint['meta']['CLASSES']
@@ -236,7 +234,7 @@ def pred2dict(self, data_sample: InstanceData) -> Dict:
         """
         pred_instances = data_sample.pred_instances_3d.numpy()
         result = {
-            'bboxes_3d': pred_instances.bboxes_3d.tolist(),
+            'bboxes_3d': pred_instances.bboxes_3d.tensor.numpy().tolist(),
             'labels_3d': pred_instances.labels_3d.tolist(),
             'scores_3d': pred_instances.scores_3d.tolist()
         }
diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py
index eb80592ff3..ac635bb91c 100644
--- a/mmdet3d/datasets/transforms/loading.py
+++ b/mmdet3d/datasets/transforms/loading.py
@@ -267,6 +267,46 @@ def transform(self, results: dict) -> dict:
         return results
 
 
+@TRANSFORMS.register_module()
+class LoadImageFromNDArray(LoadImageFromFile):
+    """Load an image from ``results['img']``.
+    Similar to :obj:`LoadImageFromFile`, but the image has been loaded as
+    :obj:`np.ndarray` in ``results['img']``. Can be used when loading image
+    from webcam.
+    Required Keys:
+    - img
+    Modified Keys:
+    - img
+    - img_path
+    - img_shape
+    - ori_shape
+    Args:
+        to_float32 (bool): Whether to convert the loaded image to a float32
+            numpy array. If set to False, the loaded image is an uint8 array.
+            Defaults to False.
+    """
+
+    def transform(self, results: dict) -> dict:
+        """Transform function to add image meta information.
+
+        Args:
+            results (dict): Result dict with Webcam read image in
+                ``results['img']``.
+        Returns:
+            dict: The dict contains loaded image and meta information.
+        """
+
+        img = results['img']
+        if self.to_float32:
+            img = img.astype(np.float32)
+
+        results['img_path'] = None
+        results['img'] = img
+        results['img_shape'] = img.shape[:2]
+        results['ori_shape'] = img.shape[:2]
+        return results
+
+
 @TRANSFORMS.register_module()
 class LoadPointsFromMultiSweeps(BaseTransform):
     """Load points from multiple sweeps.
@@ -935,17 +975,10 @@ def __repr__(self) -> str:
 
 @TRANSFORMS.register_module()
 class Mono3DInferencerLoader(BaseTransform):
-    """Load an image from ``results['img']``. Similar with
+    """Load an image from ``results['images']['CAMX']['img']``. Similar to
     :obj:`LoadImageFromFileMono3D`, but the image has been loaded as
-    :obj:`np.ndarray` in ``results['img']``. Can be used when loading image
-    from webcam. Required Keys:
+    :obj:`np.ndarray` in ``results['images']['CAMX']['img']``.
 
-    - img
-    Modified Keys:
-    - img
-    - img_path
-    - img_shape
-    - ori_shape
     Args:
         to_float32 (bool): Whether to convert the loaded image to a float32
             numpy array. If set to False, the loaded image is an uint8 array.
@@ -959,24 +992,18 @@ def __init__(self, **kwargs) -> None:
         self.from_ndarray = TRANSFORMS.build(
             dict(type='LoadImageFromNDArray', **kwargs))
 
-    def transform(self, single_input: Union[str, np.ndarray, dict]) -> dict:
+    def transform(self, single_input: dict) -> dict:
         """Transform function to add image meta information.
 
         Args:
             single_input (dict): Result dict with Webcam read image in
-                ``results['img']``.
+                ``results['images']['CAMX']['img']``.
         Returns:
             dict: The dict contains loaded image and meta information.
         """
-        if isinstance(single_input, str):
-            inputs = dict(img_path=single_input)
-        elif isinstance(single_input, np.ndarray):
-            inputs = dict(img=single_input)
-        elif isinstance(single_input, dict):
-            inputs = single_input
-        else:
-            raise NotImplementedError
-
-        if 'img' in inputs:
-            return self.from_ndarray(inputs)
+        inputs = single_input
+        camera_type = list(inputs['images'].keys())[0]
+        assert 'cam2img' in inputs['images'][camera_type]
+        if 'img' in inputs['images'][camera_type]:
+            return self.from_ndarray(inputs['images'][camera_type])
         return self.from_file(inputs)

From 40ccc9df3983c5f15b16620970231bedf30f69d4 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Wed, 11 Jan 2023 00:45:49 +0800
Subject: [PATCH 08/13] fix comments

---
 .../apis/inferencers/mono_det3d_inferencer.py | 51 +++++++++++--
 mmdet3d/datasets/transforms/loading.py        | 39 ++++++++--
 mmdet3d/visualization/local_visualizer.py     |  2 +
 .../test_mono3d_det_inferencer.py             | 72 +++++++++++--------
 4 files changed, 121 insertions(+), 43 deletions(-)

diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
index ec7401bbb3..691cdb1437 100644
--- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
@@ -6,6 +6,8 @@
 import mmengine
 import numpy as np
 from mmengine.dataset import Compose
+from mmengine.fileio import (get_file_backend, isdir, join_path,
+                             list_dir_or_file)
 from mmengine.infer.infer import ModelType
 from mmengine.structures import InstanceData
 
@@ -54,13 +56,48 @@ def __init__(self,
         # A global counter tracking the number of images processed, for
         # naming of the output images
         self.num_visualized_imgs = 0
-        super().__init__(
+        super(MonoDet3DInferencer, self).__init__(
             model=model,
             weights=weights,
             device=device,
             scope=scope,
             palette=palette)
 
+    def _inputs_to_list(self, inputs: Union[dict, list]) -> list:
+        """Preprocess the inputs to a list.
+
+        Preprocess inputs to a list according to its type:
+
+        - list or tuple: return inputs
+        - dict:
+            - ``inputs['img']`` is a directory path: replace it with the list
+              of files in the directory and return a list containing the
+              dict
+            - other cases: return a list containing the dict unchanged
+
+        Args:
+            inputs (Union[dict, list]): Inputs for the inferencer.
+
+        Returns:
+            list: List of input for the :meth:`preprocess`.
+        """
+
+        if isinstance(inputs, dict) and isinstance(inputs['img'], str):
+            img = inputs['img']
+            backend = get_file_backend(img)
+            if hasattr(backend, 'isdir') and isdir(img):
+                # Backends like HttpsBackend do not implement `isdir`, so only
+                # those backends that implement `isdir` could accept the inputs
+                # as a directory
+                filename_list = list_dir_or_file(img, list_dir=False)
+                img = [join_path(img, filename) for filename in filename_list]
+                inputs['img'] = img
+
+        if not isinstance(inputs, (list, tuple)):
+            inputs = [inputs]
+
+        return list(inputs)
+
     def _init_pipeline(self, cfg: ConfigType) -> Compose:
         """Initialize the test pipeline."""
         pipeline_cfg = cfg.test_dataloader.dataset.pipeline
@@ -85,7 +122,7 @@ def visualize(self,
         """Visualize predictions.
 
         Args:
-            inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer.
+            inputs (List[Dict]): Inputs for the inferencer.
             preds (List[Dict]): Predictions of the model.
             return_vis (bool): Whether to return the visualization result.
                 Defaults to False.
@@ -113,13 +150,13 @@ def visualize(self,
         results = []
 
         for single_input, pred in zip(inputs, preds):
-            if isinstance(single_input, str):
-                img_bytes = mmengine.fileio.get(single_input)
+            if isinstance(single_input['img'], str):
+                img_bytes = mmengine.fileio.get(single_input['img'])
                 img = mmcv.imfrombytes(img_bytes)
                 img = img[:, :, ::-1]
-                img_name = osp.basename(single_input)
-            elif isinstance(single_input, np.ndarray):
-                img = single_input.copy()
+                img_name = osp.basename(single_input['img'])
+            elif isinstance(single_input['img'], np.ndarray):
+                img = single_input['img'].copy()
                 img_num = str(self.num_visualized_imgs).zfill(8)
                 img_name = f'{img_num}.jpg'
             else:
diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py
index ac635bb91c..44e28c5382 100644
--- a/mmdet3d/datasets/transforms/loading.py
+++ b/mmdet3d/datasets/transforms/loading.py
@@ -10,6 +10,7 @@
 from mmdet.datasets.transforms import LoadAnnotations
 
 from mmdet3d.registry import TRANSFORMS
+from mmdet3d.structures.bbox_3d import get_box_type
 from mmdet3d.structures.points import BasePoints, get_points_type
 
 
@@ -1001,9 +1002,37 @@ def transform(self, single_input: dict) -> dict:
         Returns:
             dict: The dict contains loaded image and meta information.
         """
-        inputs = single_input
-        camera_type = list(inputs['images'].keys())[0]
-        assert 'cam2img' in inputs['images'][camera_type]
-        if 'img' in inputs['images'][camera_type]:
-            return self.from_ndarray(inputs['images'][camera_type])
+        box_type_3d, box_mode_3d = get_box_type('camera')
+        if isinstance(single_input['calib'], str):
+            calib_path = single_input['calib']
+            with open(calib_path, 'r') as f:
+                lines = f.readlines()
+            cam2img = np.array([
+                float(info) for info in lines[0].split(' ')[0:16]
+            ]).reshape([4, 4])
+        elif isinstance(single_input['calib'], np.ndarray):
+            cam2img = single_input['calib']
+        else:
+            raise ValueError('Unsupported input type: '
+                             f'{type(single_input)}')
+
+        if isinstance(single_input['img'], str):
+            inputs = dict(
+                images=dict(
+                    CAM_FRONT=dict(
+                        img_path=single_input['img'], cam2img=cam2img)),
+                box_mode_3d=box_mode_3d,
+                box_type_3d=box_type_3d)
+        elif isinstance(single_input['img'], np.ndarray):
+            inputs = dict(
+                img=single_input['img'],
+                cam2img=cam2img,
+                box_type_3d=box_type_3d,
+                box_mode_3d=box_mode_3d)
+        else:
+            raise ValueError('Unsupported input type: '
+                             f'{type(single_input)}')
+
+        if 'img' in inputs:
+            return self.from_ndarray(inputs)
         return self.from_file(inputs)
diff --git a/mmdet3d/visualization/local_visualizer.py b/mmdet3d/visualization/local_visualizer.py
index 5f9f4188ee..523521857b 100644
--- a/mmdet3d/visualization/local_visualizer.py
+++ b/mmdet3d/visualization/local_visualizer.py
@@ -729,6 +729,7 @@ def add_datasample(self,
             if 'gt_instances' in data_sample:
                 if len(data_sample.gt_instances) > 0:
                     assert 'img' in data_input
+                    img = data_input['img']
                     if isinstance(data_input['img'], Tensor):
                         img = data_input['img'].permute(1, 2, 0).numpy()
                         img = img[..., [2, 1, 0]]  # bgr to rgb
@@ -760,6 +761,7 @@ def add_datasample(self,
                     pred_instances = data_sample.pred_instances
                     pred_instances = pred_instances_3d[
                         pred_instances.scores > pred_score_thr].cpu()
+                    img = data_input['img']
                     if isinstance(data_input['img'], Tensor):
                         img = data_input['img'].permute(1, 2, 0).numpy()
                         img = img[..., [2, 1, 0]]  # bgr to rgb
diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
index f9b25a2b2b..de3e657222 100644
--- a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
+++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
@@ -7,6 +7,7 @@
 import mmengine
 import numpy as np
 from mmengine.utils import is_list_of
+from parameterized import parameterized
 
 from mmdet3d.apis import MonoDet3DInferencer
 from mmdet3d.structures import Det3DDataSample
@@ -19,7 +20,11 @@ def test_init(self):
         MonoDet3DInferencer('pgd-kitti')
         # init from cfg
         MonoDet3DInferencer(
-            'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py')
+            'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py',
+            'https://download.openmmlab.com/mmdetection3d/v1.0.0_models/pgd/'
+            'pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d/'
+            'pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d_'
+            '20211022_102608-8a97533b.pth')
 
     def assert_predictions_equal(self, preds1, preds2):
         for pred1, pred2 in zip(preds1, preds2):
@@ -33,70 +38,75 @@ def assert_predictions_equal(self, preds1, preds2):
                 self.assertTrue(
                     np.allclose(pred1['labels_3d'], pred2['labels_3d']))
 
+    @parameterized.expand(['pgd-kitti'])
     def test_call(self, model):
         # single img
-        img_path = 'tests/data/kitti/training/image_2/000007.png'
+        img_path = 'demo/data/kitti/000008.png'
+        calib_path = 'demo/data/kitti/000008.txt'
         inferencer = MonoDet3DInferencer(model)
-        res_path = inferencer(img_path, return_vis=True)
+        inputs = dict(img=img_path, calib=calib_path)
+        res_path = inferencer(inputs, return_vis=True)
         # ndarray
         img = mmcv.imread(img_path)
-        res_ndarray = inferencer(img, return_vis=True)
+        inputs = dict(img=img, calib=calib_path)
+        res_ndarray = inferencer(inputs, return_vis=True)
         self.assert_predictions_equal(res_path['predictions'],
                                       res_ndarray['predictions'])
         self.assertIn('visualization', res_path)
         self.assertIn('visualization', res_ndarray)
 
         # multiple images
-        img_paths = [
-            'tests/data/kitti/training/image_2/000007.png',
-            'tests/data/kitti/training/image_2/000000.png'
+        inputs = [
+            dict(
+                img='demo/data/kitti/000008.png',
+                calib='demo/data/kitti/000008.txt'),
+            dict(
+                img='demo/data/kitti/000008.png',
+                calib='demo/data/kitti/000008.txt')
         ]
-        res_path = inferencer(img_paths, return_vis=True)
+        res_path = inferencer(inputs, return_vis=True)
         # list of ndarray
-        imgs = [mmcv.imread(p) for p in img_paths]
-        res_ndarray = inferencer(imgs, return_vis=True)
+        imgs = [mmcv.imread(p['img']) for p in inputs]
+        inputs[0]['img'] = imgs[0]
+        inputs[1]['img'] = imgs[1]
+        res_ndarray = inferencer(inputs, return_vis=True)
         self.assert_predictions_equal(res_path['predictions'],
                                       res_ndarray['predictions'])
         self.assertIn('visualization', res_path)
         self.assertIn('visualization', res_ndarray)
 
-        # img dir, test different batch sizes
-        img_dir = 'tests/data/kitti/training/image_2/'
-        res_bs1 = inferencer(img_dir, batch_size=1, return_vis=True)
-        res_bs3 = inferencer(img_dir, batch_size=2, return_vis=True)
-        self.assert_predictions_equal(res_bs1['predictions'],
-                                      res_bs3['predictions'])
-        if model == 'pgd-kitti':
-            # There is a jitter operation when the mask is drawn,
-            # so it cannot be asserted.
-            for res_bs1_vis, res_bs3_vis in zip(res_bs1['visualization'],
-                                                res_bs3['visualization']):
-                self.assertTrue(np.allclose(res_bs1_vis, res_bs3_vis))
-
+    @parameterized.expand(['pgd-kitti'])
     def test_visualize(self, model):
-        img_paths = [
-            'tests/data/kitti/training/image_2/000007.png',
-            'tests/data/kitti/training/image_2/000000.png'
+        inputs = [
+            dict(
+                img='demo/data/kitti/000008.png',
+                calib='demo/data/kitti/000008.txt'),
+            dict(
+                img='demo/data/kitti/000008.png',
+                calib='demo/data/kitti/000008.txt')
         ]
         inferencer = MonoDet3DInferencer(model)
         # img_out_dir
         with tempfile.TemporaryDirectory() as tmp_dir:
-            inferencer(img_paths, img_out_dir=tmp_dir)
-            for img_dir in ['000007.png', '000000.png']:
+            inferencer(inputs, img_out_dir=tmp_dir)
+            for img_dir in ['000008.png', '000008.png']:
                 self.assertTrue(osp.exists(osp.join(tmp_dir, img_dir)))
 
+    @parameterized.expand(['pgd-kitti'])
     def test_postprocess(self, model):
         # return_datasample
-        img_path = 'tests/data/kitti/training/image_2/000007.png'
+        img_path = 'demo/data/kitti/000008.png'
+        calib_path = 'demo/data/kitti/000008.txt'
+        inputs = dict(img=img_path, calib=calib_path)
         inferencer = MonoDet3DInferencer(model)
-        res = inferencer(img_path, return_datasamples=True)
+        res = inferencer(inputs, return_datasamples=True)
         self.assertTrue(is_list_of(res['predictions'], Det3DDataSample))
 
         # pred_out_file
         with tempfile.TemporaryDirectory() as tmp_dir:
             pred_out_file = osp.join(tmp_dir, 'tmp.json')
             res = inferencer(
-                img_path, print_result=True, pred_out_file=pred_out_file)
+                inputs, print_result=True, pred_out_file=pred_out_file)
             dumped_res = mmengine.load(pred_out_file)
             self.assert_predictions_equal(res['predictions'],
                                           dumped_res['predictions'])

From 10ec030b0f05f8c4481a3229fc27c59b56785080 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 16 Jan 2023 15:35:57 +0800
Subject: [PATCH 09/13] fix comments

---
 .../apis/inferencers/base_det3d_inferencer.py | 44 +++++++++++--------
 .../apis/inferencers/mono_det3d_inferencer.py | 10 ++++-
 2 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/mmdet3d/apis/inferencers/base_det3d_inferencer.py b/mmdet3d/apis/inferencers/base_det3d_inferencer.py
index 2a13122046..db7b4ae041 100644
--- a/mmdet3d/apis/inferencers/base_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/base_det3d_inferencer.py
@@ -20,22 +20,6 @@
 ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]]
 
 
-def convert_SyncBN(config):
-    """Convert config's naiveSyncBN to BN.
-
-    Args:
-         config (str or :obj:`mmengine.Config`): Config file path or the config
-            object.
-    """
-    if isinstance(config, dict):
-        for item in config:
-            if item == 'norm_cfg':
-                config[item]['type'] = config[item]['type']. \
-                                    replace('naiveSyncBN', 'BN')
-            else:
-                convert_SyncBN(config[item])
-
-
 class BaseDet3DInferencer(BaseInferencer):
     """Base 3D object detection inferencer.
 
@@ -44,11 +28,17 @@ class BaseDet3DInferencer(BaseInferencer):
             defined in metafile. For example, it could be
             "pgd-kitti" or
             "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py".
+            If model is not specified, user must provide the
+            `weights` saved by MMEngine which contains the config string.
+            Defaults to None.
         weights (str, optional): Path to the checkpoint. If it is not specified
             and model is a model name of metafile, the weights will be loaded
             from metafile. Defaults to None.
         device (str, optional): Device to run inference. If None, the available
             device will be automatically used. Defaults to None.
+        scope (str, optional): The scope of the model. Defaults to mmdet3d.
+        palette (str): Color palette used for visualization. The order of
+            priority is palette -> config -> checkpoint. Defaults to 'none'.
     """
 
     preprocess_kwargs: set = set()
@@ -62,7 +52,7 @@ class BaseDet3DInferencer(BaseInferencer):
     }
 
     def __init__(self,
-                 model: Union[ModelType, str],
+                 model: Union[ModelType, str, None] = None,
                  weights: Optional[str] = None,
                  device: Optional[str] = None,
                  scope: Optional[str] = 'mmdet3d',
@@ -72,13 +62,28 @@ def __init__(self,
         super().__init__(
             model=model, weights=weights, device=device, scope=scope)
 
+    def _convert_syncbn(self, cfg: ConfigType):
+        """Recursively swap 'naiveSyncBN' for 'BN' in each `norm_cfg` type.
+
+        Args:
+            cfg (dict): Config node, modified in place. Any value that is
+                not a dict (including lists) is left untouched.
+        """
+        if isinstance(cfg, dict):
+            for item in cfg:
+                if item == 'norm_cfg':
+                    cfg[item]['type'] = cfg[item]['type']. \
+                                        replace('naiveSyncBN', 'BN')
+                else:
+                    self._convert_syncbn(cfg[item])
+
     def _init_model(
         self,
         cfg: ConfigType,
         weights: str,
         device: str = 'cpu',
     ) -> nn.Module:
-        convert_SyncBN(cfg.model)
+        self._convert_syncbn(cfg.model)
         cfg.model.train_cfg = None
         model = MODELS.build(cfg.model)
 
@@ -139,6 +144,8 @@ def __call__(self,
             return_datasamples (bool): Whether to return results as
                 :obj:`BaseDataElement`. Defaults to False.
             batch_size (int): Inference batch size. Defaults to 1.
+            return_vis (bool): Whether to return the visualization result.
+                Defaults to False.
             show (bool): Whether to display the visualization results in a
                 popup window. Defaults to False.
             wait_time (float): The interval of show (s). Defaults to 0.
@@ -216,7 +223,6 @@ def postprocess(
             for pred in preds:
                 result = self.pred2dict(pred)
                 results.append(result)
-        # Add img to the results after printing and dumping
         result_dict['predictions'] = results
         if print_result:
             print(result_dict)
diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
index 691cdb1437..c7165886d5 100644
--- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
@@ -23,18 +23,24 @@
 
 
 class MonoDet3DInferencer(BaseDet3DInferencer):
-    """MMDet3D Mono3D inferencer.
+    """MMDet3D Monocular 3D object detection inferencer.
 
     Args:
         model (str, optional): Path to the config file or the model name
             defined in metafile. For example, it could be
             "pgd-kitti" or
             "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py".
+            If model is not specified, user must provide the
+            `weights` saved by MMEngine which contains the config string.
+            Defaults to None.
         weights (str, optional): Path to the checkpoint. If it is not specified
             and model is a model name of metafile, the weights will be loaded
             from metafile. Defaults to None.
         device (str, optional): Device to run inference. If None, the available
             device will be automatically used. Defaults to None.
+        scope (str, optional): The scope of the model. Defaults to mmdet3d.
+        palette (str): Color palette used for visualization. The order of
+            priority is palette -> config -> checkpoint. Defaults to 'none'.
     """
 
     preprocess_kwargs: set = set()
@@ -48,7 +54,7 @@ class MonoDet3DInferencer(BaseDet3DInferencer):
     }
 
     def __init__(self,
-                 model: Union[ModelType, str],
+                 model: Union[ModelType, str, None] = None,
                  weights: Optional[str] = None,
                  device: Optional[str] = None,
                  scope: Optional[str] = 'mmdet3d',

From 0a8898d898107ee545386629be64bf82286c0f64 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 16 Jan 2023 15:49:50 +0800
Subject: [PATCH 10/13] rename pgd-kitti to pgd_kitti

---
 configs/pgd/metafile.yml                                  | 2 +-
 mmdet3d/apis/inferencers/mono_det3d_inferencer.py         | 2 +-
 .../test_inferencers/test_mono3d_det_inferencer.py        | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/configs/pgd/metafile.yml b/configs/pgd/metafile.yml
index 8870ed426e..676f58edb0 100644
--- a/configs/pgd/metafile.yml
+++ b/configs/pgd/metafile.yml
@@ -18,7 +18,7 @@ Collections:
 Models:
   - Name: pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d
     Alias:
-       - pgd-kitti
+       - pgd_kitti
     In Collection: PGD
     Config: configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py
     Metadata:
diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
index c7165886d5..95259da399 100644
--- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
+++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py
@@ -28,7 +28,7 @@ class MonoDet3DInferencer(BaseDet3DInferencer):
     Args:
         model (str, optional): Path to the config file or the model name
             defined in metafile. For example, it could be
-            "pgd-kitti" or
+            "pgd_kitti" or
             "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py".
             If model is not specified, user must provide the
             `weights` saved by MMEngine which contains the config string.
diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
index de3e657222..71df09f78f 100644
--- a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
+++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py
@@ -17,7 +17,7 @@ class TestMonoDet3DInferencer(TestCase):
 
     def test_init(self):
         # init from metafile
-        MonoDet3DInferencer('pgd-kitti')
+        MonoDet3DInferencer('pgd_kitti')
         # init from cfg
         MonoDet3DInferencer(
             'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py',
@@ -38,7 +38,7 @@ def assert_predictions_equal(self, preds1, preds2):
                 self.assertTrue(
                     np.allclose(pred1['labels_3d'], pred2['labels_3d']))
 
-    @parameterized.expand(['pgd-kitti'])
+    @parameterized.expand(['pgd_kitti'])
     def test_call(self, model):
         # single img
         img_path = 'demo/data/kitti/000008.png'
@@ -75,7 +75,7 @@ def test_call(self, model):
         self.assertIn('visualization', res_path)
         self.assertIn('visualization', res_ndarray)
 
-    @parameterized.expand(['pgd-kitti'])
+    @parameterized.expand(['pgd_kitti'])
     def test_visualize(self, model):
         inputs = [
             dict(
@@ -92,7 +92,7 @@ def test_visualize(self, model):
             for img_dir in ['000008.png', '000008.png']:
                 self.assertTrue(osp.exists(osp.join(tmp_dir, img_dir)))
 
-    @parameterized.expand(['pgd-kitti'])
+    @parameterized.expand(['pgd_kitti'])
     def test_postprocess(self, model):
         # return_datasample
         img_path = 'demo/data/kitti/000008.png'

From dc43aaa83a40c1b745d7f0b268df71cd3aea656a Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 30 Jan 2023 19:44:48 +0800
Subject: [PATCH 11/13] add parameterized in tests.txt

---
 requirements/tests.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements/tests.txt b/requirements/tests.txt
index 303cc37d6f..563fc4682d 100644
--- a/requirements/tests.txt
+++ b/requirements/tests.txt
@@ -5,6 +5,7 @@ interrogate
 isort
 # Note: used for kwarray.group_items, this may be ported to mmcv in the future.
 kwarray
+parameterized
 pytest
 pytest-cov
 pytest-runner

From d6a3cea877882a07770d32f69b3580ee776ae572 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 30 Jan 2023 20:00:24 +0800
Subject: [PATCH 12/13] add txt file

---
 demo/data/kitti/000008.txt | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 demo/data/kitti/000008.txt

diff --git a/demo/data/kitti/000008.txt b/demo/data/kitti/000008.txt
new file mode 100644
index 0000000000..2bc863a8fd
--- /dev/null
+++ b/demo/data/kitti/000008.txt
@@ -0,0 +1 @@
+721.5377 0.0 609.5593 44.85728 0.0 721.5377 172.854 0.2163791 0.0 0.0 1.0 0.002745884 0.0 0.0 0.0 1.0

From 966795144ec614938de79f07cc89476114561219 Mon Sep 17 00:00:00 2001
From: ChaimZhu <zhuchenming@pjlab.org.cn>
Date: Mon, 30 Jan 2023 20:19:48 +0800
Subject: [PATCH 13/13] update loadimgfromfilemono3d to fit latest mmcv

---
 mmdet3d/datasets/transforms/loading.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py
index f7d61fe9fd..a3bcefc5af 100644
--- a/mmdet3d/datasets/transforms/loading.py
+++ b/mmdet3d/datasets/transforms/loading.py
@@ -4,6 +4,7 @@
 
 import mmcv
 import mmengine
+import mmengine.fileio as fileio
 import numpy as np
 from mmcv.transforms import LoadImageFromFile
 from mmcv.transforms.base import BaseTransform
@@ -255,9 +256,21 @@ def transform(self, results: dict) -> dict:
                 'Currently we only support load image from kitti and'
                 'nuscenes datasets')
 
-        img_bytes = self.file_client.get(filename)
-        img = mmcv.imfrombytes(
-            img_bytes, flag=self.color_type, backend=self.imdecode_backend)
+        try:
+            if self.file_client_args is not None:
+                file_client = fileio.FileClient.infer_client(
+                    self.file_client_args, filename)
+                img_bytes = file_client.get(filename)
+            else:
+                img_bytes = fileio.get(
+                    filename, backend_args=self.backend_args)
+            img = mmcv.imfrombytes(
+                img_bytes, flag=self.color_type, backend=self.imdecode_backend)
+        except Exception as e:
+            if self.ignore_empty:
+                return None
+            else:
+                raise e
         if self.to_float32:
             img = img.astype(np.float32)