From 83a16d2447422ea82c909f78f9f46811ae27dede Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Wed, 4 Jan 2023 18:50:31 +0800 Subject: [PATCH 01/13] add mono3d inferencer --- mmdet3d/apis/inferencers/__init__.py | 1 + .../apis/inferencers/mono3d_det_inferencer.py | 344 ++++++++++++++++++ mmdet3d/datasets/transforms/loading.py | 49 +++ 3 files changed, 394 insertions(+) create mode 100644 mmdet3d/apis/inferencers/__init__.py create mode 100644 mmdet3d/apis/inferencers/mono3d_det_inferencer.py diff --git a/mmdet3d/apis/inferencers/__init__.py b/mmdet3d/apis/inferencers/__init__.py new file mode 100644 index 0000000000..ef101fec61 --- /dev/null +++ b/mmdet3d/apis/inferencers/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py b/mmdet3d/apis/inferencers/mono3d_det_inferencer.py new file mode 100644 index 0000000000..de5bd81983 --- /dev/null +++ b/mmdet3d/apis/inferencers/mono3d_det_inferencer.py @@ -0,0 +1,344 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import os.path as osp +import warnings +from typing import Dict, List, Optional, Sequence, Tuple, Union + +import mmcv +import mmengine +import numpy as np +import torch.nn as nn +from mmdet.registry import DATASETS, MODELS +from mmdet.utils import ConfigType, register_all_modules +from mmengine.dataset import Compose +from mmengine.infer.infer import BaseInferencer, ModelType +from mmengine.runner import load_checkpoint +from mmengine.structures import InstanceData +from mmengine.visualization import Visualizer + +from ..evaluation import get_classes + +InstanceList = List[InstanceData] +InputType = Union[str, np.ndarray] +InputsType = Union[InputType, Sequence[InputType]] +PredType = Union[InstanceData, InstanceList] +ImgType = Union[np.ndarray, Sequence[np.ndarray]] +ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]] + + +class Mono3DDetInferencer(BaseInferencer): + """MMDet inferencer. 
+ + Args: + model (str, optional): Path to the config file or the model name + defined in metafile. For example, it could be + "yolox-s" or "configs/yolox/yolox_s_8xb8-300e_coco.py". + weights (str, optional): Path to the checkpoint. If it is not specified + and model is a model name of metafile, the weights will be loaded + from metafile. Defaults to None. + device (str, optional): Device to run inference. If None, the available + device will be automatically used. Defaults to None. + """ + + preprocess_kwargs: set = set() + forward_kwargs: set = set() + visualize_kwargs: set = { + 'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr', + 'img_out_dir' + } + postprocess_kwargs: set = { + 'print_result', 'pred_out_file', 'return_datasample' + } + + def __init__(self, + model: Union[ModelType, str], + weights: Optional[str] = None, + device: Optional[str] = None, + scope: Optional[str] = 'mmdet', + palette: str = 'none') -> None: + # A global counter tracking the number of images processed, for + # naming of the output images + self.num_visualized_imgs = 0 + self.palette = palette + register_all_modules() + super().__init__( + model=model, weights=weights, device=device, scope=scope) + + def _init_model( + self, + cfg: ConfigType, + weights: str, + device: str = 'cpu', + ) -> nn.Module: + if 'init_cfg' in cfg.model.backbone: + cfg.model.backbone.init_cfg = None + model = MODELS.build(cfg.model) + + checkpoint = load_checkpoint(model, weights, map_location='cpu') + checkpoint_meta = checkpoint.get('meta', {}) + # save the dataset_meta in the model for convenience + if 'dataset_meta' in checkpoint_meta: + # mmdet 3.x, all keys should be lowercase + model.dataset_meta = { + k.lower(): v + for k, v in checkpoint_meta['dataset_meta'].items() + } + elif 'CLASSES' in checkpoint_meta: + # < mmdet 3.x + classes = checkpoint_meta['CLASSES'] + model.dataset_meta = {'classes': classes} + else: + warnings.simplefilter('once') + warnings.warn( + 'dataset_meta or class 
names are not saved in the ' + 'checkpoint\'s meta data, use COCO classes by default.') + model.dataset_meta = {'classes': get_classes('coco')} + + # Priority: args.palette -> config -> checkpoint + if self.palette != 'none': + model.dataset_meta['palette'] = self.palette + else: + test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset) + # lazy init. We only need the metainfo. + test_dataset_cfg['lazy_init'] = True + metainfo = DATASETS.build(test_dataset_cfg).metainfo + cfg_palette = metainfo.get('palette', None) + if cfg_palette is not None: + model.dataset_meta['palette'] = cfg_palette + else: + if 'palette' not in model.dataset_meta: + warnings.warn( + 'palette does not exist, random is used by default. ' + 'You can also set the palette to customize.') + model.dataset_meta['palette'] = 'random' + + model.cfg = cfg # save the config in the model for convenience + model.to(device) + model.eval() + return model + + def _init_pipeline(self, cfg: ConfigType) -> Compose: + """Initialize the test pipeline.""" + pipeline_cfg = cfg.test_dataloader.dataset.pipeline + + # For inference, the key of ``img_id`` is not used. + if 'meta_keys' in pipeline_cfg[-1]: + pipeline_cfg[-1]['meta_keys'] = tuple( + meta_key for meta_key in pipeline_cfg[-1]['meta_keys'] + if meta_key != 'img_id') + + load_img_idx = self._get_transform_idx(pipeline_cfg, + 'LoadImageFromFileMono3D') + if load_img_idx == -1: + raise ValueError( + 'LoadImageFromFileMono3D is not found in the test pipeline') + pipeline_cfg[load_img_idx]['type'] = 'Mono3DInferencerLoader' + return Compose(pipeline_cfg) + + def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int: + """Returns the index of the transform in a pipeline. + + If the transform is not found, returns -1. 
+ """ + for i, transform in enumerate(pipeline_cfg): + if transform['type'] == name: + return i + return -1 + + def _init_visualizer(self, cfg: ConfigType) -> Optional[Visualizer]: + visualizer = super()._init_visualizer(cfg) + visualizer.dataset_meta = self.model.dataset_meta + return visualizer + + def __call__(self, + inputs: InputsType, + return_datasamples: bool = False, + batch_size: int = 1, + return_vis: bool = False, + show: bool = False, + wait_time: int = 0, + draw_pred: bool = True, + pred_score_thr: float = 0.3, + img_out_dir: str = '', + print_result: bool = False, + pred_out_file: str = '', + **kwargs) -> dict: + """Call the inferencer. + Args: + inputs (InputsType): Inputs for the inferencer. + return_datasamples (bool): Whether to return results as + :obj:`BaseDataElement`. Defaults to False. + batch_size (int): Inference batch size. Defaults to 1. + show (bool): Whether to display the visualization results in a + popup window. Defaults to False. + wait_time (float): The interval of show (s). Defaults to 0. + draw_pred (bool): Whether to draw predicted bounding boxes. + Defaults to True. + pred_score_thr (float): Minimum score of bboxes to draw. + Defaults to 0.3. + img_out_dir (str): Output directory of visualization results. + If left as empty, no file will be saved. Defaults to ''. + print_result (bool): Whether to print the inference result w/o + visualization to the console. Defaults to False. + pred_out_file: File to save the inference results w/o + visualization. If left as empty, no file will be saved. + Defaults to ''. + **kwargs: Other keyword arguments passed to :meth:`preprocess`, + :meth:`forward`, :meth:`visualize` and :meth:`postprocess`. + Each key in kwargs should be in the corresponding set of + ``preprocess_kwargs``, ``forward_kwargs``, ``visualize_kwargs`` + and ``postprocess_kwargs``. + Returns: + dict: Inference and visualization results. 
+ """ + return super().__call__( + inputs, + return_datasamples, + batch_size, + return_vis=return_vis, + show=show, + wait_time=wait_time, + draw_pred=draw_pred, + pred_score_thr=pred_score_thr, + img_out_dir=img_out_dir, + print_result=print_result, + pred_out_file=pred_out_file, + **kwargs) + + def visualize(self, + inputs: InputsType, + preds: PredType, + return_vis: bool = False, + show: bool = False, + wait_time: int = 0, + draw_pred: bool = True, + pred_score_thr: float = 0.3, + img_out_dir: str = '') -> Union[List[np.ndarray], None]: + """Visualize predictions. + + Args: + inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer. + preds (List[Dict]): Predictions of the model. + return_vis (bool): Whether to return the visualization result. + Defaults to False. + show (bool): Whether to display the image in a popup window. + Defaults to False. + wait_time (float): The interval of show (s). Defaults to 0. + draw_pred (bool): Whether to draw predicted bounding boxes. + Defaults to True. + pred_score_thr (float): Minimum score of bboxes to draw. + Defaults to 0.3. + img_out_dir (str): Output directory of visualization results. + If left as empty, no file will be saved. Defaults to ''. + Returns: + List[np.ndarray] or None: Returns visualization results only if + applicable. 
+ """ + if self.visualizer is None or (not show and img_out_dir == '' + and not return_vis): + return None + + if getattr(self, 'visualizer') is None: + raise ValueError('Visualization needs the "visualizer" term' + 'defined in the config, but got None.') + + results = [] + + for single_input, pred in zip(inputs, preds): + if isinstance(single_input, str): + img_bytes = mmengine.fileio.get(single_input) + img = mmcv.imfrombytes(img_bytes) + img = img[:, :, ::-1] + img_name = osp.basename(single_input) + elif isinstance(single_input, np.ndarray): + img = single_input.copy() + img_num = str(self.num_visualized_imgs).zfill(8) + img_name = f'{img_num}.jpg' + else: + raise ValueError('Unsupported input type: ' + f'{type(single_input)}') + + out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \ + else None + + self.visualizer.add_datasample( + img_name, + img, + pred, + show=show, + wait_time=wait_time, + draw_gt=False, + draw_pred=draw_pred, + pred_score_thr=pred_score_thr, + out_file=out_file, + ) + results.append(img) + self.num_visualized_imgs += 1 + + return results + + def postprocess( + self, + preds: PredType, + visualization: Optional[List[np.ndarray]] = None, + return_datasample: bool = False, + print_result: bool = False, + pred_out_file: str = '', + ) -> Union[ResType, Tuple[ResType, np.ndarray]]: + """Process the predictions and visualization results from ``forward`` + and ``visualize``. + This method should be responsible for the following tasks: + 1. Convert datasamples into a json-serializable dict if needed. + 2. Pack the predictions and visualization results and return them. + 3. Dump or log the predictions. + Args: + preds (List[Dict]): Predictions of the model. + visualization (Optional[np.ndarray]): Visualized predictions. + return_datasample (bool): Whether to use Datasample to store + inference results. If False, dict will be used. + print_result (bool): Whether to print the inference result w/o + visualization to the console. 
Defaults to False. + pred_out_file: File to save the inference results w/o + visualization. If left as empty, no file will be saved. + Defaults to ''. + Returns: + dict: Inference and visualization results with key ``predictions`` + and ``visualization``. + - ``visualization`` (Any): Returned by :meth:`visualize`. + - ``predictions`` (dict or DataSample): Returned by + :meth:`forward` and processed in :meth:`postprocess`. + If ``return_datasample=False``, it usually should be a + json-serializable dict containing only basic data elements such + as strings and numbers. + """ + result_dict = {} + results = preds + if not return_datasample: + results = [] + for pred in preds: + result = self.pred2dict(pred) + results.append(result) + # Add img to the results after printing and dumping + result_dict['predictions'] = results + if print_result: + print(result_dict) + if pred_out_file != '': + mmengine.dump(result_dict, pred_out_file) + result_dict['visualization'] = visualization + return result_dict + + def pred2dict(self, data_sample: InstanceData) -> Dict: + """Extract elements necessary to represent a prediction into a + dictionary. + + It's better to contain only basic data elements such as strings and + numbers in order to guarantee it's json-serializable. 
+ """ + pred_instances = data_sample.pred_instances.numpy() + result = { + 'bboxes': pred_instances.bboxes.tolist(), + 'labels': pred_instances.labels.tolist(), + 'scores': pred_instances.scores.tolist() + } + + return result diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py index 718ee9eb78..eb80592ff3 100644 --- a/mmdet3d/datasets/transforms/loading.py +++ b/mmdet3d/datasets/transforms/loading.py @@ -931,3 +931,52 @@ def __repr__(self) -> str: repr_str += f'{indent_str}with_bbox_depth={self.with_bbox_depth}, ' repr_str += f'{indent_str}poly2mask={self.poly2mask})' return repr_str + + +@TRANSFORMS.register_module() +class Mono3DInferencerLoader(BaseTransform): + """Load an image from ``results['img']``. Similar with + :obj:`LoadImageFromFileMono3D`, but the image has been loaded as + :obj:`np.ndarray` in ``results['img']``. Can be used when loading image + from webcam. Required Keys: + + - img + Modified Keys: + - img + - img_path + - img_shape + - ori_shape + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + """ + + def __init__(self, **kwargs) -> None: + super().__init__() + self.from_file = TRANSFORMS.build( + dict(type='LoadImageFromFileMono3D', **kwargs)) + self.from_ndarray = TRANSFORMS.build( + dict(type='LoadImageFromNDArray', **kwargs)) + + def transform(self, single_input: Union[str, np.ndarray, dict]) -> dict: + """Transform function to add image meta information. + + Args: + single_input (dict): Result dict with Webcam read image in + ``results['img']``. + Returns: + dict: The dict contains loaded image and meta information. 
+ """ + if isinstance(single_input, str): + inputs = dict(img_path=single_input) + elif isinstance(single_input, np.ndarray): + inputs = dict(img=single_input) + elif isinstance(single_input, dict): + inputs = single_input + else: + raise NotImplementedError + + if 'img' in inputs: + return self.from_ndarray(inputs) + return self.from_file(inputs) From f031d55db2f7a2c1bb1ab89acdf18e8a1620bd82 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Fri, 6 Jan 2023 17:45:59 +0800 Subject: [PATCH 02/13] update mono3d inferenceer --- .../apis/inferencers/mono3d_det_inferencer.py | 102 ++++++++---------- 1 file changed, 47 insertions(+), 55 deletions(-) diff --git a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py b/mmdet3d/apis/inferencers/mono3d_det_inferencer.py index de5bd81983..31bad32ee5 100644 --- a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py +++ b/mmdet3d/apis/inferencers/mono3d_det_inferencer.py @@ -1,22 +1,19 @@ # Copyright (c) OpenMMLab. All rights reserved. -import copy import os.path as osp -import warnings from typing import Dict, List, Optional, Sequence, Tuple, Union import mmcv import mmengine import numpy as np import torch.nn as nn -from mmdet.registry import DATASETS, MODELS -from mmdet.utils import ConfigType, register_all_modules from mmengine.dataset import Compose from mmengine.infer.infer import BaseInferencer, ModelType from mmengine.runner import load_checkpoint from mmengine.structures import InstanceData from mmengine.visualization import Visualizer -from ..evaluation import get_classes +from mmdet3d.registry import MODELS +from mmdet3d.utils import ConfigType, register_all_modules InstanceList = List[InstanceData] InputType = Union[str, np.ndarray] @@ -26,13 +23,30 @@ ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]] +def convert_SyncBN(config): + """Convert config's naiveSyncBN to BN. + + Args: + config (str or :obj:`mmengine.Config`): Config file path or the config + object. 
+ """ + if isinstance(config, dict): + for item in config: + if item == 'norm_cfg': + config[item]['type'] = config[item]['type']. \ + replace('naiveSyncBN', 'BN') + else: + convert_SyncBN(config[item]) + + class Mono3DDetInferencer(BaseInferencer): - """MMDet inferencer. + """MMDet3D Mono3D inferencer. Args: model (str, optional): Path to the config file or the model name defined in metafile. For example, it could be - "yolox-s" or "configs/yolox/yolox_s_8xb8-300e_coco.py". + "pgd-kitti" or + "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py". weights (str, optional): Path to the checkpoint. If it is not specified and model is a model name of metafile, the weights will be loaded from metafile. Defaults to None. @@ -54,7 +68,7 @@ def __init__(self, model: Union[ModelType, str], weights: Optional[str] = None, device: Optional[str] = None, - scope: Optional[str] = 'mmdet', + scope: Optional[str] = 'mmdet3d', palette: str = 'none') -> None: # A global counter tracking the number of images processed, for # naming of the output images @@ -70,47 +84,29 @@ def _init_model( weights: str, device: str = 'cpu', ) -> nn.Module: - if 'init_cfg' in cfg.model.backbone: - cfg.model.backbone.init_cfg = None + convert_SyncBN(cfg.model) + cfg.model.train_cfg = None model = MODELS.build(cfg.model) checkpoint = load_checkpoint(model, weights, map_location='cpu') - checkpoint_meta = checkpoint.get('meta', {}) + dataset_meta = checkpoint['meta'].get('dataset_meta', None) # save the dataset_meta in the model for convenience - if 'dataset_meta' in checkpoint_meta: - # mmdet 3.x, all keys should be lowercase - model.dataset_meta = { - k.lower(): v - for k, v in checkpoint_meta['dataset_meta'].items() - } - elif 'CLASSES' in checkpoint_meta: - # < mmdet 3.x - classes = checkpoint_meta['CLASSES'] - model.dataset_meta = {'classes': classes} - else: - warnings.simplefilter('once') - warnings.warn( - 'dataset_meta or class names are not saved in the ' - 'checkpoint\'s meta data, 
use COCO classes by default.') - model.dataset_meta = {'classes': get_classes('coco')} - - # Priority: args.palette -> config -> checkpoint - if self.palette != 'none': - model.dataset_meta['palette'] = self.palette + if 'dataset_meta' in checkpoint.get('meta', {}): + # mmdet3d 1.x + model.dataset_meta = dataset_meta + elif 'CLASSES' in checkpoint.get('meta', {}): + # < mmdet3d 1.x + classes = checkpoint['meta']['CLASSES'] + model.dataset_meta = {'CLASSES': classes} + + if 'PALETTE' in checkpoint.get('meta', {}): # 3D Segmentor + model.dataset_meta['PALETTE'] = checkpoint['meta']['PALETTE'] else: - test_dataset_cfg = copy.deepcopy(cfg.test_dataloader.dataset) - # lazy init. We only need the metainfo. - test_dataset_cfg['lazy_init'] = True - metainfo = DATASETS.build(test_dataset_cfg).metainfo - cfg_palette = metainfo.get('palette', None) - if cfg_palette is not None: - model.dataset_meta['palette'] = cfg_palette - else: - if 'palette' not in model.dataset_meta: - warnings.warn( - 'palette does not exist, random is used by default. ' - 'You can also set the palette to customize.') - model.dataset_meta['palette'] = 'random' + # < mmdet3d 1.x + model.dataset_meta = {'CLASSES': cfg.class_names} + + if 'PALETTE' in checkpoint.get('meta', {}): # 3D Segmentor + model.dataset_meta['PALETTE'] = checkpoint['meta']['PALETTE'] model.cfg = cfg # save the config in the model for convenience model.to(device) @@ -121,12 +117,6 @@ def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" pipeline_cfg = cfg.test_dataloader.dataset.pipeline - # For inference, the key of ``img_id`` is not used. 
- if 'meta_keys' in pipeline_cfg[-1]: - pipeline_cfg[-1]['meta_keys'] = tuple( - meta_key for meta_key in pipeline_cfg[-1]['meta_keys'] - if meta_key != 'img_id') - load_img_idx = self._get_transform_idx(pipeline_cfg, 'LoadImageFromFileMono3D') if load_img_idx == -1: @@ -261,9 +251,10 @@ def visualize(self, out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \ else None + data_input = dict(img=img) self.visualizer.add_datasample( img_name, - img, + data_input, pred, show=show, wait_time=wait_time, @@ -271,6 +262,7 @@ def visualize(self, draw_pred=draw_pred, pred_score_thr=pred_score_thr, out_file=out_file, + vis_task='mono_det', ) results.append(img) self.num_visualized_imgs += 1 @@ -334,11 +326,11 @@ def pred2dict(self, data_sample: InstanceData) -> Dict: It's better to contain only basic data elements such as strings and numbers in order to guarantee it's json-serializable. """ - pred_instances = data_sample.pred_instances.numpy() + pred_instances = data_sample.pred_instances_3d.numpy() result = { - 'bboxes': pred_instances.bboxes.tolist(), - 'labels': pred_instances.labels.tolist(), - 'scores': pred_instances.scores.tolist() + 'bboxes_3d': pred_instances.bboxes_3d.tolist(), + 'labels_3d': pred_instances.labels_3d.tolist(), + 'scores_3d': pred_instances.scores_3d.tolist() } return result From 8f1ad746b806ea69917485344a261feff7dd1d66 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Mon, 9 Jan 2023 10:55:26 +0800 Subject: [PATCH 03/13] update init file --- mmdet3d/apis/inferencers/__init__.py | 3 +++ mmdet3d/datasets/transforms/__init__.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mmdet3d/apis/inferencers/__init__.py b/mmdet3d/apis/inferencers/__init__.py index ef101fec61..dc024e102e 100644 --- a/mmdet3d/apis/inferencers/__init__.py +++ b/mmdet3d/apis/inferencers/__init__.py @@ -1 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+from .mono3d_det_inferencer import Mono3DDetInferencer + +__all__ = ['Mono3DDetInferencer'] diff --git a/mmdet3d/datasets/transforms/__init__.py b/mmdet3d/datasets/transforms/__init__.py index 1a21814be2..86e79307ae 100644 --- a/mmdet3d/datasets/transforms/__init__.py +++ b/mmdet3d/datasets/transforms/__init__.py @@ -4,7 +4,8 @@ from .loading import (LoadAnnotations3D, LoadImageFromFileMono3D, LoadMultiViewImageFromFiles, LoadPointsFromDict, LoadPointsFromFile, LoadPointsFromMultiSweeps, - NormalizePointsColor, PointSegClassMapping) + Mono3DInferencerLoader, NormalizePointsColor, + PointSegClassMapping) from .test_time_aug import MultiScaleFlipAug3D # yapf: disable from .transforms_3d import (AffineResize, BackgroundPointsFilter, @@ -28,5 +29,5 @@ 'IndoorPatchPointSample', 'LoadImageFromFileMono3D', 'ObjectNameFilter', 'RandomDropPointsColor', 'RandomJitterPoints', 'AffineResize', 'RandomShiftScale', 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D', - 'MultiViewWrapper', 'PhotoMetricDistortion3D' + 'MultiViewWrapper', 'PhotoMetricDistortion3D', 'Mono3DInferencerLoader' ] From 4116433551c3c3b0c6dd514ec3e0560b527539d3 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Mon, 9 Jan 2023 11:36:51 +0800 Subject: [PATCH 04/13] update unit test --- configs/pgd/metafile.yml | 2 + mmdet3d/apis/__init__.py | 10 +- .../test_mono3d_det_inferencer.py | 102 ++++++++++++++++++ 3 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py diff --git a/configs/pgd/metafile.yml b/configs/pgd/metafile.yml index d48b3b6a93..8870ed426e 100644 --- a/configs/pgd/metafile.yml +++ b/configs/pgd/metafile.yml @@ -17,6 +17,8 @@ Collections: Models: - Name: pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d + Alias: + - pgd-kitti In Collection: PGD Config: configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py Metadata: diff --git a/mmdet3d/apis/__init__.py b/mmdet3d/apis/__init__.py index 2926178c85..d1ed517444 100644 --- 
a/mmdet3d/apis/__init__.py +++ b/mmdet3d/apis/__init__.py @@ -3,12 +3,10 @@ inference_mono_3d_detector, inference_multi_modality_detector, inference_segmentor, init_model) +from .inferencers import Mono3DDetInferencer __all__ = [ - 'inference_detector', - 'init_model', - 'inference_mono_3d_detector', - 'convert_SyncBN', - 'inference_multi_modality_detector', - 'inference_segmentor', + 'inference_detector', 'init_model', 'inference_mono_3d_detector', + 'convert_SyncBN', 'inference_multi_modality_detector', + 'inference_segmentor', 'Mono3DDetInferencer' ] diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py new file mode 100644 index 0000000000..cd67b7240e --- /dev/null +++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py @@ -0,0 +1,102 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import tempfile +from unittest import TestCase + +import mmcv +import mmengine +import numpy as np +from mmengine.utils import is_list_of + +from mmdet3d.apis import Mono3DDetInferencer +from mmdet3d.structures import Det3DDataSample + + +class TestDetInferencer(TestCase): + + def test_init(self): + # init from metafile + Mono3DDetInferencer('pgd-kitti') + # init from cfg + Mono3DDetInferencer( + 'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py') + + def assert_predictions_equal(self, preds1, preds2): + for pred1, pred2 in zip(preds1, preds2): + if 'bboxes_3d' in pred1: + self.assertTrue( + np.allclose(pred1['bboxes_3d'], pred2['bboxes_3d'], 0.1)) + if 'scores_3d' in pred1: + self.assertTrue( + np.allclose(pred1['scores_3d'], pred2['scores_3d'], 0.1)) + if 'labels_3d' in pred1: + self.assertTrue( + np.allclose(pred1['labels_3d'], pred2['labels_3d'])) + + def test_call(self, model): + # single img + img_path = 'tests/data/kitti/training/image_2/000007.png' + inferencer = Mono3DDetInferencer(model) + res_path = inferencer(img_path, 
return_vis=True) + # ndarray + img = mmcv.imread(img_path) + res_ndarray = inferencer(img, return_vis=True) + self.assert_predictions_equal(res_path['predictions'], + res_ndarray['predictions']) + self.assertIn('visualization', res_path) + self.assertIn('visualization', res_ndarray) + + # multiple images + img_paths = [ + 'tests/data/kitti/training/image_2/000007.png', + 'tests/data/kitti/training/image_2/000000.png' + ] + res_path = inferencer(img_paths, return_vis=True) + # list of ndarray + imgs = [mmcv.imread(p) for p in img_paths] + res_ndarray = inferencer(imgs, return_vis=True) + self.assert_predictions_equal(res_path['predictions'], + res_ndarray['predictions']) + self.assertIn('visualization', res_path) + self.assertIn('visualization', res_ndarray) + + # img dir, test different batch sizes + img_dir = 'tests/data/kitti/training/image_2/' + res_bs1 = inferencer(img_dir, batch_size=1, return_vis=True) + res_bs3 = inferencer(img_dir, batch_size=2, return_vis=True) + self.assert_predictions_equal(res_bs1['predictions'], + res_bs3['predictions']) + if model == 'pgd-kitti': + # There is a jitter operation when the mask is drawn, + # so it cannot be asserted. 
+ for res_bs1_vis, res_bs3_vis in zip(res_bs1['visualization'], + res_bs3['visualization']): + self.assertTrue(np.allclose(res_bs1_vis, res_bs3_vis)) + + def test_visualize(self, model): + img_paths = [ + 'tests/data/kitti/training/image_2/000007.png', + 'tests/data/kitti/training/image_2/000000.png' + ] + inferencer = Mono3DDetInferencer(model) + # img_out_dir + with tempfile.TemporaryDirectory() as tmp_dir: + inferencer(img_paths, img_out_dir=tmp_dir) + for img_dir in ['000007.png', '000000.png']: + self.assertTrue(osp.exists(osp.join(tmp_dir, img_dir))) + + def test_postprocess(self, model): + # return_datasample + img_path = 'tests/data/kitti/training/image_2/000007.png' + inferencer = Mono3DDetInferencer(model) + res = inferencer(img_path, return_datasamples=True) + self.assertTrue(is_list_of(res['predictions'], Det3DDataSample)) + + # pred_out_file + with tempfile.TemporaryDirectory() as tmp_dir: + pred_out_file = osp.join(tmp_dir, 'tmp.json') + res = inferencer( + img_path, print_result=True, pred_out_file=pred_out_file) + dumped_res = mmengine.load(pred_out_file) + self.assert_predictions_equal(res['predictions'], + dumped_res['predictions']) From 09ddf1eddf12e2fbb05443f96243acc635e7c797 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Mon, 9 Jan 2023 11:37:53 +0800 Subject: [PATCH 05/13] fix name --- tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py index cd67b7240e..49650c51e2 100644 --- a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py +++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py @@ -12,7 +12,7 @@ from mmdet3d.structures import Det3DDataSample -class TestDetInferencer(TestCase): +class TestMono3DDetInferencer(TestCase): def test_init(self): # init from metafile From af078eb88f5f0829cdce39803a78e6619b090260 Mon Sep 
17 00:00:00 2001 From: ChaimZhu Date: Mon, 9 Jan 2023 14:43:08 +0800 Subject: [PATCH 06/13] add base_det3d_inferencer --- mmdet3d/apis/__init__.py | 4 +- mmdet3d/apis/inferencers/__init__.py | 5 +- ...inferencer.py => base_det3d_inferencer.py} | 96 +----------- .../apis/inferencers/mono_det3d_inferencer.py | 148 ++++++++++++++++++ .../test_mono3d_det_inferencer.py | 14 +- 5 files changed, 162 insertions(+), 105 deletions(-) rename mmdet3d/apis/inferencers/{mono3d_det_inferencer.py => base_det3d_inferencer.py} (72%) create mode 100644 mmdet3d/apis/inferencers/mono_det3d_inferencer.py diff --git a/mmdet3d/apis/__init__.py b/mmdet3d/apis/__init__.py index d1ed517444..d1b52e4c7c 100644 --- a/mmdet3d/apis/__init__.py +++ b/mmdet3d/apis/__init__.py @@ -3,10 +3,10 @@ inference_mono_3d_detector, inference_multi_modality_detector, inference_segmentor, init_model) -from .inferencers import Mono3DDetInferencer +from .inferencers import BaseDet3DInferencer, MonoDet3DInferencer __all__ = [ 'inference_detector', 'init_model', 'inference_mono_3d_detector', 'convert_SyncBN', 'inference_multi_modality_detector', - 'inference_segmentor', 'Mono3DDetInferencer' + 'inference_segmentor', 'BaseDet3DInferencer', 'MonoDet3DInferencer' ] diff --git a/mmdet3d/apis/inferencers/__init__.py b/mmdet3d/apis/inferencers/__init__.py index dc024e102e..0aaf0b2984 100644 --- a/mmdet3d/apis/inferencers/__init__.py +++ b/mmdet3d/apis/inferencers/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from .mono3d_det_inferencer import Mono3DDetInferencer +from .base_det3d_inferencer import BaseDet3DInferencer +from .mono_det3d_inferencer import MonoDet3DInferencer -__all__ = ['Mono3DDetInferencer'] +__all__ = ['BaseDet3DInferencer', 'MonoDet3DInferencer'] diff --git a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py b/mmdet3d/apis/inferencers/base_det3d_inferencer.py similarity index 72% rename from mmdet3d/apis/inferencers/mono3d_det_inferencer.py rename to mmdet3d/apis/inferencers/base_det3d_inferencer.py index 31bad32ee5..03e70662ac 100644 --- a/mmdet3d/apis/inferencers/mono3d_det_inferencer.py +++ b/mmdet3d/apis/inferencers/base_det3d_inferencer.py @@ -1,12 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp from typing import Dict, List, Optional, Sequence, Tuple, Union -import mmcv import mmengine import numpy as np import torch.nn as nn -from mmengine.dataset import Compose from mmengine.infer.infer import BaseInferencer, ModelType from mmengine.runner import load_checkpoint from mmengine.structures import InstanceData @@ -39,8 +36,8 @@ def convert_SyncBN(config): convert_SyncBN(config[item]) -class Mono3DDetInferencer(BaseInferencer): - """MMDet3D Mono3D inferencer. +class BaseDet3DInferencer(BaseInferencer): + """Base 3D object detection inferencer. 
Args: model (str, optional): Path to the config file or the model name @@ -70,9 +67,6 @@ def __init__(self, device: Optional[str] = None, scope: Optional[str] = 'mmdet3d', palette: str = 'none') -> None: - # A global counter tracking the number of images processed, for - # naming of the output images - self.num_visualized_imgs = 0 self.palette = palette register_all_modules() super().__init__( @@ -113,18 +107,6 @@ def _init_model( model.eval() return model - def _init_pipeline(self, cfg: ConfigType) -> Compose: - """Initialize the test pipeline.""" - pipeline_cfg = cfg.test_dataloader.dataset.pipeline - - load_img_idx = self._get_transform_idx(pipeline_cfg, - 'LoadImageFromFileMono3D') - if load_img_idx == -1: - raise ValueError( - 'LoadImageFromFileMono3D is not found in the test pipeline') - pipeline_cfg[load_img_idx]['type'] = 'Mono3DInferencerLoader' - return Compose(pipeline_cfg) - def _get_transform_idx(self, pipeline_cfg: ConfigType, name: str) -> int: """Returns the index of the transform in a pipeline. @@ -195,80 +177,6 @@ def __call__(self, pred_out_file=pred_out_file, **kwargs) - def visualize(self, - inputs: InputsType, - preds: PredType, - return_vis: bool = False, - show: bool = False, - wait_time: int = 0, - draw_pred: bool = True, - pred_score_thr: float = 0.3, - img_out_dir: str = '') -> Union[List[np.ndarray], None]: - """Visualize predictions. - - Args: - inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer. - preds (List[Dict]): Predictions of the model. - return_vis (bool): Whether to return the visualization result. - Defaults to False. - show (bool): Whether to display the image in a popup window. - Defaults to False. - wait_time (float): The interval of show (s). Defaults to 0. - draw_pred (bool): Whether to draw predicted bounding boxes. - Defaults to True. - pred_score_thr (float): Minimum score of bboxes to draw. - Defaults to 0.3. - img_out_dir (str): Output directory of visualization results. 
- If left as empty, no file will be saved. Defaults to ''. - Returns: - List[np.ndarray] or None: Returns visualization results only if - applicable. - """ - if self.visualizer is None or (not show and img_out_dir == '' - and not return_vis): - return None - - if getattr(self, 'visualizer') is None: - raise ValueError('Visualization needs the "visualizer" term' - 'defined in the config, but got None.') - - results = [] - - for single_input, pred in zip(inputs, preds): - if isinstance(single_input, str): - img_bytes = mmengine.fileio.get(single_input) - img = mmcv.imfrombytes(img_bytes) - img = img[:, :, ::-1] - img_name = osp.basename(single_input) - elif isinstance(single_input, np.ndarray): - img = single_input.copy() - img_num = str(self.num_visualized_imgs).zfill(8) - img_name = f'{img_num}.jpg' - else: - raise ValueError('Unsupported input type: ' - f'{type(single_input)}') - - out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \ - else None - - data_input = dict(img=img) - self.visualizer.add_datasample( - img_name, - data_input, - pred, - show=show, - wait_time=wait_time, - draw_gt=False, - draw_pred=draw_pred, - pred_score_thr=pred_score_thr, - out_file=out_file, - vis_task='mono_det', - ) - results.append(img) - self.num_visualized_imgs += 1 - - return results - def postprocess( self, preds: PredType, diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py new file mode 100644 index 0000000000..ec7401bbb3 --- /dev/null +++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import os.path as osp +from typing import Dict, List, Optional, Sequence, Union + +import mmcv +import mmengine +import numpy as np +from mmengine.dataset import Compose +from mmengine.infer.infer import ModelType +from mmengine.structures import InstanceData + +from mmdet3d.utils import ConfigType +from .base_det3d_inferencer import BaseDet3DInferencer + +InstanceList = List[InstanceData] +InputType = Union[str, np.ndarray] +InputsType = Union[InputType, Sequence[InputType]] +PredType = Union[InstanceData, InstanceList] +ImgType = Union[np.ndarray, Sequence[np.ndarray]] +ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]] + + +class MonoDet3DInferencer(BaseDet3DInferencer): + """MMDet3D Mono3D inferencer. + + Args: + model (str, optional): Path to the config file or the model name + defined in metafile. For example, it could be + "pgd-kitti" or + "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py". + weights (str, optional): Path to the checkpoint. If it is not specified + and model is a model name of metafile, the weights will be loaded + from metafile. Defaults to None. + device (str, optional): Device to run inference. If None, the available + device will be automatically used. Defaults to None. 
+ """ + + preprocess_kwargs: set = set() + forward_kwargs: set = set() + visualize_kwargs: set = { + 'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr', + 'img_out_dir' + } + postprocess_kwargs: set = { + 'print_result', 'pred_out_file', 'return_datasample' + } + + def __init__(self, + model: Union[ModelType, str], + weights: Optional[str] = None, + device: Optional[str] = None, + scope: Optional[str] = 'mmdet3d', + palette: str = 'none') -> None: + # A global counter tracking the number of images processed, for + # naming of the output images + self.num_visualized_imgs = 0 + super().__init__( + model=model, + weights=weights, + device=device, + scope=scope, + palette=palette) + + def _init_pipeline(self, cfg: ConfigType) -> Compose: + """Initialize the test pipeline.""" + pipeline_cfg = cfg.test_dataloader.dataset.pipeline + + load_img_idx = self._get_transform_idx(pipeline_cfg, + 'LoadImageFromFileMono3D') + if load_img_idx == -1: + raise ValueError( + 'LoadImageFromFileMono3D is not found in the test pipeline') + pipeline_cfg[load_img_idx]['type'] = 'Mono3DInferencerLoader' + return Compose(pipeline_cfg) + + def visualize(self, + inputs: InputsType, + preds: PredType, + return_vis: bool = False, + show: bool = False, + wait_time: int = 0, + draw_pred: bool = True, + pred_score_thr: float = 0.3, + img_out_dir: str = '') -> Union[List[np.ndarray], None]: + """Visualize predictions. + + Args: + inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer. + preds (List[Dict]): Predictions of the model. + return_vis (bool): Whether to return the visualization result. + Defaults to False. + show (bool): Whether to display the image in a popup window. + Defaults to False. + wait_time (float): The interval of show (s). Defaults to 0. + draw_pred (bool): Whether to draw predicted bounding boxes. + Defaults to True. + pred_score_thr (float): Minimum score of bboxes to draw. + Defaults to 0.3. 
+ img_out_dir (str): Output directory of visualization results. + If left as empty, no file will be saved. Defaults to ''. + Returns: + List[np.ndarray] or None: Returns visualization results only if + applicable. + """ + if self.visualizer is None or (not show and img_out_dir == '' + and not return_vis): + return None + + if getattr(self, 'visualizer') is None: + raise ValueError('Visualization needs the "visualizer" term' + 'defined in the config, but got None.') + + results = [] + + for single_input, pred in zip(inputs, preds): + if isinstance(single_input, str): + img_bytes = mmengine.fileio.get(single_input) + img = mmcv.imfrombytes(img_bytes) + img = img[:, :, ::-1] + img_name = osp.basename(single_input) + elif isinstance(single_input, np.ndarray): + img = single_input.copy() + img_num = str(self.num_visualized_imgs).zfill(8) + img_name = f'{img_num}.jpg' + else: + raise ValueError('Unsupported input type: ' + f'{type(single_input)}') + + out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \ + else None + + data_input = dict(img=img) + self.visualizer.add_datasample( + img_name, + data_input, + pred, + show=show, + wait_time=wait_time, + draw_gt=False, + draw_pred=draw_pred, + pred_score_thr=pred_score_thr, + out_file=out_file, + vis_task='mono_det', + ) + results.append(img) + self.num_visualized_imgs += 1 + + return results diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py index 49650c51e2..f9b25a2b2b 100644 --- a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py +++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py @@ -8,17 +8,17 @@ import numpy as np from mmengine.utils import is_list_of -from mmdet3d.apis import Mono3DDetInferencer +from mmdet3d.apis import MonoDet3DInferencer from mmdet3d.structures import Det3DDataSample -class TestMono3DDetInferencer(TestCase): +class TestMonoDet3DInferencer(TestCase): def 
test_init(self): # init from metafile - Mono3DDetInferencer('pgd-kitti') + MonoDet3DInferencer('pgd-kitti') # init from cfg - Mono3DDetInferencer( + MonoDet3DInferencer( 'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py') def assert_predictions_equal(self, preds1, preds2): @@ -36,7 +36,7 @@ def assert_predictions_equal(self, preds1, preds2): def test_call(self, model): # single img img_path = 'tests/data/kitti/training/image_2/000007.png' - inferencer = Mono3DDetInferencer(model) + inferencer = MonoDet3DInferencer(model) res_path = inferencer(img_path, return_vis=True) # ndarray img = mmcv.imread(img_path) @@ -78,7 +78,7 @@ def test_visualize(self, model): 'tests/data/kitti/training/image_2/000007.png', 'tests/data/kitti/training/image_2/000000.png' ] - inferencer = Mono3DDetInferencer(model) + inferencer = MonoDet3DInferencer(model) # img_out_dir with tempfile.TemporaryDirectory() as tmp_dir: inferencer(img_paths, img_out_dir=tmp_dir) @@ -88,7 +88,7 @@ def test_visualize(self, model): def test_postprocess(self, model): # return_datasample img_path = 'tests/data/kitti/training/image_2/000007.png' - inferencer = Mono3DDetInferencer(model) + inferencer = MonoDet3DInferencer(model) res = inferencer(img_path, return_datasamples=True) self.assertTrue(is_list_of(res['predictions'], Det3DDataSample)) From 31418db956042fa26ef3d6a9c9882d782bf8eb63 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Tue, 10 Jan 2023 11:42:37 +0800 Subject: [PATCH 07/13] fix comments --- .../apis/inferencers/base_det3d_inferencer.py | 6 +- mmdet3d/datasets/transforms/loading.py | 71 +++++++++++++------ 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/mmdet3d/apis/inferencers/base_det3d_inferencer.py b/mmdet3d/apis/inferencers/base_det3d_inferencer.py index 03e70662ac..2a13122046 100644 --- a/mmdet3d/apis/inferencers/base_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/base_det3d_inferencer.py @@ -83,11 +83,9 @@ def _init_model( model = MODELS.build(cfg.model) 
checkpoint = load_checkpoint(model, weights, map_location='cpu') - dataset_meta = checkpoint['meta'].get('dataset_meta', None) - # save the dataset_meta in the model for convenience if 'dataset_meta' in checkpoint.get('meta', {}): # mmdet3d 1.x - model.dataset_meta = dataset_meta + model.dataset_meta = checkpoint['meta']['dataset_meta'] elif 'CLASSES' in checkpoint.get('meta', {}): # < mmdet3d 1.x classes = checkpoint['meta']['CLASSES'] @@ -236,7 +234,7 @@ def pred2dict(self, data_sample: InstanceData) -> Dict: """ pred_instances = data_sample.pred_instances_3d.numpy() result = { - 'bboxes_3d': pred_instances.bboxes_3d.tolist(), + 'bboxes_3d': pred_instances.bboxes_3d.tensor.numpy().tolist(), 'labels_3d': pred_instances.labels_3d.tolist(), 'scores_3d': pred_instances.scores_3d.tolist() } diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py index eb80592ff3..ac635bb91c 100644 --- a/mmdet3d/datasets/transforms/loading.py +++ b/mmdet3d/datasets/transforms/loading.py @@ -267,6 +267,46 @@ def transform(self, results: dict) -> dict: return results +@TRANSFORMS.register_module() +class LoadImageFromNDArray(LoadImageFromFile): + """Load an image from ``results['img']``. + Similar with :obj:`LoadImageFromFile`, but the image has been loaded as + :obj:`np.ndarray` in ``results['img']``. Can be used when loading image + from webcam. + Required Keys: + - img + Modified Keys: + - img + - img_path + - img_shape + - ori_shape + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + """ + + def transform(self, results: dict) -> dict: + """Transform function to add image meta information. + + Args: + results (dict): Result dict with Webcam read image in + ``results['img']``. + Returns: + dict: The dict contains loaded image and meta information. 
+ """ + + img = results['img'] + if self.to_float32: + img = img.astype(np.float32) + + results['img_path'] = None + results['img'] = img + results['img_shape'] = img.shape[:2] + results['ori_shape'] = img.shape[:2] + return results + + @TRANSFORMS.register_module() class LoadPointsFromMultiSweeps(BaseTransform): """Load points from multiple sweeps. @@ -935,17 +975,10 @@ def __repr__(self) -> str: @TRANSFORMS.register_module() class Mono3DInferencerLoader(BaseTransform): - """Load an image from ``results['img']``. Similar with + """Load an image from ``results['images']['CAMX']['img']``. Similar with :obj:`LoadImageFromFileMono3D`, but the image has been loaded as - :obj:`np.ndarray` in ``results['img']``. Can be used when loading image - from webcam. Required Keys: + :obj:`np.ndarray` in ``results['images']['CAMX']['img']``. - - img - Modified Keys: - - img - - img_path - - img_shape - - ori_shape Args: to_float32 (bool): Whether to convert the loaded image to a float32 numpy array. If set to False, the loaded image is an uint8 array. @@ -959,24 +992,18 @@ def __init__(self, **kwargs) -> None: self.from_ndarray = TRANSFORMS.build( dict(type='LoadImageFromNDArray', **kwargs)) - def transform(self, single_input: Union[str, np.ndarray, dict]) -> dict: + def transform(self, single_input: dict) -> dict: """Transform function to add image meta information. Args: single_input (dict): Result dict with Webcam read image in - ``results['img']``. + ``results['images']['CAMX']['img']``. Returns: dict: The dict contains loaded image and meta information. 
""" - if isinstance(single_input, str): - inputs = dict(img_path=single_input) - elif isinstance(single_input, np.ndarray): - inputs = dict(img=single_input) - elif isinstance(single_input, dict): - inputs = single_input - else: - raise NotImplementedError - - if 'img' in inputs: - return self.from_ndarray(inputs) + inputs = single_input + camera_type = list(inputs['images'].keys())[0] + assert 'cam2img' in inputs['images'][camera_type] + if 'img' in inputs['images'][camera_type]: + return self.from_ndarray(inputs['images'][camera_type]) return self.from_file(inputs) From 40ccc9df3983c5f15b16620970231bedf30f69d4 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Wed, 11 Jan 2023 00:45:49 +0800 Subject: [PATCH 08/13] fix comments --- .../apis/inferencers/mono_det3d_inferencer.py | 51 +++++++++++-- mmdet3d/datasets/transforms/loading.py | 39 ++++++++-- mmdet3d/visualization/local_visualizer.py | 2 + .../test_mono3d_det_inferencer.py | 72 +++++++++++-------- 4 files changed, 121 insertions(+), 43 deletions(-) diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py index ec7401bbb3..691cdb1437 100644 --- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py @@ -6,6 +6,8 @@ import mmengine import numpy as np from mmengine.dataset import Compose +from mmengine.fileio import (get_file_backend, isdir, join_path, + list_dir_or_file) from mmengine.infer.infer import ModelType from mmengine.structures import InstanceData @@ -54,13 +56,48 @@ def __init__(self, # A global counter tracking the number of images processed, for # naming of the output images self.num_visualized_imgs = 0 - super().__init__( + super(MonoDet3DInferencer, self).__init__( model=model, weights=weights, device=device, scope=scope, palette=palette) + def _inputs_to_list(self, inputs: Union[dict, list]) -> list: + """Preprocess the inputs to a list. 
+ + Preprocess inputs to a list according to its type: + + - list or tuple: return inputs + - dict: + - Directory path: return all files in the directory + - other cases: return a list containing the string. The string + could be a path to file, a url or other types of string according + to the task. + + Args: + inputs (Union[dict, list]): Inputs for the inferencer. + + Returns: + list: List of input for the :meth:`preprocess`. + """ + + if isinstance(inputs, dict) and isinstance(inputs['img'], str): + img = inputs['img'] + backend = get_file_backend(img) + if hasattr(backend, 'isdir') and isdir(img): + # Backends like HttpsBackend do not implement `isdir`, so only + # those backends that implement `isdir` could accept the inputs + # as a directory + filename_list = list_dir_or_file(img, list_dir=False) + img = [join_path(img, filename) for filename in filename_list] + inputs['img'] = img + + if not isinstance(inputs, (list, tuple)): + inputs = [inputs] + + return list(inputs) + def _init_pipeline(self, cfg: ConfigType) -> Compose: """Initialize the test pipeline.""" pipeline_cfg = cfg.test_dataloader.dataset.pipeline @@ -85,7 +122,7 @@ def visualize(self, """Visualize predictions. Args: - inputs (List[Union[str, np.ndarray]]): Inputs for the inferencer. + inputs (List[Dict]): Inputs for the inferencer. preds (List[Dict]): Predictions of the model. return_vis (bool): Whether to return the visualization result. Defaults to False. 
@@ -113,13 +150,13 @@ def visualize(self, results = [] for single_input, pred in zip(inputs, preds): - if isinstance(single_input, str): - img_bytes = mmengine.fileio.get(single_input) + if isinstance(single_input['img'], str): + img_bytes = mmengine.fileio.get(single_input['img']) img = mmcv.imfrombytes(img_bytes) img = img[:, :, ::-1] - img_name = osp.basename(single_input) - elif isinstance(single_input, np.ndarray): - img = single_input.copy() + img_name = osp.basename(single_input['img']) + elif isinstance(single_input['img'], np.ndarray): + img = single_input['img'].copy() img_num = str(self.num_visualized_imgs).zfill(8) img_name = f'{img_num}.jpg' else: diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py index ac635bb91c..44e28c5382 100644 --- a/mmdet3d/datasets/transforms/loading.py +++ b/mmdet3d/datasets/transforms/loading.py @@ -10,6 +10,7 @@ from mmdet.datasets.transforms import LoadAnnotations from mmdet3d.registry import TRANSFORMS +from mmdet3d.structures.bbox_3d import get_box_type from mmdet3d.structures.points import BasePoints, get_points_type @@ -1001,9 +1002,37 @@ def transform(self, single_input: dict) -> dict: Returns: dict: The dict contains loaded image and meta information. 
""" - inputs = single_input - camera_type = list(inputs['images'].keys())[0] - assert 'cam2img' in inputs['images'][camera_type] - if 'img' in inputs['images'][camera_type]: - return self.from_ndarray(inputs['images'][camera_type]) + box_type_3d, box_mode_3d = get_box_type('camera') + if isinstance(single_input['calib'], str): + calib_path = single_input['calib'] + with open(calib_path, 'r') as f: + lines = f.readlines() + cam2img = np.array([ + float(info) for info in lines[0].split(' ')[0:16] + ]).reshape([4, 4]) + elif isinstance(single_input['calib'], np.ndarray): + cam2img = single_input['calib'] + else: + raise ValueError('Unsupported input type: ' + f'{type(single_input)}') + + if isinstance(single_input['img'], str): + inputs = dict( + images=dict( + CAM_FRONT=dict( + img_path=single_input['img'], cam2img=cam2img)), + box_mode_3d=box_mode_3d, + box_type_3d=box_type_3d) + elif isinstance(single_input['img'], np.ndarray): + inputs = dict( + img=single_input['img'], + cam2img=cam2img, + box_type_3d=box_type_3d, + box_mode_3d=box_mode_3d) + else: + raise ValueError('Unsupported input type: ' + f'{type(single_input)}') + + if 'img' in inputs: + return self.from_ndarray(inputs) return self.from_file(inputs) diff --git a/mmdet3d/visualization/local_visualizer.py b/mmdet3d/visualization/local_visualizer.py index 5f9f4188ee..523521857b 100644 --- a/mmdet3d/visualization/local_visualizer.py +++ b/mmdet3d/visualization/local_visualizer.py @@ -729,6 +729,7 @@ def add_datasample(self, if 'gt_instances' in data_sample: if len(data_sample.gt_instances) > 0: assert 'img' in data_input + img = data_input['img'] if isinstance(data_input['img'], Tensor): img = data_input['img'].permute(1, 2, 0).numpy() img = img[..., [2, 1, 0]] # bgr to rgb @@ -760,6 +761,7 @@ def add_datasample(self, pred_instances = data_sample.pred_instances pred_instances = pred_instances_3d[ pred_instances.scores > pred_score_thr].cpu() + img = data_input['img'] if isinstance(data_input['img'], Tensor): 
img = data_input['img'].permute(1, 2, 0).numpy() img = img[..., [2, 1, 0]] # bgr to rgb diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py index f9b25a2b2b..de3e657222 100644 --- a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py +++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py @@ -7,6 +7,7 @@ import mmengine import numpy as np from mmengine.utils import is_list_of +from parameterized import parameterized from mmdet3d.apis import MonoDet3DInferencer from mmdet3d.structures import Det3DDataSample @@ -19,7 +20,11 @@ def test_init(self): MonoDet3DInferencer('pgd-kitti') # init from cfg MonoDet3DInferencer( - 'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py') + 'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py', + 'https://download.openmmlab.com/mmdetection3d/v1.0.0_models/pgd/' + 'pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d/' + 'pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d_' + '20211022_102608-8a97533b.pth') def assert_predictions_equal(self, preds1, preds2): for pred1, pred2 in zip(preds1, preds2): @@ -33,70 +38,75 @@ def assert_predictions_equal(self, preds1, preds2): self.assertTrue( np.allclose(pred1['labels_3d'], pred2['labels_3d'])) + @parameterized.expand(['pgd-kitti']) def test_call(self, model): # single img - img_path = 'tests/data/kitti/training/image_2/000007.png' + img_path = 'demo/data/kitti/000008.png' + calib_path = 'demo/data/kitti/000008.txt' inferencer = MonoDet3DInferencer(model) - res_path = inferencer(img_path, return_vis=True) + inputs = dict(img=img_path, calib=calib_path) + res_path = inferencer(inputs, return_vis=True) # ndarray img = mmcv.imread(img_path) - res_ndarray = inferencer(img, return_vis=True) + inputs = dict(img=img, calib=calib_path) + res_ndarray = inferencer(inputs, return_vis=True) self.assert_predictions_equal(res_path['predictions'], res_ndarray['predictions']) 
self.assertIn('visualization', res_path) self.assertIn('visualization', res_ndarray) # multiple images - img_paths = [ - 'tests/data/kitti/training/image_2/000007.png', - 'tests/data/kitti/training/image_2/000000.png' + inputs = [ + dict( + img='demo/data/kitti/000008.png', + calib='demo/data/kitti/000008.txt'), + dict( + img='demo/data/kitti/000008.png', + calib='demo/data/kitti/000008.txt') ] - res_path = inferencer(img_paths, return_vis=True) + res_path = inferencer(inputs, return_vis=True) # list of ndarray - imgs = [mmcv.imread(p) for p in img_paths] - res_ndarray = inferencer(imgs, return_vis=True) + imgs = [mmcv.imread(p['img']) for p in inputs] + inputs[0]['img'] = imgs[0] + inputs[1]['img'] = imgs[1] + res_ndarray = inferencer(inputs, return_vis=True) self.assert_predictions_equal(res_path['predictions'], res_ndarray['predictions']) self.assertIn('visualization', res_path) self.assertIn('visualization', res_ndarray) - # img dir, test different batch sizes - img_dir = 'tests/data/kitti/training/image_2/' - res_bs1 = inferencer(img_dir, batch_size=1, return_vis=True) - res_bs3 = inferencer(img_dir, batch_size=2, return_vis=True) - self.assert_predictions_equal(res_bs1['predictions'], - res_bs3['predictions']) - if model == 'pgd-kitti': - # There is a jitter operation when the mask is drawn, - # so it cannot be asserted. 
- for res_bs1_vis, res_bs3_vis in zip(res_bs1['visualization'], - res_bs3['visualization']): - self.assertTrue(np.allclose(res_bs1_vis, res_bs3_vis)) - + @parameterized.expand(['pgd-kitti']) def test_visualize(self, model): - img_paths = [ - 'tests/data/kitti/training/image_2/000007.png', - 'tests/data/kitti/training/image_2/000000.png' + inputs = [ + dict( + img='demo/data/kitti/000008.png', + calib='demo/data/kitti/000008.txt'), + dict( + img='demo/data/kitti/000008.png', + calib='demo/data/kitti/000008.txt') ] inferencer = MonoDet3DInferencer(model) # img_out_dir with tempfile.TemporaryDirectory() as tmp_dir: - inferencer(img_paths, img_out_dir=tmp_dir) - for img_dir in ['000007.png', '000000.png']: + inferencer(inputs, img_out_dir=tmp_dir) + for img_dir in ['000008.png', '000008.png']: self.assertTrue(osp.exists(osp.join(tmp_dir, img_dir))) + @parameterized.expand(['pgd-kitti']) def test_postprocess(self, model): # return_datasample - img_path = 'tests/data/kitti/training/image_2/000007.png' + img_path = 'demo/data/kitti/000008.png' + calib_path = 'demo/data/kitti/000008.txt' + inputs = dict(img=img_path, calib=calib_path) inferencer = MonoDet3DInferencer(model) - res = inferencer(img_path, return_datasamples=True) + res = inferencer(inputs, return_datasamples=True) self.assertTrue(is_list_of(res['predictions'], Det3DDataSample)) # pred_out_file with tempfile.TemporaryDirectory() as tmp_dir: pred_out_file = osp.join(tmp_dir, 'tmp.json') res = inferencer( - img_path, print_result=True, pred_out_file=pred_out_file) + inputs, print_result=True, pred_out_file=pred_out_file) dumped_res = mmengine.load(pred_out_file) self.assert_predictions_equal(res['predictions'], dumped_res['predictions']) From 10ec030b0f05f8c4481a3229fc27c59b56785080 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Mon, 16 Jan 2023 15:35:57 +0800 Subject: [PATCH 09/13] fix comments --- .../apis/inferencers/base_det3d_inferencer.py | 44 +++++++++++-------- 
.../apis/inferencers/mono_det3d_inferencer.py | 10 ++++- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/mmdet3d/apis/inferencers/base_det3d_inferencer.py b/mmdet3d/apis/inferencers/base_det3d_inferencer.py index 2a13122046..db7b4ae041 100644 --- a/mmdet3d/apis/inferencers/base_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/base_det3d_inferencer.py @@ -20,22 +20,6 @@ ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]] -def convert_SyncBN(config): - """Convert config's naiveSyncBN to BN. - - Args: - config (str or :obj:`mmengine.Config`): Config file path or the config - object. - """ - if isinstance(config, dict): - for item in config: - if item == 'norm_cfg': - config[item]['type'] = config[item]['type']. \ - replace('naiveSyncBN', 'BN') - else: - convert_SyncBN(config[item]) - - class BaseDet3DInferencer(BaseInferencer): """Base 3D object detection inferencer. @@ -44,11 +28,17 @@ class BaseDet3DInferencer(BaseInferencer): defined in metafile. For example, it could be "pgd-kitti" or "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py". + If model is not specified, user must provide the + `weights` saved by MMEngine which contains the config string. + Defaults to None. weights (str, optional): Path to the checkpoint. If it is not specified and model is a model name of metafile, the weights will be loaded from metafile. Defaults to None. device (str, optional): Device to run inference. If None, the available device will be automatically used. Defaults to None. + scope (str, optional): The scope of the model. Defaults to mmdet3d. + palette (str): Color palette used for visualization. The order of + priority is palette -> config -> checkpoint. Defaults to 'none'. 
""" preprocess_kwargs: set = set() @@ -62,7 +52,7 @@ class BaseDet3DInferencer(BaseInferencer): } def __init__(self, - model: Union[ModelType, str], + model: Union[ModelType, str, None] = None, weights: Optional[str] = None, device: Optional[str] = None, scope: Optional[str] = 'mmdet3d', @@ -72,13 +62,28 @@ def __init__(self, super().__init__( model=model, weights=weights, device=device, scope=scope) + def _convert_syncbn(self, cfg: ConfigType): + """Convert config's naiveSyncBN to BN. + + Args: + config (str or :obj:`mmengine.Config`): Config file path + or the config object. + """ + if isinstance(cfg, dict): + for item in cfg: + if item == 'norm_cfg': + cfg[item]['type'] = cfg[item]['type']. \ + replace('naiveSyncBN', 'BN') + else: + self._convert_syncbn(cfg[item]) + def _init_model( self, cfg: ConfigType, weights: str, device: str = 'cpu', ) -> nn.Module: - convert_SyncBN(cfg.model) + self._convert_syncbn(cfg.model) cfg.model.train_cfg = None model = MODELS.build(cfg.model) @@ -139,6 +144,8 @@ def __call__(self, return_datasamples (bool): Whether to return results as :obj:`BaseDataElement`. Defaults to False. batch_size (int): Inference batch size. Defaults to 1. + return_vis (bool): Whether to return the visualization result. + Defaults to False. show (bool): Whether to display the visualization results in a popup window. Defaults to False. wait_time (float): The interval of show (s). Defaults to 0. 
@@ -216,7 +223,6 @@ def postprocess( for pred in preds: result = self.pred2dict(pred) results.append(result) - # Add img to the results after printing and dumping result_dict['predictions'] = results if print_result: print(result_dict) diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py index 691cdb1437..c7165886d5 100644 --- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py @@ -23,18 +23,24 @@ class MonoDet3DInferencer(BaseDet3DInferencer): - """MMDet3D Mono3D inferencer. + """MMDet3D Monocular 3D object detection inferencer. Args: model (str, optional): Path to the config file or the model name defined in metafile. For example, it could be "pgd-kitti" or "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py". + If model is not specified, user must provide the + `weights` saved by MMEngine which contains the config string. + Defaults to None. weights (str, optional): Path to the checkpoint. If it is not specified and model is a model name of metafile, the weights will be loaded from metafile. Defaults to None. device (str, optional): Device to run inference. If None, the available device will be automatically used. Defaults to None. + scope (str, optional): The scope of the model. Defaults to mmdet3d. + palette (str): Color palette used for visualization. The order of + priority is palette -> config -> checkpoint. Defaults to 'none'. 
""" preprocess_kwargs: set = set() @@ -48,7 +54,7 @@ class MonoDet3DInferencer(BaseDet3DInferencer): } def __init__(self, - model: Union[ModelType, str], + model: Union[ModelType, str, None] = None, weights: Optional[str] = None, device: Optional[str] = None, scope: Optional[str] = 'mmdet3d', From 0a8898d898107ee545386629be64bf82286c0f64 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Mon, 16 Jan 2023 15:49:50 +0800 Subject: [PATCH 10/13] rename pgd-kitti to pgd_kitti --- configs/pgd/metafile.yml | 2 +- mmdet3d/apis/inferencers/mono_det3d_inferencer.py | 2 +- .../test_inferencers/test_mono3d_det_inferencer.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/pgd/metafile.yml b/configs/pgd/metafile.yml index 8870ed426e..676f58edb0 100644 --- a/configs/pgd/metafile.yml +++ b/configs/pgd/metafile.yml @@ -18,7 +18,7 @@ Collections: Models: - Name: pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d Alias: - - pgd-kitti + - pgd_kitti In Collection: PGD Config: configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py Metadata: diff --git a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py index c7165886d5..95259da399 100644 --- a/mmdet3d/apis/inferencers/mono_det3d_inferencer.py +++ b/mmdet3d/apis/inferencers/mono_det3d_inferencer.py @@ -28,7 +28,7 @@ class MonoDet3DInferencer(BaseDet3DInferencer): Args: model (str, optional): Path to the config file or the model name defined in metafile. For example, it could be - "pgd-kitti" or + "pgd_kitti" or "configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py". If model is not specified, user must provide the `weights` saved by MMEngine which contains the config string. 
diff --git a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py index de3e657222..71df09f78f 100644 --- a/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py +++ b/tests/test_apis/test_inferencers/test_mono3d_det_inferencer.py @@ -17,7 +17,7 @@ class TestMonoDet3DInferencer(TestCase): def test_init(self): # init from metafile - MonoDet3DInferencer('pgd-kitti') + MonoDet3DInferencer('pgd_kitti') # init from cfg MonoDet3DInferencer( 'configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py', @@ -38,7 +38,7 @@ def assert_predictions_equal(self, preds1, preds2): self.assertTrue( np.allclose(pred1['labels_3d'], pred2['labels_3d'])) - @parameterized.expand(['pgd-kitti']) + @parameterized.expand(['pgd_kitti']) def test_call(self, model): # single img img_path = 'demo/data/kitti/000008.png' @@ -75,7 +75,7 @@ def test_call(self, model): self.assertIn('visualization', res_path) self.assertIn('visualization', res_ndarray) - @parameterized.expand(['pgd-kitti']) + @parameterized.expand(['pgd_kitti']) def test_visualize(self, model): inputs = [ dict( @@ -92,7 +92,7 @@ def test_visualize(self, model): for img_dir in ['000008.png', '000008.png']: self.assertTrue(osp.exists(osp.join(tmp_dir, img_dir))) - @parameterized.expand(['pgd-kitti']) + @parameterized.expand(['pgd_kitti']) def test_postprocess(self, model): # return_datasample img_path = 'demo/data/kitti/000008.png' From dc43aaa83a40c1b745d7f0b268df71cd3aea656a Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Mon, 30 Jan 2023 19:44:48 +0800 Subject: [PATCH 11/13] add parameterized in tests.txt --- requirements/tests.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/tests.txt b/requirements/tests.txt index 303cc37d6f..563fc4682d 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -5,6 +5,7 @@ interrogate isort # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 
kwarray +parameterized pytest pytest-cov pytest-runner From d6a3cea877882a07770d32f69b3580ee776ae572 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Mon, 30 Jan 2023 20:00:24 +0800 Subject: [PATCH 12/13] add txt file --- demo/data/kitti/000008.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 demo/data/kitti/000008.txt diff --git a/demo/data/kitti/000008.txt b/demo/data/kitti/000008.txt new file mode 100644 index 0000000000..2bc863a8fd --- /dev/null +++ b/demo/data/kitti/000008.txt @@ -0,0 +1 @@ +721.5377 0.0 609.5593 44.85728 0.0 721.5377 172.854 0.2163791 0.0 0.0 1.0 0.002745884 0.0 0.0 0.0 1.0 From 966795144ec614938de79f07cc89476114561219 Mon Sep 17 00:00:00 2001 From: ChaimZhu Date: Mon, 30 Jan 2023 20:19:48 +0800 Subject: [PATCH 13/13] update loadimgfromfilemono3d to fit latest mmcv --- mmdet3d/datasets/transforms/loading.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/mmdet3d/datasets/transforms/loading.py b/mmdet3d/datasets/transforms/loading.py index f7d61fe9fd..a3bcefc5af 100644 --- a/mmdet3d/datasets/transforms/loading.py +++ b/mmdet3d/datasets/transforms/loading.py @@ -4,6 +4,7 @@ import mmcv import mmengine +import mmengine.fileio as fileio import numpy as np from mmcv.transforms import LoadImageFromFile from mmcv.transforms.base import BaseTransform @@ -255,9 +256,21 @@ def transform(self, results: dict) -> dict: 'Currently we only support load image from kitti and' 'nuscenes datasets') - img_bytes = self.file_client.get(filename) - img = mmcv.imfrombytes( - img_bytes, flag=self.color_type, backend=self.imdecode_backend) + try: + if self.file_client_args is not None: + file_client = fileio.FileClient.infer_client( + self.file_client_args, filename) + img_bytes = file_client.get(filename) + else: + img_bytes = fileio.get( + filename, backend_args=self.backend_args) + img = mmcv.imfrombytes( + img_bytes, flag=self.color_type, backend=self.imdecode_backend) + except Exception as e: + if self.ignore_empty: 
+ return None + else: + raise e if self.to_float32: img = img.astype(np.float32)