diff --git a/configs/_base_/datasets/kitti-mono3d.py b/configs/_base_/datasets/kitti-mono3d.py
index 3bf0323e27..0bc1267d8f 100644
--- a/configs/_base_/datasets/kitti-mono3d.py
+++ b/configs/_base_/datasets/kitti-mono3d.py
@@ -80,8 +80,7 @@
 val_evaluator = dict(
     type='KittiMetric',
     ann_file=data_root + 'kitti_infos_val.pkl',
-    metric='bbox',
-    pred_box_type_3d='Camera')
+    metric='bbox')
 
 test_evaluator = val_evaluator
 
diff --git a/mmdet3d/evaluation/metrics/kitti_metric.py b/mmdet3d/evaluation/metrics/kitti_metric.py
index b0f443131e..3fa65fde54 100644
--- a/mmdet3d/evaluation/metrics/kitti_metric.py
+++ b/mmdet3d/evaluation/metrics/kitti_metric.py
@@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import tempfile
 from os import path as osp
-from typing import Dict, List, Optional, Sequence, Union
+from typing import Dict, List, Optional, Sequence, Tuple, Union
 
 import mmengine
 import numpy as np
@@ -22,44 +22,49 @@ class KittiMetric(BaseMetric):
 
     Args:
         ann_file (str): Annotation file path.
-        metric (str | list[str]): Metrics to be evaluated.
-            Default to 'bbox'.
-        pcd_limit_range (list): The range of point cloud used to
+        metric (str or List[str]): Metrics to be evaluated.
+            Defaults to 'bbox'.
+        pcd_limit_range (List[float]): The range of point cloud used to
             filter invalid predicted boxes.
-            Default to [0, -40, -3, 70.4, 40, 0.0].
+            Defaults to [0, -40, -3, 70.4, 40, 0.0].
         prefix (str, optional): The prefix that will be added in the metric
             names to disambiguate homonymous metrics of different evaluators.
             If prefix is not provided in the argument, self.default_prefix
             will be used instead. Defaults to None.
         pklfile_prefix (str, optional): The prefix of pkl files, including
             the file path and the prefix of filename, e.g., "a/b/prefix".
-            If not specified, a temp file will be created. Default: None.
-        default_cam_key (str, optional): The default camera for lidar to
-            camear conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT
+            If not specified, a temp file will be created. Defaults to None.
+        default_cam_key (str): The default camera for lidar to camera
+            conversion. By default, KITTI: 'CAM2', Waymo: 'CAM_FRONT'.
+            Defaults to 'CAM2'.
         format_only (bool): Format the output results without perform
             evaluation. It is useful when you want to format the result
             to a specific format and submit it to the test server.
             Defaults to False.
         submission_prefix (str, optional): The prefix of submission data.
             If not specified, the submission data will not be generated.
-            Default: None.
+            Defaults to None.
         collect_device (str): Device name used for collecting results
             from different ranks during distributed training. Must be 'cpu' or
             'gpu'. Defaults to 'cpu'.
+        file_client_args (dict): Arguments to instantiate a FileClient.
+            See :class:`mmengine.fileio.FileClient` for details.
+            Defaults to dict(backend='disk').
     """
 
-    def __init__(self,
-                 ann_file: str,
-                 metric: Union[str, List[str]] = 'bbox',
-                 pred_box_type_3d: str = 'LiDAR',
-                 pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
-                 prefix: Optional[str] = None,
-                 pklfile_prefix: str = None,
-                 default_cam_key: str = 'CAM2',
-                 format_only: bool = False,
-                 submission_prefix: str = None,
-                 collect_device: str = 'cpu',
-                 file_client_args: dict = dict(backend='disk')):
+    def __init__(
+        self,
+        ann_file: str,
+        metric: Union[str, List[str]] = 'bbox',
+        pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
+        prefix: Optional[str] = None,
+        pklfile_prefix: Optional[str] = None,
+        default_cam_key: str = 'CAM2',
+        format_only: bool = False,
+        submission_prefix: Optional[str] = None,
+        collect_device: str = 'cpu',
+        file_client_args: dict = dict(backend='disk')
+    ) -> None:
         self.default_prefix = 'Kitti metric'
         super(KittiMetric, self).__init__(
             collect_device=collect_device, prefix=prefix)
@@ -68,25 +73,23 @@ def __init__(self,
         self.pklfile_prefix = pklfile_prefix
         self.format_only = format_only
         if self.format_only:
-            assert submission_prefix is not None, 'submission_prefix must be'
-            'not None when format_only is True, otherwise the result files'
-            'will be saved to a temp directory which will be cleaned up at'
-            'the end.'
+            assert submission_prefix is not None, (
+                'submission_prefix must be not None when format_only is '
+                'True, otherwise the result files will be saved to a temp '
+                'directory which will be cleaned up at the end.')
 
         self.submission_prefix = submission_prefix
-        self.pred_box_type_3d = pred_box_type_3d
         self.default_cam_key = default_cam_key
         self.file_client_args = file_client_args
-        self.default_cam_key = default_cam_key
 
         allowed_metrics = ['bbox', 'img_bbox', 'mAP', 'LET_mAP']
         self.metrics = metric if isinstance(metric, list) else [metric]
         for metric in self.metrics:
             if metric not in allowed_metrics:
                 raise KeyError("metric should be one of 'bbox', 'img_bbox', "
-                               'but got {metric}.')
+                               f'but got {metric}.')
 
-    def convert_annos_to_kitti_annos(self, data_infos: dict) -> list:
+    def convert_annos_to_kitti_annos(self, data_infos: dict) -> List[dict]:
         """Convert loading annotations to Kitti annotations.
 
         Args:
@@ -169,13 +172,13 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
             result['pred_instances'] = pred_2d
             sample_idx = data_sample['sample_idx']
             result['sample_idx'] = sample_idx
-        self.results.append(result)
+            self.results.append(result)
 
-    def compute_metrics(self, results: list) -> Dict[str, float]:
+    def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
         """Compute the metrics from processed results.
 
         Args:
-            results (list): The processed results of the whole dataset.
+            results (List[dict]): The processed results of the whole dataset.
 
         Returns:
             Dict[str, float]: The computed metrics. The keys are the names of
@@ -220,25 +223,25 @@ def compute_metrics(self, results: list) -> Dict[str, float]:
         return metric_dict
 
     def kitti_evaluate(self,
-                       results_dict: List[dict],
+                       results_dict: dict,
                        gt_annos: List[dict],
-                       metric: str = None,
-                       classes: List[str] = None,
-                       logger: MMLogger = None) -> dict:
+                       metric: Optional[str] = None,
+                       classes: Optional[List[str]] = None,
+                       logger: Optional[MMLogger] = None) -> Dict[str, float]:
         """Evaluation in KITTI protocol.
 
         Args:
             results_dict (dict): Formatted results of the dataset.
-            gt_annos (list[dict]): Contain gt information of each sample.
+            gt_annos (List[dict]): Contain gt information of each sample.
             metric (str, optional): Metrics to be evaluated.
-                Default: None.
+                Defaults to None.
+            classes (List[str], optional): A list of class names.
+                Defaults to None.
             logger (MMLogger, optional): Logger used for printing
-                related information during evaluation. Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+                related information during evaluation. Defaults to None.
 
         Returns:
-            dict[str, float]: Results of each evaluation metric.
+            Dict[str, float]: Results of each evaluation metric.
         """
         ap_dict = dict()
         for name in results_dict:
@@ -249,37 +252,38 @@ def kitti_evaluate(self,
             ap_result_str, ap_dict_ = kitti_eval(
                 gt_annos, results_dict[name], classes, eval_types=eval_types)
             for ap_type, ap in ap_dict_.items():
-                ap_dict[f'{name}/{ap_type}'] = float('{:.4f}'.format(ap))
+                ap_dict[f'{name}/{ap_type}'] = float(f'{ap:.4f}')
 
             print_log(f'Results of {name}:\n' + ap_result_str, logger=logger)
 
         return ap_dict
 
-    def format_results(self,
-                       results: List[dict],
-                       pklfile_prefix: str = None,
-                       submission_prefix: str = None,
-                       classes: List[str] = None):
+    def format_results(
+        self,
+        results: List[dict],
+        pklfile_prefix: Optional[str] = None,
+        submission_prefix: Optional[str] = None,
+        classes: Optional[List[str]] = None
+    ) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
         """Format the results to pkl file.
 
         Args:
-            results (list[dict]): Testing results of the
-                dataset.
+            results (List[dict]): Testing results of the dataset.
             pklfile_prefix (str, optional): The prefix of pkl files. It
                 includes the file path and the prefix of filename, e.g.,
                 "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
+                Defaults to None.
             submission_prefix (str, optional): The prefix of submitted files.
                 It includes the file path and the prefix of filename, e.g.,
                 "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+                Defaults to None.
+            classes (List[str], optional): A list of class names.
+                Defaults to None.
 
         Returns:
             tuple: (result_dict, tmp_dir), result_dict is a dict containing
-                the formatted result, tmp_dir is the temporal directory created
-                for saving json files when jsonfile_prefix is not specified.
+            the formatted result, tmp_dir is the temporary directory created
+            for saving pkl files when pklfile_prefix is not specified.
         """
         if pklfile_prefix is None:
             tmp_dir = tempfile.TemporaryDirectory()
@@ -287,7 +291,7 @@ def format_results(self,
         else:
             tmp_dir = None
         result_dict = dict()
-        sample_id_list = [result['sample_idx'] for result in results]
+        sample_idx_list = [result['sample_idx'] for result in results]
         for name in results[0]:
             if submission_prefix is not None:
                 submission_prefix_ = osp.join(submission_prefix, name)
@@ -301,7 +305,7 @@ def format_results(self,
                     0] != '_' and results[0][name]:
                 net_outputs = [result[name] for result in results]
                 result_list_ = self.bbox2result_kitti(net_outputs,
-                                                      sample_id_list, classes,
+                                                      sample_idx_list, classes,
                                                       pklfile_prefix_,
                                                       submission_prefix_)
                 result_dict[name] = result_list_
@@ -309,32 +313,33 @@ def format_results(self,
                     name]:
                 net_outputs = [result[name] for result in results]
                 result_list_ = self.bbox2result_kitti2d(
-                    net_outputs, sample_id_list, classes, pklfile_prefix_,
+                    net_outputs, sample_idx_list, classes, pklfile_prefix_,
                     submission_prefix_)
                 result_dict[name] = result_list_
         return result_dict, tmp_dir
 
-    def bbox2result_kitti(self,
-                          net_outputs: list,
-                          sample_id_list: list,
-                          class_names: list,
-                          pklfile_prefix: str = None,
-                          submission_prefix: str = None):
+    def bbox2result_kitti(
+            self,
+            net_outputs: List[dict],
+            sample_idx_list: List[int],
+            class_names: List[str],
+            pklfile_prefix: Optional[str] = None,
+            submission_prefix: Optional[str] = None) -> List[dict]:
         """Convert 3D detection results to kitti format for evaluation and test
         submission.
 
         Args:
-            net_outputs (list[dict]): List of array storing the
+            net_outputs (List[dict]): List of dict storing the
                 inferenced bounding boxes and scores.
-            sample_id_list (list[int]): List of input sample id.
-            class_names (list[String]): A list of class names.
+            sample_idx_list (List[int]): List of input sample idx.
+            class_names (List[str]): A list of class names.
             pklfile_prefix (str, optional): The prefix of pkl file.
                 Defaults to None.
             submission_prefix (str, optional): The prefix of submission file.
                 Defaults to None.
 
         Returns:
-            list[dict]: A list of dictionaries with the kitti format.
+            List[dict]: A list of dictionaries with the kitti format.
         """
         assert len(net_outputs) == len(self.data_infos), \
             'invalid list length of network outputs'
@@ -345,8 +350,7 @@ def bbox2result_kitti(self,
         print('\nConverting 3D prediction to KITTI format')
         for idx, pred_dicts in enumerate(
                 mmengine.track_iter_progress(net_outputs)):
-            annos = []
-            sample_idx = sample_id_list[idx]
+            sample_idx = sample_idx_list[idx]
             info = self.data_infos[sample_idx]
             # Here default used 'CAM2' to compute metric. If you want to
             # use another camera, please modify it.
@@ -393,7 +397,6 @@ def bbox2result_kitti(self,
                     anno['score'].append(score)
 
                 anno = {k: np.stack(v) for k, v in anno.items()}
-                annos.append(anno)
             else:
                 anno = {
                     'name': np.array([]),
@@ -406,7 +409,6 @@ def bbox2result_kitti(self,
                     'rotation_y': np.array([]),
                     'score': np.array([]),
                 }
-                annos.append(anno)
 
             if submission_prefix is not None:
                 curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
@@ -428,10 +430,10 @@ def bbox2result_kitti(self,
                                 anno['score'][idx]),
                             file=f)
 
-            annos[-1]['sample_id'] = np.array(
-                [sample_idx] * len(annos[-1]['score']), dtype=np.int64)
+            anno['sample_idx'] = np.array(
+                [sample_idx] * len(anno['score']), dtype=np.int64)
 
-            det_annos += annos
+            det_annos.append(anno)
 
         if pklfile_prefix is not None:
             if not pklfile_prefix.endswith(('.pkl', '.pickle')):
@@ -443,27 +445,28 @@ def bbox2result_kitti(self,
 
         return det_annos
 
-    def bbox2result_kitti2d(self,
-                            net_outputs: list,
-                            sample_id_list,
-                            class_names: list,
-                            pklfile_prefix: str = None,
-                            submission_prefix: str = None):
+    def bbox2result_kitti2d(
+            self,
+            net_outputs: List[dict],
+            sample_idx_list: List[int],
+            class_names: List[str],
+            pklfile_prefix: Optional[str] = None,
+            submission_prefix: Optional[str] = None) -> List[dict]:
         """Convert 2D detection results to kitti format for evaluation and test
         submission.
 
         Args:
-            net_outputs (list[dict]): List of array storing the
+            net_outputs (List[dict]): List of dict storing the
                 inferenced bounding boxes and scores.
-            sample_id_list (list[int]): List of input sample id.
-            class_names (list[String]): A list of class names.
+            sample_idx_list (List[int]): List of input sample idx.
+            class_names (List[str]): A list of class names.
             pklfile_prefix (str, optional): The prefix of pkl file.
                 Defaults to None.
             submission_prefix (str, optional): The prefix of submission file.
                 Defaults to None.
 
         Returns:
-            list[dict]: A list of dictionaries have the kitti format
+            List[dict]: A list of dictionaries with the kitti format.
         """
         assert len(net_outputs) == len(self.data_infos), \
             'invalid list length of network outputs'
@@ -471,7 +474,6 @@ def bbox2result_kitti2d(self,
         print('\nConverting 2D prediction to KITTI format')
         for i, bboxes_per_sample in enumerate(
                 mmengine.track_iter_progress(net_outputs)):
-            annos = []
             anno = dict(
                 name=[],
                 truncated=[],
@@ -482,7 +484,7 @@ def bbox2result_kitti2d(self,
                 location=[],
                 rotation_y=[],
                 score=[])
-            sample_idx = sample_id_list[i]
+            sample_idx = sample_idx_list[i]
 
             num_example = 0
             bbox = bboxes_per_sample['bboxes']
@@ -504,25 +506,23 @@ def bbox2result_kitti2d(self,
                 num_example += 1
 
             if num_example == 0:
-                annos.append(
-                    dict(
-                        name=np.array([]),
-                        truncated=np.array([]),
-                        occluded=np.array([]),
-                        alpha=np.array([]),
-                        bbox=np.zeros([0, 4]),
-                        dimensions=np.zeros([0, 3]),
-                        location=np.zeros([0, 3]),
-                        rotation_y=np.array([]),
-                        score=np.array([]),
-                    ))
+                anno = dict(
+                    name=np.array([]),
+                    truncated=np.array([]),
+                    occluded=np.array([]),
+                    alpha=np.array([]),
+                    bbox=np.zeros([0, 4]),
+                    dimensions=np.zeros([0, 3]),
+                    location=np.zeros([0, 3]),
+                    rotation_y=np.array([]),
+                    score=np.array([]),
+                )
             else:
                 anno = {k: np.stack(v) for k, v in anno.items()}
-                annos.append(anno)
 
-            annos[-1]['sample_id'] = np.array(
+            anno['sample_idx'] = np.array(
                 [sample_idx] * num_example, dtype=np.int64)
-            det_annos += annos
+            det_annos.append(anno)
 
         if pklfile_prefix is not None:
             if not pklfile_prefix.endswith(('.pkl', '.pickle')):
@@ -537,7 +537,7 @@ def bbox2result_kitti2d(self,
             mmengine.mkdir_or_exist(submission_prefix)
             print(f'Saving KITTI submission to {submission_prefix}')
             for i, anno in enumerate(det_annos):
-                sample_idx = sample_id_list[i]
+                sample_idx = sample_idx_list[i]
                 cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
                 with open(cur_det_file, 'w') as f:
                     bbox = anno['bbox']
@@ -560,15 +560,15 @@ def bbox2result_kitti2d(self,
 
         return det_annos
 
-    def convert_valid_bboxes(self, box_dict: dict, info: dict):
+    def convert_valid_bboxes(self, box_dict: dict, info: dict) -> dict:
         """Convert the predicted boxes into valid ones.
 
         Args:
             box_dict (dict): Box dictionaries to be converted.
 
-                - boxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
-                - scores_3d (torch.Tensor): Scores of boxes.
-                - labels_3d (torch.Tensor): Class labels of boxes.
+                - bboxes_3d (:obj:`BaseInstance3DBoxes`): 3D bounding boxes.
+                - scores_3d (Tensor): Scores of boxes.
+                - labels_3d (Tensor): Class labels of boxes.
             info (dict): Data info.
 
         Returns:
@@ -576,9 +576,9 @@ def convert_valid_bboxes(self, box_dict: dict, info: dict):
 
                 - bbox (np.ndarray): 2D bounding boxes.
                 - box3d_camera (np.ndarray): 3D bounding boxes in
-                    camera coordinate.
+                  camera coordinate.
                 - box3d_lidar (np.ndarray): 3D bounding boxes in
-                    LiDAR coordinate.
+                  LiDAR coordinate.
                 - scores (np.ndarray): Scores of boxes.
                 - label_preds (np.ndarray): Class label predictions.
                 - sample_idx (int): Sample index.
@@ -654,5 +654,5 @@ def convert_valid_bboxes(self, box_dict: dict, info: dict):
                 box3d_camera=np.zeros([0, 7]),
                 box3d_lidar=np.zeros([0, 7]),
                 scores=np.zeros([0]),
-                label_preds=np.zeros([0, 4]),
+                label_preds=np.zeros([0]),
                 sample_idx=sample_idx)
diff --git a/mmdet3d/evaluation/metrics/nuscenes_metric.py b/mmdet3d/evaluation/metrics/nuscenes_metric.py
index c63aea43fa..77a95356f6 100644
--- a/mmdet3d/evaluation/metrics/nuscenes_metric.py
+++ b/mmdet3d/evaluation/metrics/nuscenes_metric.py
@@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import logging
 import tempfile
 from os import path as osp
 from typing import Dict, List, Optional, Sequence, Tuple, Union
@@ -28,22 +27,29 @@ class NuScenesMetric(BaseMetric):
     Args:
         data_root (str): Path of dataset root.
         ann_file (str): Path of annotation file.
-        metric (str | list[str]): Metrics to be evaluated.
-            Default to 'bbox'.
+        metric (str or List[str]): Metrics to be evaluated.
+            Defaults to 'bbox'.
         modality (dict): Modality to specify the sensor data used
             as input. Defaults to dict(use_camera=False, use_lidar=True).
         prefix (str, optional): The prefix that will be added in the metric
             names to disambiguate homonymous metrics of different evaluators.
             If prefix is not provided in the argument, self.default_prefix
             will be used instead. Defaults to None.
+        format_only (bool): Format the output results without performing
+            evaluation. It is useful when you want to format the result
+            to a specific format and submit it to the test server.
+            Defaults to False.
         jsonfile_prefix (str, optional): The prefix of json files including
             the file path and the prefix of filename, e.g., "a/b/prefix".
-            If not specified, a temp file will be created. Default: None.
+            If not specified, a temp file will be created. Defaults to None.
         eval_version (str): Configuration version of evaluation.
-            Defaults to  'detection_cvpr_2019'.
+            Defaults to 'detection_cvpr_2019'.
         collect_device (str): Device name used for collecting results
             from different ranks during distributed training. Must be 'cpu' or
             'gpu'. Defaults to 'cpu'.
+        file_client_args (dict): Arguments to instantiate a FileClient.
+            See :class:`mmengine.fileio.FileClient` for details.
+            Defaults to dict(backend='disk').
     """
     NameMapping = {
         'movable_object.barrier': 'barrier',
@@ -87,8 +93,9 @@ def __init__(
         data_root: str,
         ann_file: str,
         metric: Union[str, List[str]] = 'bbox',
-        modality: Dict = dict(use_camera=False, use_lidar=True),
+        modality: dict = dict(use_camera=False, use_lidar=True),
         prefix: Optional[str] = None,
+        format_only: bool = False,
         jsonfile_prefix: Optional[str] = None,
         eval_version: str = 'detection_cvpr_2019',
         collect_device: str = 'cpu',
@@ -105,6 +112,13 @@ def __init__(
         self.ann_file = ann_file
         self.data_root = data_root
         self.modality = modality
+        self.format_only = format_only
+        if self.format_only:
+            assert jsonfile_prefix is not None, (
+                'jsonfile_prefix must be not None when format_only is True, '
+                'otherwise the result files will be saved to a temp directory '
+                'which will be cleaned up at the end.')
+
         self.jsonfile_prefix = jsonfile_prefix
         self.file_client_args = file_client_args
 
@@ -137,13 +151,13 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
             result['pred_instances'] = pred_2d
             sample_idx = data_sample['sample_idx']
             result['sample_idx'] = sample_idx
-        self.results.append(result)
+            self.results.append(result)
 
-    def compute_metrics(self, results: list) -> Dict[str, float]:
+    def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
         """Compute the metrics from processed results.
 
         Args:
-            results (list): The processed results of each batch.
+            results (List[dict]): The processed results of each batch.
 
         Returns:
             Dict[str, float]: The computed metrics. The keys are the names of
@@ -160,6 +174,12 @@ def compute_metrics(self, results: list) -> Dict[str, float]:
                                                    self.jsonfile_prefix)
 
         metric_dict = {}
+
+        if self.format_only:
+            logger.info('results are saved in '
+                        f'{osp.basename(self.jsonfile_prefix)}')
+            return metric_dict
+
         for metric in self.metrics:
             ap_dict = self.nus_evaluate(
                 result_dict, classes=classes, metric=metric, logger=logger)
@@ -173,21 +193,20 @@ def compute_metrics(self, results: list) -> Dict[str, float]:
     def nus_evaluate(self,
                      result_dict: dict,
                      metric: str = 'bbox',
-                     classes: List[str] = None,
-                     logger: logging.Logger = None) -> dict:
+                     classes: Optional[List[str]] = None,
+                     logger: Optional[MMLogger] = None) -> Dict[str, float]:
         """Evaluation in Nuscenes protocol.
 
         Args:
             result_dict (dict): Formatted results of the dataset.
-            metric (str): Metrics to be evaluated.
-                Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+            metric (str): Metrics to be evaluated. Defaults to 'bbox'.
+            classes (List[str], optional): A list of class names.
+                Defaults to None.
             logger (MMLogger, optional): Logger used for printing
-                related information during evaluation. Default: None.
+                related information during evaluation. Defaults to None.
 
         Returns:
-            dict[str, float]: Results of each evaluation metric.
+            Dict[str, float]: Results of each evaluation metric.
         """
         metric_dict = dict()
         for name in result_dict:
@@ -197,22 +216,22 @@ def nus_evaluate(self,
         metric_dict.update(ret_dict)
         return metric_dict
 
-    def _evaluate_single(self,
-                         result_path: str,
-                         classes: List[None] = None,
-                         result_name: str = 'pred_instances_3d') -> dict:
+    def _evaluate_single(
+            self,
+            result_path: str,
+            classes: Optional[List[str]] = None,
+            result_name: str = 'pred_instances_3d') -> Dict[str, float]:
         """Evaluation for a single model in nuScenes protocol.
 
         Args:
             result_path (str): Path of the result file.
-                Default: 'bbox'.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+            classes (List[str], optional): A list of class names.
+                Defaults to None.
             result_name (str): Result name in the metric prefix.
-                Default: 'pred_instances_3d'.
+                Defaults to 'pred_instances_3d'.
 
         Returns:
-            dict: Dictionary of evaluation details.
+            Dict[str, float]: Dictionary of evaluation details.
         """
         from nuscenes import NuScenes
         from nuscenes.eval.detection.evaluate import NuScenesEval
@@ -239,39 +258,41 @@ def _evaluate_single(self,
         metric_prefix = f'{result_name}_NuScenes'
         for name in classes:
             for k, v in metrics['label_aps'][name].items():
-                val = float('{:.4f}'.format(v))
+                val = float(f'{v:.4f}')
                 detail[f'{metric_prefix}/{name}_AP_dist_{k}'] = val
             for k, v in metrics['label_tp_errors'][name].items():
-                val = float('{:.4f}'.format(v))
+                val = float(f'{v:.4f}')
                 detail[f'{metric_prefix}/{name}_{k}'] = val
             for k, v in metrics['tp_errors'].items():
-                val = float('{:.4f}'.format(v))
+                val = float(f'{v:.4f}')
                 detail[f'{metric_prefix}/{self.ErrNameMapping[k]}'] = val
 
         detail[f'{metric_prefix}/NDS'] = metrics['nd_score']
         detail[f'{metric_prefix}/mAP'] = metrics['mean_ap']
         return detail
 
-    def format_results(self,
-                       results: List[dict],
-                       classes: List[str] = None,
-                       jsonfile_prefix: str = None) -> Tuple:
+    def format_results(
+        self,
+        results: List[dict],
+        classes: Optional[List[str]] = None,
+        jsonfile_prefix: Optional[str] = None
+    ) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
         """Format the mmdet3d results to standard NuScenes json file.
 
         Args:
-            results (list[dict]): Testing results of the dataset.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+            results (List[dict]): Testing results of the dataset.
+            classes (List[str], optional): A list of class names.
+                Defaults to None.
             jsonfile_prefix (str, optional): The prefix of json files. It
                 includes the file path and the prefix of filename, e.g.,
                 "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
+                Defaults to None.
 
         Returns:
             tuple: Returns (result_dict, tmp_dir), where `result_dict` is a
-                dict containing the json filepaths, `tmp_dir` is the temporal
-                directory created for saving json files when
-                `jsonfile_prefix` is not specified.
+            dict containing the json filepaths, `tmp_dir` is the temporary
+            directory created for saving json files when
+            `jsonfile_prefix` is not specified.
         """
         assert isinstance(results, list), 'results must be a list'
 
@@ -281,7 +302,7 @@ def format_results(self,
         else:
             tmp_dir = None
         result_dict = dict()
-        sample_id_list = [result['sample_idx'] for result in results]
+        sample_idx_list = [result['sample_idx'] for result in results]
 
         for name in results[0]:
             if 'pred' in name and '3d' in name and name[0] != '_':
@@ -291,14 +312,14 @@ def format_results(self,
                 box_type_3d = type(results_[0]['bboxes_3d'])
                 if box_type_3d == LiDARInstance3DBoxes:
                     result_dict[name] = self._format_lidar_bbox(
-                        results_, sample_id_list, classes, tmp_file_)
+                        results_, sample_idx_list, classes, tmp_file_)
                 elif box_type_3d == CameraInstance3DBoxes:
                     result_dict[name] = self._format_camera_bbox(
-                        results_, sample_id_list, classes, tmp_file_)
+                        results_, sample_idx_list, classes, tmp_file_)
 
         return result_dict, tmp_dir
 
-    def get_attr_name(self, attr_idx, label_name):
+    def get_attr_name(self, attr_idx: int, label_name: str) -> str:
         """Get attribute from predicted index.
 
         This is a workaround to predict attribute when the predicted velocity
@@ -347,16 +368,19 @@ def get_attr_name(self, attr_idx, label_name):
 
     def _format_camera_bbox(self,
                             results: List[dict],
-                            sample_id_list: List[int],
-                            classes: List[str] = None,
-                            jsonfile_prefix: str = None) -> str:
+                            sample_idx_list: List[int],
+                            classes: Optional[List[str]] = None,
+                            jsonfile_prefix: Optional[str] = None) -> str:
         """Convert the results to the standard format.
 
         Args:
-            results (list[dict]): Testing results of the dataset.
-            jsonfile_prefix (str): The prefix of the output jsonfile.
+            results (List[dict]): Testing results of the dataset.
+            sample_idx_list (List[int]): List of result sample idx.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
+            jsonfile_prefix (str, optional): The prefix of the output jsonfile.
                 You can specify the output directory/filename by
-                modifying the jsonfile_prefix. Default: None.
+                modifying the jsonfile_prefix. Defaults to None.
 
         Returns:
             str: Path of the output json file.
@@ -379,10 +403,10 @@ def _format_camera_bbox(self,
 
         for i, det in enumerate(mmengine.track_iter_progress(results)):
 
-            sample_id = sample_id_list[i]
+            sample_idx = sample_idx_list[i]
 
-            frame_sample_id = sample_id // CAM_NUM
-            camera_type_id = sample_id % CAM_NUM
+            frame_sample_idx = sample_idx // CAM_NUM
+            camera_type_id = sample_idx % CAM_NUM
 
             if camera_type_id == 0:
                 boxes_per_frame = []
@@ -391,17 +415,17 @@ def _format_camera_bbox(self,
             # need to merge results from images of the same sample
             annos = []
             boxes, attrs = output_to_nusc_box(det)
-            sample_token = self.data_infos[frame_sample_id]['token']
+            sample_token = self.data_infos[frame_sample_idx]['token']
             camera_type = camera_types[camera_type_id]
             boxes, attrs = cam_nusc_box_to_global(
-                self.data_infos[frame_sample_id], boxes, attrs, classes,
+                self.data_infos[frame_sample_idx], boxes, attrs, classes,
                 self.eval_detection_configs, camera_type)
             boxes_per_frame.extend(boxes)
             attrs_per_frame.extend(attrs)
             # Remove redundant predictions caused by overlap of images
-            if (sample_id + 1) % CAM_NUM != 0:
+            if (sample_idx + 1) % CAM_NUM != 0:
                 continue
-            boxes = global_nusc_box_to_cam(self.data_infos[frame_sample_id],
+            boxes = global_nusc_box_to_cam(self.data_infos[frame_sample_idx],
                                            boxes_per_frame, classes,
                                            self.eval_detection_configs)
             cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
@@ -432,7 +456,7 @@ def _format_camera_bbox(self,
             det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
             boxes, attrs = output_to_nusc_box(det)
             boxes, attrs = cam_nusc_box_to_global(
-                self.data_infos[frame_sample_id], boxes, attrs, classes,
+                self.data_infos[frame_sample_idx], boxes, attrs, classes,
                 self.eval_detection_configs)
 
             for i, box in enumerate(boxes):
@@ -461,25 +485,25 @@ def _format_camera_bbox(self,
 
         mmengine.mkdir_or_exist(jsonfile_prefix)
         res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
-        print('Results writes to', res_path)
+        print(f'Results writes to {res_path}')
         mmengine.dump(nusc_submissions, res_path)
         return res_path
 
     def _format_lidar_bbox(self,
                            results: List[dict],
-                           sample_id_list: List[int],
-                           classes: List[str] = None,
-                           jsonfile_prefix: str = None) -> str:
+                           sample_idx_list: List[int],
+                           classes: Optional[List[str]] = None,
+                           jsonfile_prefix: Optional[str] = None) -> str:
         """Convert the results to the standard format.
 
         Args:
-            results (list[dict]): Testing results of the dataset.
-            sample_id_list (list[int]): List of result sample id.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+            results (List[dict]): Testing results of the dataset.
+            sample_idx_list (List[int]): List of result sample idx.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
             jsonfile_prefix (str, optional): The prefix of the output jsonfile.
                 You can specify the output directory/filename by
-                modifying the jsonfile_prefix. Default: None.
+                modifying the jsonfile_prefix. Defaults to None.
 
         Returns:
             str: Path of the output json file.
@@ -490,10 +514,10 @@ def _format_lidar_bbox(self,
         for i, det in enumerate(mmengine.track_iter_progress(results)):
             annos = []
             boxes, attrs = output_to_nusc_box(det)
-            sample_id = sample_id_list[i]
-            sample_token = self.data_infos[sample_id]['token']
-            boxes = lidar_nusc_box_to_global(self.data_infos[sample_id], boxes,
-                                             classes,
+            sample_idx = sample_idx_list[i]
+            sample_token = self.data_infos[sample_idx]['token']
+            boxes = lidar_nusc_box_to_global(self.data_infos[sample_idx],
+                                             boxes, classes,
                                              self.eval_detection_configs)
             for i, box in enumerate(boxes):
                 name = classes[box.label]
@@ -535,12 +559,13 @@ def _format_lidar_bbox(self,
         }
         mmengine.mkdir_or_exist(jsonfile_prefix)
         res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
-        print('Results writes to', res_path)
+        print(f'Results writes to {res_path}')
         mmengine.dump(nusc_submissions, res_path)
         return res_path
 
 
-def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
+def output_to_nusc_box(
+        detection: dict) -> Tuple[List[NuScenesBox], Union[np.ndarray, None]]:
     """Convert the output to the box class in the nuScenes.
 
     Args:
@@ -551,7 +576,8 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
             - labels_3d (torch.Tensor): Predicted box labels.
 
     Returns:
-        list[:obj:`NuScenesBox`]: List of standard NuScenesBoxes.
+        Tuple[List[:obj:`NuScenesBox`], np.ndarray or None]:
+        List of standard NuScenesBoxes and attribute labels.
     """
     bbox3d = detection['bboxes_3d']
     scores = detection['scores_3d'].numpy()
@@ -566,7 +592,7 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
 
     box_list = []
 
-    if type(bbox3d) == LiDARInstance3DBoxes:
+    if isinstance(bbox3d, LiDARInstance3DBoxes):
         # our LiDAR coordinate system -> nuScenes box coordinate system
         nus_box_dims = box_dims[:, [1, 0, 2]]
         for i in range(len(bbox3d)):
@@ -584,7 +610,7 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
                 score=scores[i],
                 velocity=velocity)
             box_list.append(box)
-    elif type(bbox3d) == CameraInstance3DBoxes:
+    elif isinstance(bbox3d, CameraInstance3DBoxes):
         # our Camera coordinate system -> nuScenes box coordinate system
         # convert the dim/rot to nuscbox convention
         nus_box_dims = box_dims[:, [2, 0, 1]]
@@ -605,7 +631,7 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
             box_list.append(box)
     else:
         raise NotImplementedError(
-            f'Do not support convert {type(bbox3d)} bboxes'
+            f'Do not support convert {type(bbox3d)} bboxes '
             'to standard NuScenesBoxes.')
 
     return box_list, attrs
@@ -619,13 +645,13 @@ def lidar_nusc_box_to_global(
     Args:
         info (dict): Info for a specific sample data, including the
             calibration information.
-        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
-        classes (list[str]): Mapped classes in the evaluation.
-        eval_configs (object): Evaluation configuration object.
+        boxes (List[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        classes (List[str]): Mapped classes in the evaluation.
+        eval_configs (:obj:`DetectionConfig`): Evaluation configuration object.
 
     Returns:
-        list: List of standard NuScenesBoxes in the global
-            coordinate.
+        List[:obj:`NuScenesBox`]: List of standard NuScenesBoxes in the
+        global coordinate.
     """
     box_list = []
     for box in boxes:
@@ -652,25 +678,26 @@ def lidar_nusc_box_to_global(
 def cam_nusc_box_to_global(
     info: dict,
     boxes: List[NuScenesBox],
-    attrs: List[str],
+    attrs: np.ndarray,
     classes: List[str],
     eval_configs: DetectionConfig,
     camera_type: str = 'CAM_FRONT',
-) -> List[NuScenesBox]:
+) -> Tuple[List[NuScenesBox], List[int]]:
     """Convert the box from camera to global coordinate.
 
     Args:
         info (dict): Info for a specific sample data, including the
             calibration information.
-        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
-        attrs (list[str]): List of attributes.
-        camera_type (str): Type of camera.
-        classes (list[str]): Mapped classes in the evaluation.
-        eval_configs (object): Evaluation configuration object.
+        boxes (List[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        attrs (np.ndarray): Predicted attributes.
+        classes (List[str]): Mapped classes in the evaluation.
+        eval_configs (:obj:`DetectionConfig`): Evaluation configuration object.
+        camera_type (str): Type of camera. Defaults to 'CAM_FRONT'.
 
     Returns:
-        list: List of standard NuScenesBoxes in the global
-            coordinate.
+        Tuple[List[:obj:`NuScenesBox`], List[int]]:
+        List of standard NuScenesBoxes in the global coordinate and
+        attribute label.
     """
     box_list = []
     attr_list = []
@@ -704,13 +731,13 @@ def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox],
     Args:
         info (dict): Info for a specific sample data, including the
             calibration information.
-        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
-        classes (list[str]): Mapped classes in the evaluation.
-        eval_configs (object): Evaluation configuration object.
+        boxes (List[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        classes (List[str]): Mapped classes in the evaluation.
+        eval_configs (:obj:`DetectionConfig`): Evaluation configuration object.
 
     Returns:
-        list: List of standard NuScenesBoxes in the global
-            coordinate.
+        List[:obj:`NuScenesBox`]: List of standard NuScenesBoxes in
+        camera coordinate.
     """
     box_list = []
     for box in boxes:
@@ -736,15 +763,17 @@ def global_nusc_box_to_cam(info: dict, boxes: List[NuScenesBox],
     return box_list
 
 
-def nusc_box_to_cam_box3d(boxes: List[NuScenesBox]):
+def nusc_box_to_cam_box3d(
+    boxes: List[NuScenesBox]
+) -> Tuple[CameraInstance3DBoxes, torch.Tensor, torch.Tensor]:
     """Convert boxes from :obj:`NuScenesBox` to :obj:`CameraInstance3DBoxes`.
 
     Args:
-        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        boxes (List[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
 
     Returns:
-        tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
-            Converted 3D bounding boxes, scores and labels.
+        Tuple[:obj:`CameraInstance3DBoxes`, torch.Tensor, torch.Tensor]:
+        Converted 3D bounding boxes, scores and labels.
     """
     locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)
     dims = torch.Tensor([b.wlh for b in boxes]).view(-1, 3)
diff --git a/mmdet3d/evaluation/metrics/waymo_metric.py b/mmdet3d/evaluation/metrics/waymo_metric.py
index 272b8cb0b2..5fae87412b 100644
--- a/mmdet3d/evaluation/metrics/waymo_metric.py
+++ b/mmdet3d/evaluation/metrics/waymo_metric.py
@@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import tempfile
 from os import path as osp
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 import mmengine
 import numpy as np
@@ -25,50 +25,56 @@ class WaymoMetric(KittiMetric):
         ann_file (str): The path of the annotation file in kitti format.
         waymo_bin_file (str): The path of the annotation file in waymo format.
         data_root (str): Path of dataset root.
-                         Used for storing waymo evaluation programs.
-        split (str): The split of the evaluation set.
-        metric (str | list[str]): Metrics to be evaluated.
-            Default to 'mAP'.
-        pcd_limit_range (list): The range of point cloud used to
+            Used for storing waymo evaluation programs.
+        split (str): The split of the evaluation set. Defaults to 'training'.
+        metric (str or List[str]): Metrics to be evaluated.
+            Defaults to 'mAP'.
+        pcd_limit_range (List[float]): The range of point cloud used to
             filter invalid predicted boxes.
-            Default to [0, -40, -3, 70.4, 40, 0.0].
+            Defaults to [-85, -85, -5, 85, 85, 5].
+        convert_kitti_format (bool): Whether to convert the results to
+            kitti format. Now, in order to be compatible with camera-based
+            methods, defaults to True.
         prefix (str, optional): The prefix that will be added in the metric
             names to disambiguate homonymous metrics of different evaluators.
             If prefix is not provided in the argument, self.default_prefix
             will be used instead. Defaults to None.
-        convert_kitti_format (bool, optional): Whether convert the reuslts to
-            kitti format. Now, in order to be compatible with camera-based
-            methods, defaults to True.
+        format_only (bool): Format the output results without performing
+            evaluation. It is useful when you want to format the result
+            to a specific format and submit it to the test server.
+            Defaults to False.
         pklfile_prefix (str, optional): The prefix of pkl files, including
             the file path and the prefix of filename, e.g., "a/b/prefix".
-            If not specified, a temp file will be created. Default: None.
+            If not specified, a temp file will be created. Defaults to None.
         submission_prefix (str, optional): The prefix of submission data.
             If not specified, the submission data will not be generated.
-            Default: None.
-        load_type (str, optional): Type of loading mode during training.
+            Defaults to None.
+        load_type (str): Type of loading mode during training.
 
             - 'frame_based': Load all of the instances in the frame.
             - 'mv_image_based': Load all of the instances in the frame and need
-                to convert to the FOV-based data type to support image-based
-                detector.
-            - 'fov_image_base': Only load the instances inside the default cam,
-                and need to convert to the FOV-based data type to support
-                image-based detector.
-        default_cam_key (str, optional): The default camera for lidar to
-            camear conversion. By default, KITTI: CAM2, Waymo: CAM_FRONT
-        use_pred_sample_idx (bool, optional): In formating results, use the
-            sample index from the prediction or from the load annoataitons.
+              to convert to the FOV-based data type to support image-based
+              detector.
+            - 'fov_image_based': Only load the instances inside the default
+              cam, and need to convert to the FOV-based data type to support
+              image-based detector.
+        default_cam_key (str): The default camera for lidar to camera
+            conversion. By default, KITTI: 'CAM2', Waymo: 'CAM_FRONT'.
+            Defaults to 'CAM_FRONT'.
+        use_pred_sample_idx (bool): In formatting results, use the
+            sample index from the prediction or from the load annotations.
             By default, KITTI: True, Waymo: False, Waymo has a conversion
-            process, which needs to use the sample id from load annotation.
+            process, which needs to use the sample idx from load annotation.
+            Defaults to False.
         collect_device (str): Device name used for collecting results
             from different ranks during distributed training. Must be 'cpu' or
             'gpu'. Defaults to 'cpu'.
-        file_client_args (dict): file client for reading gt in waymo format.
+        file_client_args (dict): File client for reading gt in waymo format.
             Defaults to ``dict(backend='disk')``.
-        idx2metainfo (Optional[str], optional): The file path of the metainfo
-            in waymmo. It stores the mapping from sample_idx to metainfo.
-            The metainfo must contain the keys: 'idx2contextname' and
-            'idx2timestamp'. Defaults to None.
+        idx2metainfo (str, optional): The file path of the metainfo in waymo.
+            It stores the mapping from sample_idx to metainfo. The metainfo
+            must contain the keys: 'idx2contextname' and 'idx2timestamp'.
+            Defaults to None.
     """
     num_cams = 5
 
@@ -81,14 +87,15 @@ def __init__(self,
                  pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5],
                  convert_kitti_format: bool = True,
                  prefix: Optional[str] = None,
-                 pklfile_prefix: str = None,
-                 submission_prefix: str = None,
+                 format_only: bool = False,
+                 pklfile_prefix: Optional[str] = None,
+                 submission_prefix: Optional[str] = None,
                  load_type: str = 'frame_based',
                  default_cam_key: str = 'CAM_FRONT',
                  use_pred_sample_idx: bool = False,
                  collect_device: str = 'cpu',
                  file_client_args: dict = dict(backend='disk'),
-                 idx2metainfo: Optional[str] = None):
+                 idx2metainfo: Optional[str] = None) -> None:
         self.waymo_bin_file = waymo_bin_file
         self.data_root = data_root
         self.split = split
@@ -101,7 +108,7 @@ def __init__(self,
         else:
             self.idx2metainfo = None
 
-        super().__init__(
+        super(WaymoMetric, self).__init__(
             ann_file=ann_file,
             metric=metric,
             pcd_limit_range=pcd_limit_range,
@@ -111,13 +118,20 @@ def __init__(self,
             default_cam_key=default_cam_key,
             collect_device=collect_device,
             file_client_args=file_client_args)
+        self.format_only = format_only
+        if self.format_only:
+            assert pklfile_prefix is not None, (
+                'pklfile_prefix must be not None when format_only is True, '
+                'otherwise the result files will be saved to a temp directory '
+                'which will be cleaned up at the end.')
+
         self.default_prefix = 'Waymo metric'
 
-    def compute_metrics(self, results: list) -> Dict[str, float]:
+    def compute_metrics(self, results: List[dict]) -> Dict[str, float]:
         """Compute the metrics from processed results.
 
         Args:
-            results (list): The processed results of the whole dataset.
+            results (List[dict]): The processed results of the whole dataset.
 
         Returns:
             Dict[str, float]: The computed metrics. The keys are the names of
@@ -155,7 +169,7 @@ def compute_metrics(self, results: list) -> Dict[str, float]:
                     if 'image_sweeps' in info:
                         camera_info['image_sweeps'] = info['image_sweeps']
 
-                    # TODO check if need to modify the sample id
+                    # TODO check if need to modify the sample idx
                     # TODO check when will use it except for evaluation.
                     camera_info['sample_idx'] = info['sample_idx']
                     new_data_infos.append(camera_info)
@@ -175,6 +189,12 @@ def compute_metrics(self, results: list) -> Dict[str, float]:
             classes=self.classes)
 
         metric_dict = {}
+
+        if self.format_only:
+            logger.info('results are saved in '
+                        f'{osp.dirname(self.pklfile_prefix)}')
+            return metric_dict
+
         for metric in self.metrics:
             ap_dict = self.waymo_evaluate(
                 pklfile_prefix, metric=metric, logger=logger)
@@ -188,19 +208,19 @@ def compute_metrics(self, results: list) -> Dict[str, float]:
 
     def waymo_evaluate(self,
                        pklfile_prefix: str,
-                       metric: str = None,
-                       logger: MMLogger = None) -> dict:
+                       metric: Optional[str] = None,
+                       logger: Optional[MMLogger] = None) -> Dict[str, float]:
         """Evaluation in Waymo protocol.
 
         Args:
             pklfile_prefix (str): The location that stored the prediction
                 results.
-            metric (str): Metric to be evaluated. Defaults to None.
+            metric (str, optional): Metric to be evaluated. Defaults to None.
             logger (MMLogger, optional): Logger used for printing
-                related information during evaluation. Default: None.
+                related information during evaluation. Defaults to None.
 
         Returns:
-            dict[str, float]: Results of each evaluation metric.
+            Dict[str, float]: Results of each evaluation metric.
         """
 
         import subprocess
@@ -238,8 +258,6 @@ def waymo_evaluate(self,
             }
             mAP_splits = ret_texts.split('mAP ')
             mAPH_splits = ret_texts.split('mAPH ')
-            mAP_splits = ret_texts.split('mAP ')
-            mAPH_splits = ret_texts.split('mAPH ')
             for idx, key in enumerate(ap_dict.keys()):
                 split_idx = int(idx / 2) + 1
                 if idx % 2 == 0:  # mAP
@@ -307,31 +325,32 @@ def waymo_evaluate(self,
                     ap_dict['Cyclist mAPH']) / 3
         return ap_dict
 
-    def format_results(self,
-                       results: List[dict],
-                       pklfile_prefix: str = None,
-                       submission_prefix: str = None,
-                       classes: List[str] = None):
+    def format_results(
+        self,
+        results: List[dict],
+        pklfile_prefix: Optional[str] = None,
+        submission_prefix: Optional[str] = None,
+        classes: Optional[List[str]] = None
+    ) -> Tuple[dict, Union[tempfile.TemporaryDirectory, None]]:
         """Format the results to bin file.
 
         Args:
-            results (list[dict]): Testing results of the
-                dataset.
+            results (List[dict]): Testing results of the dataset.
             pklfile_prefix (str, optional): The prefix of pkl files. It
                 includes the file path and the prefix of filename, e.g.,
                 "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
+                Defaults to None.
             submission_prefix (str, optional): The prefix of submitted files.
                 It includes the file path and the prefix of filename, e.g.,
                 "a/b/prefix". If not specified, a temp file will be created.
-                Default: None.
-            classes (list[String], optional): A list of class name. Defaults
-                to None.
+                Defaults to None.
+            classes (List[str], optional): A list of class name.
+                Defaults to None.
 
         Returns:
             tuple: (result_dict, tmp_dir), result_dict is a dict containing
-                the formatted result, tmp_dir is the temporal directory created
-                for saving json files when jsonfile_prefix is not specified.
+            the formatted result, tmp_dir is the temporal directory created
+            for saving json files when jsonfile_prefix is not specified.
         """
         waymo_save_tmp_dir = tempfile.TemporaryDirectory()
         waymo_results_save_dir = waymo_save_tmp_dir.name
@@ -378,15 +397,16 @@ def format_results(self,
         return final_results, waymo_save_tmp_dir
 
     def merge_multi_view_boxes(self, box_dict_per_frame: List[dict],
-                               cam0_info: dict):
+                               cam0_info: dict) -> dict:
         """Merge bounding boxes predicted from multi-view images.
+
         Args:
-            box_dict_per_frame (list[dict]): The results of prediction
+            box_dict_per_frame (List[dict]): The results of prediction
                 for each camera.
-            cam2_info (dict): store the sample id for the given frame.
+            cam0_info (dict): Store the sample idx for the given frame.
 
         Returns:
-            merged_box_dict (dict), store the merge results
+            dict: Merged results.
         """
         box_dict = dict()
         # convert list[dict] to dict[list]
@@ -444,27 +464,28 @@ def merge_multi_view_boxes(self, box_dict_per_frame: List[dict],
         )
         return merged_box_dict
 
-    def bbox2result_kitti(self,
-                          net_outputs: list,
-                          sample_id_list: list,
-                          class_names: list,
-                          pklfile_prefix: str = None,
-                          submission_prefix: str = None):
+    def bbox2result_kitti(
+            self,
+            net_outputs: List[dict],
+            sample_idx_list: List[int],
+            class_names: List[str],
+            pklfile_prefix: Optional[str] = None,
+            submission_prefix: Optional[str] = None) -> List[dict]:
         """Convert 3D detection results to kitti format for evaluation and test
         submission.
 
         Args:
-            net_outputs (list[dict]): List of array storing the
+            net_outputs (List[dict]): List of dict storing the
                 inferenced bounding boxes and scores.
-            sample_id_list (list[int]): List of input sample id.
-            class_names (list[String]): A list of class names.
+            sample_idx_list (List[int]): List of input sample idx.
+            class_names (List[str]): A list of class names.
             pklfile_prefix (str, optional): The prefix of pkl file.
                 Defaults to None.
             submission_prefix (str, optional): The prefix of submission file.
                 Defaults to None.
 
         Returns:
-            list[dict]: A list of dictionaries with the kitti format.
+            List[dict]: A list of dictionaries with the kitti format.
         """
         if submission_prefix is not None:
             mmengine.mkdir_or_exist(submission_prefix)
@@ -473,8 +494,7 @@ def bbox2result_kitti(self,
         print('\nConverting prediction to KITTI format')
         for idx, pred_dicts in enumerate(
                 mmengine.track_iter_progress(net_outputs)):
-            annos = []
-            sample_idx = sample_id_list[idx]
+            sample_idx = sample_idx_list[idx]
             info = self.data_infos[sample_idx]
 
             if self.load_type == 'mv_image_based':
@@ -536,7 +556,6 @@ def bbox2result_kitti(self,
                     anno['score'].append(score)
 
                 anno = {k: np.stack(v) for k, v in anno.items()}
-                annos.append(anno)
             else:
                 anno = {
                     'name': np.array([]),
@@ -549,7 +568,6 @@ def bbox2result_kitti(self,
                     'rotation_y': np.array([]),
                     'score': np.array([]),
                 }
-                annos.append(anno)
 
             if submission_prefix is not None:
                 curr_file = f'{submission_prefix}/{sample_idx:06d}.txt'
@@ -577,10 +595,10 @@ def bbox2result_kitti(self,
                 # In waymo validation sample_idx in prediction is 000xxx
                 # but in info file it is 1000xxx
                 save_sample_idx = box_dict['sample_idx']
-            annos[-1]['sample_idx'] = np.array(
-                [save_sample_idx] * len(annos[-1]['score']), dtype=np.int64)
+            anno['sample_idx'] = np.array(
+                [save_sample_idx] * len(anno['score']), dtype=np.int64)
 
-            det_annos += annos
+            det_annos.append(anno)
 
         if pklfile_prefix is not None:
             if not pklfile_prefix.endswith(('.pkl', '.pickle')):
@@ -592,16 +610,16 @@ def bbox2result_kitti(self,
 
         return det_annos
 
-    def convert_valid_bboxes(self, box_dict: dict, info: dict):
+    def convert_valid_bboxes(self, box_dict: dict, info: dict) -> dict:
         """Convert the predicted boxes into valid ones. Should handle the
         load_model (frame_based, mv_image_based, fov_image_based), separately.
 
         Args:
             box_dict (dict): Box dictionaries to be converted.
 
-                - bboxes_3d (:obj:`LiDARInstance3DBoxes`): 3D bounding boxes.
-                - scores_3d (torch.Tensor): Scores of boxes.
-                - labels_3d (torch.Tensor): Class labels of boxes.
+                - bboxes_3d (:obj:`BaseInstance3DBoxes`): 3D bounding boxes.
+                - scores_3d (Tensor): Scores of boxes.
+                - labels_3d (Tensor): Class labels of boxes.
             info (dict): Data info.
 
         Returns:
@@ -609,9 +627,9 @@ def convert_valid_bboxes(self, box_dict: dict, info: dict):
 
                 - bbox (np.ndarray): 2D bounding boxes.
                 - box3d_camera (np.ndarray): 3D bounding boxes in
-                    camera coordinate.
+                  camera coordinate.
                 - box3d_lidar (np.ndarray): 3D bounding boxes in
-                    LiDAR coordinate.
+                  LiDAR coordinate.
                 - scores (np.ndarray): Scores of boxes.
                 - label_preds (np.ndarray): Class label predictions.
                 - sample_idx (int): Sample index.
@@ -673,7 +691,7 @@ def convert_valid_bboxes(self, box_dict: dict, info: dict):
             valid_pcd_inds = ((box_preds_lidar.center > limit_range[:3]) &
                               (box_preds_lidar.center < limit_range[3:]))
             valid_inds = valid_pcd_inds.all(-1)
-        if self.load_type in ['mv_image_based', 'fov_image_based']:
+        elif self.load_type in ['mv_image_based', 'fov_image_based']:
             valid_inds = valid_cam_inds
 
         if valid_inds.sum() > 0: