Commit

fix visualization.md
JingweiZhang12 committed May 4, 2023
1 parent c1090fc commit 74d1116
Showing 3 changed files with 40 additions and 35 deletions.
28 changes: 13 additions & 15 deletions docs/en/user_guides/visualization.md
@@ -42,18 +42,19 @@ We support drawing 3D boxes on the point cloud by using `draw_bboxes_3d`.

```python
import torch
import numpy as np

from mmdet3d.visualization import Det3DLocalVisualizer
from mmdet3d.structures import LiDARInstance3DBoxes

points = np.fromfile('tests/data/kitti/training/velodyne/000000.bin', dtype=np.float32)
points = np.fromfile('demo/data/kitti/000008.bin', dtype=np.float32)
points = points.reshape(-1, 4)
visualizer = Det3DLocalVisualizer()
# set point cloud in visualizer
visualizer.set_points(points)
bboxes_3d = LiDARInstance3DBoxes(torch.tensor(
[[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900,
-1.5808]])),
bboxes_3d = LiDARInstance3DBoxes(
torch.tensor([[8.7314, -1.8559, -1.5997, 4.2000, 3.4800, 1.8900,
-1.5808]]))
# Draw 3D bboxes
visualizer.draw_bboxes_3d(bboxes_3d)
visualizer.show()
@@ -92,8 +93,6 @@ visualizer.draw_proj_bboxes_3d(gt_bboxes_3d, input_meta)
visualizer.show()
```

![mono3d](../../../resources/mono3d.png)

### Drawing BEV Boxes

We support drawing BEV boxes by using `draw_bev_bboxes`.
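The rest of the BEV example is collapsed in this diff view. Below is a minimal sketch, reusing the box values from the LiDAR example above and assuming `set_bev_image()` prepares an empty BEV canvas; the `draw_bev_bboxes(gt_bboxes_3d, edge_colors='orange')` call itself appears in the hunk context that follows.

```python
import torch

from mmdet3d.structures import LiDARInstance3DBoxes
from mmdet3d.visualization import Det3DLocalVisualizer

# Box values reused from the LiDAR example above.
gt_bboxes_3d = LiDARInstance3DBoxes(
    torch.tensor([[8.7314, -1.8559, -1.5997, 4.2000, 3.4800, 1.8900,
                   -1.5808]]))

visualizer = Det3DLocalVisualizer()
# Assumption: set_bev_image() initializes an empty bird's-eye-view canvas.
visualizer.set_bev_image()
# Draw the boxes in bird's-eye view with orange edges.
visualizer.draw_bev_bboxes(gt_bboxes_3d, edge_colors='orange')
visualizer.show()
```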
@@ -120,23 +119,22 @@ visualizer.draw_bev_bboxes(gt_bboxes_3d, edge_colors='orange')
visualizer.show()
```

<img src="../../../resources/bev.png" width = "50%" />

### Drawing 3D Semantic Mask

We support drawing segmentation masks via per-point colorization by using `draw_seg_mask`.

```python
import torch
import numpy as np

from mmdet3d.visualization import Det3DLocalVisualizer

points = np.fromfile('tests/data/s3dis/points/Area_1_office_2.bin', dtype=np.float32)
points = np.fromfile('demo/data/sunrgbd/000017.bin', dtype=np.float32)
points = points.reshape(-1, 3)
visualizer = Det3DLocalVisualizer()
mask = np.random.rand(points.shape[0], 3)
points_with_mask = np.concatenate((points, mask), axis=-1)
# Draw 3D points with mask
visualizer.set_points(points, pcd_mode=2, vis_mode='add')
visualizer.draw_seg_mask(points_with_mask)
visualizer.show()
```
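The random mask above is only for illustration; in practice the per-point colors usually come from a palette indexed by semantic labels. A hedged sketch of that, with a hypothetical three-class palette and randomly generated stand-in labels:

```python
import numpy as np

from mmdet3d.visualization import Det3DLocalVisualizer

points = np.fromfile('demo/data/sunrgbd/000017.bin', dtype=np.float32)
points = points.reshape(-1, 3)

# Hypothetical per-point semantic labels and a small RGB palette in [0, 1].
labels = np.random.randint(0, 3, size=points.shape[0])
palette = np.array([[1.0, 0.0, 0.0],
                    [0.0, 1.0, 0.0],
                    [0.0, 0.0, 1.0]])
colors = palette[labels]  # (N, 3) per-point colors
points_with_colors = np.concatenate((points, colors), axis=-1)

visualizer = Det3DLocalVisualizer()
visualizer.set_points(points, pcd_mode=2, vis_mode='add')
visualizer.draw_seg_mask(points_with_colors)
visualizer.show()
```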
@@ -168,10 +166,10 @@ This allows the inference and results generation to be done in remote server and
We also provide scripts to visualize the dataset without inference. You can use `tools/misc/browse_dataset.py` to show the loaded data and ground truth online and save them to disk. Currently we support single-modality 3D detection and 3D segmentation on all datasets, multi-modality 3D detection on KITTI and SUN RGB-D, as well as monocular 3D detection on nuScenes. To browse the KITTI dataset, you can run the following command:

```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task det --output-dir ${OUTPUT_DIR}
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task lidar_det --output-dir ${OUTPUT_DIR}
```

**Notice**: Once `--output-dir` is specified, the images of the views specified by the user will be saved when pressing `_ESC_` in the Open3D window.
**Notice**: Once `--output-dir` is specified, the images of the views specified by the user will be saved when pressing `_ESC_` in the Open3D window. If you want to zoom in or out on the point clouds to inspect more details, you can specify `--show-interval=0` in the command.

To verify the data consistency and the effect of data augmentation, you can also add the `--aug` flag to visualize the data after augmentation using the command below:

@@ -182,23 +180,23 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
If you also want to show 2D images with 3D bounding boxes projected onto them, you need to find a config that supports multi-modality data loading, and then change the `--task` argument to `multi-modality_det`. An example is shown below:

```shell
python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
```

![](../../../resources/browse_dataset_multi_modality.png)

You can simply browse different datasets using different configs, e.g. visualizing the ScanNet dataset in the 3D semantic segmentation task:

```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR} --online
python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR}
```

![](../../../resources/browse_dataset_seg.png)

And to browse the nuScenes dataset in the monocular 3D detection task:

```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR}
```

![](../../../resources/browse_dataset_mono.png)
26 changes: 12 additions & 14 deletions docs/zh_cn/user_guides/visualization.md
@@ -42,18 +42,19 @@ visualizer.show()

```python
import torch
import numpy as np

from mmdet3d.visualization import Det3DLocalVisualizer
from mmdet3d.structures import LiDARInstance3DBoxes

points = np.fromfile('tests/data/kitti/training/velodyne/000000.bin', dtype=np.float32)
points = np.fromfile('demo/data/kitti/000008.bin', dtype=np.float32)
points = points.reshape(-1, 4)
visualizer = Det3DLocalVisualizer()
# set point cloud in visualizer
visualizer.set_points(points)
bboxes_3d = LiDARInstance3DBoxes(torch.tensor(
[[8.7314, -1.8559, -1.5997, 1.2000, 0.4800, 1.8900,
-1.5808]])),
bboxes_3d = LiDARInstance3DBoxes(
torch.tensor([[8.7314, -1.8559, -1.5997, 4.2000, 3.4800, 1.8900,
-1.5808]]))
# Draw 3D bboxes
visualizer.draw_bboxes_3d(bboxes_3d)
visualizer.show()
@@ -92,8 +93,6 @@ visualizer.draw_proj_bboxes_3d(gt_bboxes_3d, input_meta)
visualizer.show()
```

![mono3d](../../../resources/mono3d.png)

### Drawing BEV Boxes

We support drawing BEV boxes by using `draw_bev_bboxes`.
@@ -120,23 +119,22 @@ visualizer.draw_bev_bboxes(gt_bboxes_3d, edge_colors='orange')
visualizer.show()
```

<img src="../../../resources/bev.png" width = "50%" />

### Drawing 3D Semantic Mask

We support drawing segmentation masks via per-point colorization by using `draw_seg_mask`.

```python
import torch
import numpy as np

from mmdet3d.visualization import Det3DLocalVisualizer

points = np.fromfile('tests/data/s3dis/points/Area_1_office_2.bin', dtype=np.float32)
points = np.fromfile('demo/data/sunrgbd/000017.bin', dtype=np.float32)
points = points.reshape(-1, 3)
visualizer = Det3DLocalVisualizer()
mask = np.random.rand(points.shape[0], 3)
points_with_mask = np.concatenate((points, mask), axis=-1)
# Draw 3D points with mask
visualizer.set_points(points, pcd_mode=2, vis_mode='add')
visualizer.draw_seg_mask(points_with_mask)
visualizer.show()
```
@@ -171,7 +169,7 @@ python tools/misc/visualize_results.py ${CONFIG_FILE} --result ${RESULTS_PATH} -
python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py --task lidar_det --output-dir ${OUTPUT_DIR}
```

**Notice**: Once `--output-dir` is specified, the images of the views specified by the user will be saved when pressing `_ESC_` in the Open3D window.
**Notice**: Once `--output-dir` is specified, the images of the views specified by the user will be saved when pressing `_ESC_` in the Open3D window. If you want to zoom in or out on the point clouds to inspect more details, you can specify `--show-interval=0` in the command.

To verify the data consistency and the effect of data augmentation, you can also add the `--aug` flag to visualize the data after augmentation, using the command below:

@@ -182,23 +180,23 @@ python tools/misc/browse_dataset.py configs/_base_/datasets/kitti-3d-3class.py -
If you want to show 2D images with projected 3D bounding boxes, you need a config that supports multi-modality data loading and must change the `--task` argument to `multi-modality_det`. An example is shown below:

```shell
python tools/misc/browse_dataset.py configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
python tools/misc/browse_dataset.py configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --task multi-modality_det --output-dir ${OUTPUT_DIR}
```

![](../../../resources/browse_dataset_multi_modality.png)

You can browse different datasets using different configs, e.g. visualizing the ScanNet dataset in the 3D semantic segmentation task:

```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR} --online
python tools/misc/browse_dataset.py configs/_base_/datasets/scannet-seg.py --task lidar_seg --output-dir ${OUTPUT_DIR}
```

![](../../../resources/browse_dataset_seg.png)

And to browse the nuScenes dataset in the monocular 3D detection task:

```shell
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR} --online
python tools/misc/browse_dataset.py configs/_base_/datasets/nus-mono3d.py --task mono_det --output-dir ${OUTPUT_DIR}
```

![](../../../resources/browse_dataset_mono.png)
21 changes: 15 additions & 6 deletions mmdet3d/visualization/local_visualizer.py
@@ -424,7 +424,7 @@ def draw_bev_bboxes(self,
def draw_points_on_image(self,
points: Union[np.ndarray, Tensor],
pts2img: np.ndarray,
sizes: Union[np.ndarray, int] = 10) -> None:
sizes: Union[np.ndarray, int] = 3) -> None:
"""Draw projected points on the image.
Args:
Expand All @@ -447,7 +447,7 @@ def draw_points_on_image(self,
c=colors,
cmap=color_map,
s=sizes,
alpha=0.5,
alpha=0.7,
edgecolors='none')

# TODO: set bbox color according to palette
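For context, a hedged usage sketch of `draw_points_on_image` (not taken from the docs): the image and projection matrix below are placeholders, and passing only the xyz columns of the points is an assumption about the expected input; in a real pipeline the matrix would come from `input_meta['lidar2img']`.

```python
import numpy as np

from mmdet3d.visualization import Det3DLocalVisualizer

# Placeholder image and projection matrix; real values come from the dataset.
img = np.zeros((375, 1242, 3), dtype=np.uint8)
pts2img = np.eye(4)  # assumption: stand-in for input_meta['lidar2img']

points = np.fromfile('demo/data/kitti/000008.bin', dtype=np.float32)
points = points.reshape(-1, 4)

visualizer = Det3DLocalVisualizer()
visualizer.set_image(img)
# Assumption: only the xyz columns are projected onto the image.
visualizer.draw_points_on_image(points[:, :3], pts2img)
visualizer.show()
```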
@@ -505,7 +505,8 @@ def draw_proj_bboxes_3d(

corners_2d = proj_bbox3d_to_img(bboxes_3d, input_meta)
if img_size is not None:
# filter out the bbox where half of stuff is outside the image
# Filter out the bboxes where half of the projected corners are
# outside the image. This is for the visualization of multi-view images.
valid_point_idx = (corners_2d[..., 0] >= 0) & \
(corners_2d[..., 0] <= img_size[0]) & \
(corners_2d[..., 1] >= 0) & (corners_2d[..., 1] <= img_size[1]) # noqa: E501
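The filter above keeps only boxes whose projected corners mostly fall inside the image. As a standalone illustration of that idea (a sketch over dummy corner arrays, not the library's code):

```python
import numpy as np

def keep_mostly_visible(corners_2d: np.ndarray, img_size: tuple) -> np.ndarray:
    """Sketch of the filtering idea: keep a box only if at least half of its
    projected corners lie inside the image.

    corners_2d: (N, 8, 2) projected corners; img_size: (width, height).
    """
    inside = (corners_2d[..., 0] >= 0) & (corners_2d[..., 0] <= img_size[0]) & \
             (corners_2d[..., 1] >= 0) & (corners_2d[..., 1] <= img_size[1])
    # A box is kept when at least half of its 8 corners are inside the image.
    return inside.sum(axis=-1) >= corners_2d.shape[1] / 2

# One box fully inside and one far outside a 100 x 100 image.
corners = np.stack([np.full((8, 2), 50.0), np.full((8, 2), 500.0)])
print(keep_mostly_visible(corners, (100, 100)))  # [ True False]
```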
@@ -655,6 +656,8 @@ def _draw_instances_3d(self,
data_3d['img'] = composed_img
else:
# show single-view image
# TODO: Solve the problem that some line segments of 3D bboxes fall
# outside the image by a large margin
if isinstance(data_input['img'], Tensor):
img = img.permute(1, 2, 0).numpy()
img = img[..., [2, 1, 0]] # bgr to rgb
@@ -733,11 +736,17 @@ def show(self,
# firstly and then show point cloud since the running of
# Open3D will block the process
if hasattr(self, '_image'):
if drawn_img_3d is not None:
if drawn_img is None and drawn_img_3d is None:
# use the image got by Visualizer.get_image()
super().show(drawn_img_3d, win_name, img_wait_time,
continue_key)
if drawn_img is not None:
super().show(drawn_img, win_name, img_wait_time, continue_key)
else:
if drawn_img_3d is not None:
super().show(drawn_img_3d, win_name, img_wait_time,
continue_key)
if drawn_img is not None:
super().show(drawn_img, win_name, img_wait_time,
continue_key)

if hasattr(self, 'o3d_vis'):
self.o3d_vis.poll_events()
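A hedged note on the restructured `show()` above: when neither `drawn_img_3d` nor `drawn_img` is passed, it falls back to the image already stored in the visualizer (what `Visualizer.get_image()` returns). A minimal usage sketch under that assumption:

```python
import numpy as np

from mmdet3d.visualization import Det3DLocalVisualizer

visualizer = Det3DLocalVisualizer()
# Placeholder image; normally this is the image predictions were drawn on.
visualizer.set_image(np.zeros((300, 300, 3), dtype=np.uint8))
# With no drawn_img_3d / drawn_img arguments, show() displays the image
# obtained via Visualizer.get_image().
visualizer.show()
```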
