From 148a0856ab47d4b4a92894838a4525adaddaaf0b Mon Sep 17 00:00:00 2001
From: Jingwei Zhang
Date: Wed, 14 Jun 2023 15:31:00 +0800
Subject: [PATCH] [Feature] Support downloading datasets from OpenDataLab using mim (#2593)

* init commit

* add dataset unzip scripts

* polish docs

* polish docs
---
 MANIFEST.in                                   |  1 +
 dataset-index.yml                             | 29 ++++++++++++
 docs/en/user_guides/dataset_prepare.md        | 47 +++++++++++++++++--
 ...75_second_secfpn_8xb4-cyclic-20e_nus-3d.py |  4 +-
 setup.py                                      |  4 +-
 tools/dataset_converters/kitti_unzip.sh       | 12 +++++
 tools/dataset_converters/nuscenes_unzip.sh    | 17 +++++++
 .../dataset_converters/semantickitti_unzip.sh | 12 +++++
 8 files changed, 118 insertions(+), 8 deletions(-)
 create mode 100644 dataset-index.yml
 create mode 100755 tools/dataset_converters/kitti_unzip.sh
 create mode 100644 tools/dataset_converters/nuscenes_unzip.sh
 create mode 100644 tools/dataset_converters/semantickitti_unzip.sh

diff --git a/MANIFEST.in b/MANIFEST.in
index 7b9cae69d7..4d334909a5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 include mmdet3d/.mim/model-index.yml
+include mmdet3d/.mim/dataset-index.yml
 include requirements/*.txt
 recursive-include mmdet3d/.mim/ops *.cpp *.cu *.h *.cc
 recursive-include mmdet3d/.mim/configs *.py *.yml
diff --git a/dataset-index.yml b/dataset-index.yml
new file mode 100644
index 0000000000..6fbbbdb27f
--- /dev/null
+++ b/dataset-index.yml
@@ -0,0 +1,29 @@
+kitti:
+  # The name of the dataset on OpenDataLab; see
+  # https://opendatalab.com/KITTI_Object/cli. You can also download it
+  # separately by running `odl get ${dataset}`
+  dataset: KITTI_Object
+  download_root: data
+  data_root: data/kitti
+  # Script for unzipping the dataset
+  script: tools/dataset_converters/kitti_unzip.sh
+
+nuscenes:
+  # The name of the dataset on OpenDataLab; see
+  # https://opendatalab.com/nuScenes/cli. You can also download it
+  # separately by running `odl get ${dataset}`
+  dataset: nuScenes
+  download_root: data
+  data_root: data/nuscenes
+  # Script for unzipping the dataset
+  script: tools/dataset_converters/nuscenes_unzip.sh
+
+semantickitti:
+  # The name of the dataset on OpenDataLab; see
+  # https://opendatalab.com/SemanticKITTI/cli. You can also download it
+  # separately by running `odl get ${dataset}`
+  dataset: SemanticKITTI
+  download_root: data
+  data_root: data/semantickitti
+  # Script for unzipping the dataset
+  script: tools/dataset_converters/semantickitti_unzip.sh
diff --git a/docs/en/user_guides/dataset_prepare.md b/docs/en/user_guides/dataset_prepare.md
index 27d61270e3..d1b7cad14e 100644
--- a/docs/en/user_guides/dataset_prepare.md
+++ b/docs/en/user_guides/dataset_prepare.md
@@ -86,7 +86,20 @@ mmdetection3d
 
 ### KITTI
 
-Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Prepare KITTI data splits by running:
+1. Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Alternatively, you
+   can download the dataset from [OpenDataLab](https://opendatalab.com/) using MIM. The download and extraction commands are as follows:
+
+```bash
+# install OpenDataLab CLI tools
+pip install -U opendatalab
+# log in to OpenDataLab; note that you need to register an account on
+# [OpenDataLab](https://opendatalab.com/) first
+odl login
+# download and extract the dataset with MIM
+mim download mmdet3d --dataset kitti
+```
+
+2. Prepare KITTI data splits by running:
 
 ```bash
 mkdir ./data/kitti/ && mkdir ./data/kitti/ImageSets
@@ -98,7 +111,7 @@ wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/sec
 wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/second/data/ImageSets/trainval.txt --no-check-certificate --content-disposition -O ./data/kitti/ImageSets/trainval.txt
 ```
 
-Then generate info files by running:
+3. Generate info files by running:
 
 ```bash
 python tools/create_data.py kitti --root-path ./data/kitti --out-dir ./data/kitti --extra-tag kitti
@@ -160,7 +173,20 @@ Note that:
 
 ### NuScenes
 
-Download nuScenes V1.0 full dataset data [HERE](https://www.nuscenes.org/download). Prepare nuscenes data by running:
+1. Download the nuScenes V1.0 full dataset [HERE](https://www.nuscenes.org/download). Alternatively, you
+   can download the dataset from [OpenDataLab](https://opendatalab.com/) using MIM. The download and extraction commands are as follows:
+
+```bash
+# install OpenDataLab CLI tools
+pip install -U opendatalab
+# log in to OpenDataLab; note that you need to register an account on
+# [OpenDataLab](https://opendatalab.com/) first
+odl login
+# download and extract the dataset with MIM
+mim download mmdet3d --dataset nuscenes
+```
+
+2. Prepare nuScenes data by running:
 
 ```bash
 python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes
@@ -187,9 +213,20 @@ Note that we follow the original folder names for clear organization. Please ren
 
 ### SemanticKITTI
 
-Download SemanticKITTI dataset [HERE](http://semantic-kitti.org/dataset.html#download) and unzip all zip files.
+1. Download the SemanticKITTI dataset [HERE](http://semantic-kitti.org/dataset.html#download) and unzip all zip files. Alternatively, you
+   can download the dataset from [OpenDataLab](https://opendatalab.com/) using MIM. The download and extraction commands are as follows:
+
+```bash
+# install OpenDataLab CLI tools
+pip install -U opendatalab
+# log in to OpenDataLab; note that you need to register an account on
+# [OpenDataLab](https://opendatalab.com/) first
+odl login
+# download and extract the dataset with MIM
+mim download mmdet3d --dataset semantickitti
+```
 
-Then generate info files by running:
+2. Generate info files by running:
 
 ```bash
 python ./tools/create_data.py semantickitti --root-path ./data/semantickitti --out-dir ./data/semantickitti --extra-tag semantickitti
diff --git a/projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py b/projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py
index f0b6eeba30..a08bb66ad1 100644
--- a/projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py
+++ b/projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py
@@ -127,7 +127,7 @@
             'ori_lidar2img', 'img_aug_matrix', 'box_type_3d', 'sample_idx',
             'lidar_path', 'img_path', 'transformation_3d_flow', 'pcd_rotation',
             'pcd_scale_factor', 'pcd_trans', 'img_aug_matrix',
-            'lidar_aug_matrix'
+            'lidar_aug_matrix', 'num_pts_feats'
         ])
 ]
 
@@ -168,7 +168,7 @@
         meta_keys=[
             'cam2img', 'ori_cam2img', 'lidar2cam', 'lidar2img', 'cam2lidar',
             'ori_lidar2img', 'img_aug_matrix', 'box_type_3d', 'sample_idx',
-            'lidar_path', 'img_path'
+            'lidar_path', 'img_path', 'num_pts_feats'
         ])
 ]
diff --git a/setup.py b/setup.py
index 6c7a8261d3..86ad8a266d 100644
--- a/setup.py
+++ b/setup.py
@@ -158,7 +158,9 @@ def add_mim_extention():
     else:
         return
 
-    filenames = ['tools', 'configs', 'demo', 'model-index.yml']
+    filenames = [
+        'tools', 'configs', 'demo', 'model-index.yml', 'dataset-index.yml'
+    ]
     repo_path = osp.dirname(__file__)
     mim_path = osp.join(repo_path, 'mmdet3d', '.mim')
     os.makedirs(mim_path, exist_ok=True)
diff --git a/tools/dataset_converters/kitti_unzip.sh b/tools/dataset_converters/kitti_unzip.sh
new file mode 100755
index 0000000000..834ddaf82e
--- /dev/null
+++ b/tools/dataset_converters/kitti_unzip.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+DOWNLOAD_DIR=$1  # The directory where the downloaded dataset is stored
+DATA_ROOT=$2  # The root directory of the converted dataset
+
+for zip_file in "$DOWNLOAD_DIR"/KITTI_Object/raw/*.zip; do
+  echo "Unzipping $zip_file to $DATA_ROOT ..."
+  unzip -oq "$zip_file" -d "$DATA_ROOT"
+  echo "[Done] Unzipped $zip_file to $DATA_ROOT"
+  # delete the original archive after extraction
+  rm -f "$zip_file"
+done
diff --git a/tools/dataset_converters/nuscenes_unzip.sh b/tools/dataset_converters/nuscenes_unzip.sh
new file mode 100644
index 0000000000..ad2d0a3536
--- /dev/null
+++ b/tools/dataset_converters/nuscenes_unzip.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+DOWNLOAD_DIR=$1  # The directory where the downloaded dataset is stored
+DATA_ROOT=$2  # The root directory of the converted dataset
+
+for split in "$DOWNLOAD_DIR"/nuScenes/raw/*; do
+  for tgz_file in "$split"/*; do
+    if [[ $tgz_file == *.tgz ]]
+    then
+      echo "Extracting $tgz_file to $DATA_ROOT ..."
+      mkdir -p "$DATA_ROOT" && tar -xzf "$tgz_file" -C "$DATA_ROOT"
+      echo "[Done] Extracted $tgz_file to $DATA_ROOT"
+    fi
+    # delete the original file
+    rm -f "$tgz_file"
+  done
+done
diff --git a/tools/dataset_converters/semantickitti_unzip.sh b/tools/dataset_converters/semantickitti_unzip.sh
new file mode 100644
index 0000000000..79892cc714
--- /dev/null
+++ b/tools/dataset_converters/semantickitti_unzip.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+DOWNLOAD_DIR=$1  # The directory where the downloaded dataset is stored
+DATA_ROOT=$2  # The root directory of the converted dataset
+
+for zip_file in "$DOWNLOAD_DIR"/SemanticKITTI/raw/*.zip; do
+  echo "Unzipping $zip_file to $DATA_ROOT ..."
+  unzip -oq "$zip_file" -d "$DATA_ROOT"
+  echo "[Done] Unzipped $zip_file to $DATA_ROOT"
+  # delete the original archive after extraction
+  rm -f "$zip_file"
+done
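
For reference, the unzip scripts introduced above can also be run by hand. This is a minimal sketch, assuming the archives have already been fetched (e.g. with `odl get`) into the `download_root` declared in `dataset-index.yml` (`data`), and assuming `mim download` passes `download_root` and `data_root` to the configured `script` as its two positional arguments:

```bash
# Manually replay the extraction step that `mim download mmdet3d --dataset ...`
# is expected to perform, using the defaults from dataset-index.yml above:
#   $1 = download_root (where the raw archives were downloaded)
#   $2 = data_root (where the dataset is extracted)
bash tools/dataset_converters/kitti_unzip.sh data data/kitti
bash tools/dataset_converters/nuscenes_unzip.sh data data/nuscenes
bash tools/dataset_converters/semantickitti_unzip.sh data data/semantickitti
```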