From 148a0856ab47d4b4a92894838a4525adaddaaf0b Mon Sep 17 00:00:00 2001
From: Jingwei Zhang
Date: Wed, 14 Jun 2023 15:31:00 +0800
Subject: [PATCH] [Feature] Support downloading datasets from OpenDataLab using mim (#2593)

* init commit

* add dataset unzip scripts

* polish docs

* polish docs
---
 MANIFEST.in                                   |  1 +
 dataset-index.yml                             | 29 ++++++++++++
 docs/en/user_guides/dataset_prepare.md        | 47 +++++++++++++++++--
 ...75_second_secfpn_8xb4-cyclic-20e_nus-3d.py |  4 +-
 setup.py                                      |  4 +-
 tools/dataset_converters/kitti_unzip.sh       | 12 +++++
 tools/dataset_converters/nuscenes_unzip.sh    | 17 +++++++
 .../dataset_converters/semantickitti_unzip.sh | 12 +++++
 8 files changed, 118 insertions(+), 8 deletions(-)
 create mode 100644 dataset-index.yml
 create mode 100755 tools/dataset_converters/kitti_unzip.sh
 create mode 100644 tools/dataset_converters/nuscenes_unzip.sh
 create mode 100644 tools/dataset_converters/semantickitti_unzip.sh

diff --git a/MANIFEST.in b/MANIFEST.in
index 7b9cae69d7..4d334909a5 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 include mmdet3d/.mim/model-index.yml
+include mmdet3d/.mim/dataset-index.yml
 include requirements/*.txt
 recursive-include mmdet3d/.mim/ops *.cpp *.cu *.h *.cc
 recursive-include mmdet3d/.mim/configs *.py *.yml
diff --git a/dataset-index.yml b/dataset-index.yml
new file mode 100644
index 0000000000..6fbbbdb27f
--- /dev/null
+++ b/dataset-index.yml
@@ -0,0 +1,29 @@
+kitti:
+  # The name of the dataset on OpenDataLab; see
+  # https://opendatalab.com/KITTI_Object/cli. You can also download it
+  # separately by running `odl get ${dataset}`
+  dataset: KITTI_Object
+  download_root: data
+  data_root: data/kitti
+  # Script for unzipping the dataset
+  script: tools/dataset_converters/kitti_unzip.sh
+
+nuscenes:
+  # The name of the dataset on OpenDataLab; see
+  # https://opendatalab.com/nuScenes/cli. You can also download it
+  # separately by running `odl get ${dataset}`
+  dataset: nuScenes
+  download_root: data
+  data_root: data/nuscenes
+  # Script for unzipping the dataset
+  script: tools/dataset_converters/nuscenes_unzip.sh
+
+semantickitti:
+  # The name of the dataset on OpenDataLab; see
+  # https://opendatalab.com/SemanticKITTI/cli. You can also download it
+  # separately by running `odl get ${dataset}`
+  dataset: SemanticKITTI
+  download_root: data
+  data_root: data/semantickitti
+  # Script for unzipping the dataset
+  script: tools/dataset_converters/semantickitti_unzip.sh
diff --git a/docs/en/user_guides/dataset_prepare.md b/docs/en/user_guides/dataset_prepare.md
index 27d61270e3..d1b7cad14e 100644
--- a/docs/en/user_guides/dataset_prepare.md
+++ b/docs/en/user_guides/dataset_prepare.md
@@ -86,7 +86,20 @@ mmdetection3d
 
 ### KITTI
 
-Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Prepare KITTI data splits by running:
+1. Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Alternatively, you
+   can download the dataset from [OpenDataLab](https://opendatalab.com/) using MIM. The download and extraction commands are as follows:
+
+```bash
+# install OpenDataLab CLI tools
+pip install -U opendatalab
+# log in to OpenDataLab; note that you need to register an account on
+# [OpenDataLab](https://opendatalab.com/) first
+odl login
+# download and extract the dataset with MIM
+mim download mmdet3d --dataset kitti
+```
+
+2. Prepare KITTI data splits by running:
 
 ```bash
 mkdir ./data/kitti/ && mkdir ./data/kitti/ImageSets
@@ -98,7 +111,7 @@ wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/sec
 wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/second/data/ImageSets/trainval.txt --no-check-certificate --content-disposition -O ./data/kitti/ImageSets/trainval.txt
 ```
 
-Then generate info files by running:
+3. Generate info files by running:
 
 ```bash
 python tools/create_data.py kitti --root-path ./data/kitti --out-dir ./data/kitti --extra-tag kitti
@@ -160,7 +173,20 @@ Note that:
 
 ### NuScenes
 
-Download nuScenes V1.0 full dataset data [HERE](https://www.nuscenes.org/download). Prepare nuscenes data by running:
+1. Download the nuScenes V1.0 full dataset [HERE](https://www.nuscenes.org/download). Alternatively, you
+   can download the dataset from [OpenDataLab](https://opendatalab.com/) using MIM. The download and extraction commands are as follows:
+
+```bash
+# install OpenDataLab CLI tools
+pip install -U opendatalab
+# log in to OpenDataLab; note that you need to register an account on
+# [OpenDataLab](https://opendatalab.com/) first
+odl login
+# download and extract the dataset with MIM
+mim download mmdet3d --dataset nuscenes
+```
+
+2. Prepare nuScenes data by running:
 
 ```bash
 python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes
@@ -187,9 +213,20 @@ Note that we follow the original folder names for clear organization. Please ren
 
 ### SemanticKITTI
 
-Download SemanticKITTI dataset [HERE](http://semantic-kitti.org/dataset.html#download) and unzip all zip files.
+1. Download the SemanticKITTI dataset [HERE](http://semantic-kitti.org/dataset.html#download) and unzip all zip files. Alternatively, you
+   can download the dataset from [OpenDataLab](https://opendatalab.com/) using MIM. The download and extraction commands are as follows:
+
+```bash
+# install OpenDataLab CLI tools
+pip install -U opendatalab
+# log in to OpenDataLab; note that you need to register an account on
+# [OpenDataLab](https://opendatalab.com/) first
+odl login
+# download and extract the dataset with MIM
+mim download mmdet3d --dataset semantickitti
+```
 
-Then generate info files by running:
+2. Generate info files by running:
 
 ```bash
 python ./tools/create_data.py semantickitti --root-path ./data/semantickitti --out-dir ./data/semantickitti --extra-tag semantickitti
diff --git a/projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py b/projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py
index f0b6eeba30..a08bb66ad1 100644
--- a/projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py
+++ b/projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py
@@ -127,7 +127,7 @@
             'ori_lidar2img', 'img_aug_matrix', 'box_type_3d', 'sample_idx',
             'lidar_path', 'img_path', 'transformation_3d_flow', 'pcd_rotation',
             'pcd_scale_factor', 'pcd_trans', 'img_aug_matrix',
-            'lidar_aug_matrix'
+            'lidar_aug_matrix', 'num_pts_feats'
         ])
 ]
 
@@ -168,7 +168,7 @@
         meta_keys=[
             'cam2img', 'ori_cam2img', 'lidar2cam', 'lidar2img', 'cam2lidar',
             'ori_lidar2img', 'img_aug_matrix', 'box_type_3d', 'sample_idx',
-            'lidar_path', 'img_path'
+            'lidar_path', 'img_path', 'num_pts_feats'
         ])
 ]
diff --git a/setup.py b/setup.py
index 6c7a8261d3..86ad8a266d 100644
--- a/setup.py
+++ b/setup.py
@@ -158,7 +158,9 @@ def add_mim_extention():
     else:
         return
 
-    filenames = ['tools', 'configs', 'demo', 'model-index.yml']
+    filenames = [
+        'tools', 'configs', 'demo', 'model-index.yml', 'dataset-index.yml'
+    ]
     repo_path = osp.dirname(__file__)
     mim_path = osp.join(repo_path, 'mmdet3d', '.mim')
     os.makedirs(mim_path, exist_ok=True)
diff --git a/tools/dataset_converters/kitti_unzip.sh b/tools/dataset_converters/kitti_unzip.sh
new file mode 100755
index 0000000000..834ddaf82e
--- /dev/null
+++ b/tools/dataset_converters/kitti_unzip.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+DOWNLOAD_DIR=$1  # The directory where the downloaded dataset is stored
+DATA_ROOT=$2  # The root directory of the converted dataset
+
+for zip_file in "$DOWNLOAD_DIR"/KITTI_Object/raw/*.zip; do
+  echo "Unzipping $zip_file to $DATA_ROOT ..."
+  unzip -oq "$zip_file" -d "$DATA_ROOT"
+  echo "[Done] Unzipped $zip_file to $DATA_ROOT"
+  # delete the original archive after extraction
+  rm -f "$zip_file"
+done
diff --git a/tools/dataset_converters/nuscenes_unzip.sh b/tools/dataset_converters/nuscenes_unzip.sh
new file mode 100644
index 0000000000..ad2d0a3536
--- /dev/null
+++ b/tools/dataset_converters/nuscenes_unzip.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+DOWNLOAD_DIR=$1  # The directory where the downloaded dataset is stored
+DATA_ROOT=$2  # The root directory of the converted dataset
+
+for split in "$DOWNLOAD_DIR"/nuScenes/raw/*; do
+  for tgz_file in "$split"/*; do
+    if [[ $tgz_file == *.tgz ]]
+    then
+      echo "Extracting $tgz_file to $DATA_ROOT ..."
+      mkdir -p "$DATA_ROOT" && tar -xzf "$tgz_file" -C "$DATA_ROOT"
+      echo "[Done] Extracted $tgz_file to $DATA_ROOT"
+    fi
+    # delete the original file
+    rm -f "$tgz_file"
+  done
+done
diff --git a/tools/dataset_converters/semantickitti_unzip.sh b/tools/dataset_converters/semantickitti_unzip.sh
new file mode 100644
index 0000000000..79892cc714
--- /dev/null
+++ b/tools/dataset_converters/semantickitti_unzip.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+DOWNLOAD_DIR=$1  # The directory where the downloaded dataset is stored
+DATA_ROOT=$2  # The root directory of the converted dataset
+
+for zip_file in "$DOWNLOAD_DIR"/SemanticKITTI/raw/*.zip; do
+  echo "Unzipping $zip_file to $DATA_ROOT ..."
+  unzip -oq "$zip_file" -d "$DATA_ROOT"
+  echo "[Done] Unzipped $zip_file to $DATA_ROOT"
+  # delete the original archive after extraction
+  rm -f "$zip_file"
+done
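
For reference, the unzip scripts introduced above can also be run by hand. This is a minimal sketch, assuming the archives have already been fetched (e.g. with `odl get`) into the `download_root` declared in `dataset-index.yml` (`data`), and assuming `mim download` passes `download_root` and `data_root` to the configured `script` as its two positional arguments:

```bash
# Manually replay the extraction step that `mim download mmdet3d --dataset ...`
# is expected to perform, using the defaults from dataset-index.yml above:
#   $1 = download_root (where the raw archives were downloaded)
#   $2 = data_root (where the dataset is extracted)
bash tools/dataset_converters/kitti_unzip.sh data data/kitti
bash tools/dataset_converters/nuscenes_unzip.sh data data/nuscenes
bash tools/dataset_converters/semantickitti_unzip.sh data data/semantickitti
```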