[Enhance] Add script for data update (#774)
* Fixed wrong config paths and fixed a bug in test

* Fixed metafile

* Coord sys refactor (main code)

* Update test_waymo_dataset.py

* Manually resolve conflict

* Removed unused lines and fixed imports

* remove coord2box and box2coord

* update dir_limit_offset

* Some minor improvements

* Removed some \s in comments

* Revert a change

* Change Box3DMode to Coord3DMode where points are converted

* Fix points_in_bbox function

* Fix Imvoxelnet config

* Revert adding a line

* Fix rotation bug when batch size is 0

* Keep sign of dir_scores as before

* Fix several comments

* Add a comment

* Fix docstring

* Add data update scripts

* Fix comments
yezhen17 committed Aug 4, 2021
1 parent 9b7d77f commit 2060ee4
Showing 4 changed files with 205 additions and 7 deletions.
12 changes: 11 additions & 1 deletion docs/data_preparation.md
@@ -78,7 +78,7 @@ mmdetection3d

### KITTI

-Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Prepare KITTI data by running
+Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Prepare KITTI data splits by running

```bash
mkdir ./data/kitti/ && mkdir ./data/kitti/ImageSets
@@ -88,10 +88,20 @@ wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/sec
wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/second/data/ImageSets/train.txt --no-check-certificate --content-disposition -O ./data/kitti/ImageSets/train.txt
wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/second/data/ImageSets/val.txt --no-check-certificate --content-disposition -O ./data/kitti/ImageSets/val.txt
wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/second/data/ImageSets/trainval.txt --no-check-certificate --content-disposition -O ./data/kitti/ImageSets/trainval.txt
```

Then generate info files by running

```
python tools/create_data.py kitti --root-path ./data/kitti --out-dir ./data/kitti --extra-tag kitti
```

In an environment using slurm, users may run the following command instead

```
sh tools/create_data.sh <partition> <job_name> kitti
```
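
The wrapper reads `GPUS`, `GPUS_PER_NODE` and `SRUN_ARGS` from the environment (both GPU settings default to 1), so a fully spelled-out call could look like the sketch below; the partition and job name are placeholders.

```bash
GPUS=1 GPUS_PER_NODE=1 sh tools/create_data.sh <partition> <job_name> kitti
```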

### Waymo

Download the Waymo open dataset V1.2 [HERE](https://waymo.com/open/download/) and its data split [HERE](https://drive.google.com/drive/folders/18BVuF_RYJF0NjZpt8SnfzANiakoRMf0o?usp=sharing). Then put the tfrecord files into the corresponding folders in `data/waymo/waymo_format/` and the data split txt files into `data/waymo/kitti_format/ImageSets`. Download the ground truth bin file for the validation set [HERE](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_2_0/validation/ground_truth_objects) and put it into `data/waymo/waymo_format/`. Tip: you can use `gsutil` to download this large-scale dataset from the command line; see this [tool](https://github.com/RalphMao/Waymo-Dataset-Tool) for an example. Subsequently, prepare Waymo data by running
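
For instance, the validation ground truth objects linked above can be pulled with `gsutil`; the command below is only a sketch that assumes the bucket path shown in the link, and the exact object layout may differ.

```bash
# copy the validation ground-truth objects into the expected folder (bucket path taken from the link above)
gsutil -m cp -r gs://waymo_open_dataset_v_1_2_0/validation/ground_truth_objects ./data/waymo/waymo_format/
```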
11 changes: 5 additions & 6 deletions tools/create_data.sh
@@ -5,8 +5,7 @@ export PYTHONPATH=`pwd`:$PYTHONPATH

PARTITION=$1
JOB_NAME=$2
-CONFIG=$3
-WORK_DIR=$4
+DATASET=$3
GPUS=${GPUS:-1}
GPUS_PER_NODE=${GPUS_PER_NODE:-1}
SRUN_ARGS=${SRUN_ARGS:-""}
@@ -19,7 +18,7 @@ srun -p ${PARTITION} \
--ntasks-per-node=${GPUS_PER_NODE} \
--kill-on-bad-exit=1 \
${SRUN_ARGS} \
-python -u tools/create_data.py kitti \
-    --root-path ./data/kitti \
-    --out-dir ./data/kitti \
-    --extra-tag kitti
+python -u tools/create_data.py ${DATASET} \
+    --root-path ./data/${DATASET} \
+    --out-dir ./data/${DATASET} \
+    --extra-tag ${DATASET}
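
Because the dataset name is now the third positional argument, the same wrapper covers any dataset handled by `tools/create_data.py`; a hypothetical nuScenes run is sketched below (partition and job name are placeholders).

```bash
# the wrapper expands this to: python -u tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes
sh tools/create_data.sh <partition> <job_name> nuscenes
```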
167 changes: 167 additions & 0 deletions tools/update_data_coords.py
@@ -0,0 +1,167 @@
import argparse
import mmcv
import numpy as np
import time
from os import path as osp

from mmdet3d.core.bbox import limit_period


def update_sunrgbd_infos(root_dir, out_dir, pkl_files):
print(f'{pkl_files} will be modified because '
f'of the refactor of the Depth coordinate system.')
if root_dir == out_dir:
print(f'Warning, you are overwriting '
f'the original data under {root_dir}.')
time.sleep(3)
for pkl_file in pkl_files:
in_path = osp.join(root_dir, pkl_file)
print(f'Reading from input file: {in_path}.')
a = mmcv.load(in_path)
print('Start updating:')
for item in mmcv.track_iter_progress(a):
if 'rotation_y' in item['annos']:
item['annos']['rotation_y'] = -item['annos']['rotation_y']
item['annos']['gt_boxes_upright_depth'][:, -1:] = \
-item['annos']['gt_boxes_upright_depth'][:, -1:]

out_path = osp.join(out_dir, pkl_file)
print(f'Writing to output file: {out_path}.')
mmcv.dump(a, out_path, 'pkl')


def update_outdoor_dbinfos(root_dir, out_dir, pkl_files):
print(f'{pkl_files} will be modified because '
f'of the refactor of the LIDAR coordinate system.')
if root_dir == out_dir:
print(f'Warning, you are overwriting '
f'the original data under {root_dir}.')
time.sleep(3)
for pkl_file in pkl_files:
in_path = osp.join(root_dir, pkl_file)
print(f'Reading from input file: {in_path}.')
a = mmcv.load(in_path)
print('Start updating:')
for k in a.keys():
print(f'Updating samples of class {k}:')
for item in mmcv.track_iter_progress(a[k]):
boxes = item['box3d_lidar'].copy()
# swap l, w (or dx, dy)
item['box3d_lidar'][3] = boxes[4]
item['box3d_lidar'][4] = boxes[3]
# change yaw
item['box3d_lidar'][6] = -boxes[6] - np.pi / 2
item['box3d_lidar'][6] = limit_period(
item['box3d_lidar'][6], period=np.pi * 2)

out_path = osp.join(out_dir, pkl_file)
print(f'Writing to output file: {out_path}.')
mmcv.dump(a, out_path, 'pkl')


def update_nuscenes_or_lyft_infos(root_dir, out_dir, pkl_files):

print(f'{pkl_files} will be modified because '
f'of the refactor of the LIDAR coordinate system.')
if root_dir == out_dir:
print(f'Warning, you are overwriting '
f'the original data under {root_dir}.')
time.sleep(3)
for pkl_file in pkl_files:
in_path = osp.join(root_dir, pkl_file)
print(f'Reading from input file: {in_path}.')
a = mmcv.load(in_path)
print('Start updating:')
for item in mmcv.track_iter_progress(a['infos']):
boxes = item['gt_boxes'].copy()
# swap l, w (or dx, dy)
item['gt_boxes'][:, 3] = boxes[:, 4]
item['gt_boxes'][:, 4] = boxes[:, 3]
# change yaw
item['gt_boxes'][:, 6] = -boxes[:, 6] - np.pi / 2
item['gt_boxes'][:, 6] = limit_period(
item['gt_boxes'][:, 6], period=np.pi * 2)

out_path = osp.join(out_dir, pkl_file)
print(f'Writing to output file: {out_path}.')
mmcv.dump(a, out_path, 'pkl')


parser = argparse.ArgumentParser(description='Arg parser for data coords '
'update due to coords sys refactor.')
parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
parser.add_argument(
'--root-dir',
type=str,
default='./data/kitti',
help='specify the root dir of dataset')
parser.add_argument(
'--version',
type=str,
default='v1.0',
required=False,
help='specify the dataset version, no need for kitti')
parser.add_argument(
'--out-dir',
type=str,
default=None,
required=False,
    help='specify the output directory for the updated pkl files')
args = parser.parse_args()

if __name__ == '__main__':
if args.out_dir is None:
args.out_dir = args.root_dir
if args.dataset == 'kitti':
# KITTI infos is in CAM coord sys (unchanged)
# KITTI dbinfos is in LIDAR coord sys (changed)
# so we only update dbinfos
pkl_files = ['kitti_dbinfos_train.pkl']
update_outdoor_dbinfos(
root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
elif args.dataset == 'nuscenes':
# nuScenes infos is in LIDAR coord sys (changed)
# nuScenes dbinfos is in LIDAR coord sys (changed)
# so we update both infos and dbinfos
pkl_files = ['nuscenes_infos_val.pkl']
if args.version != 'v1.0-mini':
pkl_files.append('nuscenes_infos_train.pkl')
else:
pkl_files.append('nuscenes_infos_train_tiny.pkl')
update_nuscenes_or_lyft_infos(
root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
if args.version != 'v1.0-mini':
pkl_files = ['nuscenes_dbinfos_train.pkl']
update_outdoor_dbinfos(
root_dir=args.root_dir,
out_dir=args.out_dir,
pkl_files=pkl_files)
elif args.dataset == 'lyft':
# Lyft infos is in LIDAR coord sys (changed)
# Lyft has no dbinfos
# so we update infos
pkl_files = ['lyft_infos_train.pkl', 'lyft_infos_val.pkl']
update_nuscenes_or_lyft_infos(
root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
elif args.dataset == 'waymo':
# Waymo infos is in CAM coord sys (unchanged)
# Waymo dbinfos is in LIDAR coord sys (changed)
# so we only update dbinfos
pkl_files = ['waymo_dbinfos_train.pkl']
update_outdoor_dbinfos(
root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
elif args.dataset == 'scannet':
# ScanNet infos is in DEPTH coord sys (changed)
# but bbox is without yaw
# so ScanNet is unaffected
pass
elif args.dataset == 's3dis':
# Segmentation datasets are not affected
pass
elif args.dataset == 'sunrgbd':
# SUNRGBD infos is in DEPTH coord sys (changed)
# and bbox is with yaw
# so we update infos
pkl_files = ['sunrgbd_infos_train.pkl', 'sunrgbd_infos_val.pkl']
update_sunrgbd_infos(
root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files)
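
For reference, the per-box LiDAR conversion performed by `update_outdoor_dbinfos` and `update_nuscenes_or_lyft_infos` can be sketched on a single hypothetical box as below; the box values are purely illustrative, and the wrapping step inlines what `limit_period` does with `period=np.pi * 2`.

```python
import numpy as np

# hypothetical pre-refactor LiDAR box: [x, y, z, dx, dy, dz, yaw]
old_box = np.array([10.0, 2.0, -1.0, 1.6, 3.9, 1.5, 0.3])

new_box = old_box.copy()
# swap the two horizontal dimensions, as done in the update functions above
new_box[3], new_box[4] = old_box[4], old_box[3]
# yaw becomes -yaw - pi/2, then is wrapped to [-pi, pi) (limit_period with period=2*pi)
yaw = -old_box[6] - np.pi / 2
new_box[6] = yaw - np.floor(yaw / (2 * np.pi) + 0.5) * (2 * np.pi)
print(new_box)  # roughly [10., 2., -1., 3.9, 1.6, 1.5, -1.8708]
```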
22 changes: 22 additions & 0 deletions tools/update_data_coords.sh
@@ -0,0 +1,22 @@
#!/usr/bin/env bash

set -x
export PYTHONPATH=`pwd`:$PYTHONPATH

PARTITION=$1
DATASET=$2
GPUS=${GPUS:-1}
GPUS_PER_NODE=${GPUS_PER_NODE:-1}
SRUN_ARGS=${SRUN_ARGS:-""}
JOB_NAME=update_data_coords

srun -p ${PARTITION} \
--job-name=${JOB_NAME} \
--gres=gpu:${GPUS_PER_NODE} \
--ntasks=${GPUS} \
--ntasks-per-node=${GPUS_PER_NODE} \
--kill-on-bad-exit=1 \
${SRUN_ARGS} \
python -u tools/update_data_coords.py ${DATASET} \
--root-dir ./data/${DATASET} \
--out-dir ./data/${DATASET}
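
A hypothetical invocation, with and without slurm (the partition name is a placeholder, and nuScenes is chosen only as an example), might look like:

```bash
# with slurm
sh tools/update_data_coords.sh <partition> nuscenes

# without slurm, calling the Python script directly
python tools/update_data_coords.py nuscenes --root-dir ./data/nuscenes --out-dir ./data/nuscenes
```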
