From aa586c8f6c1218db077565b5eedde12ecd129789 Mon Sep 17 00:00:00 2001 From: Yezhen Cong <52420115+THU17cyz@users.noreply.github.com> Date: Thu, 29 Jul 2021 15:35:31 +0800 Subject: [PATCH] [Enhance] Add script for data update (#774) * Fixed wrong config paths and fixed a bug in test * Fixed metafile * Coord sys refactor (main code) * Update test_waymo_dataset.py * Manually resolve conflict * Removed unused lines and fixed imports * remove coord2box and box2coord * update dir_limit_offset * Some minor improvements * Removed some \s in comments * Revert a change * Change Box3DMode to Coord3DMode where points are converted * Fix points_in_bbox function * Fix Imvoxelnet config * Revert adding a line * Fix rotation bug when batch size is 0 * Keep sign of dir_scores as before * Fix several comments * Add a comment * Fix docstring * Add data update scripts * Fix comments --- docs/data_preparation.md | 12 ++- tools/create_data.sh | 11 ++- tools/update_data_coords.py | 167 ++++++++++++++++++++++++++++++++++++ tools/update_data_coords.sh | 22 +++++ 4 files changed, 205 insertions(+), 7 deletions(-) create mode 100644 tools/update_data_coords.py create mode 100644 tools/update_data_coords.sh diff --git a/docs/data_preparation.md b/docs/data_preparation.md index 970f594238..159f248cc9 100644 --- a/docs/data_preparation.md +++ b/docs/data_preparation.md @@ -78,7 +78,7 @@ mmdetection3d ### KITTI -Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Prepare KITTI data by running +Download KITTI 3D detection data [HERE](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Prepare KITTI data splits by running ```bash mkdir ./data/kitti/ && mkdir ./data/kitti/ImageSets @@ -88,10 +88,20 @@ wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/sec wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/second/data/ImageSets/train.txt --no-check-certificate --content-disposition -O ./data/kitti/ImageSets/train.txt wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/second/data/ImageSets/val.txt --no-check-certificate --content-disposition -O ./data/kitti/ImageSets/val.txt wget -c https://raw.githubusercontent.com/traveller59/second.pytorch/master/second/data/ImageSets/trainval.txt --no-check-certificate --content-disposition -O ./data/kitti/ImageSets/trainval.txt +``` + +Then generate info files by running +``` python tools/create_data.py kitti --root-path ./data/kitti --out-dir ./data/kitti --extra-tag kitti ``` +In an environment using slurm, users may run the following command instead + +``` +sh tools/create_data.sh kitti +``` + ### Waymo Download Waymo open dataset V1.2 [HERE](https://waymo.com/open/download/) and its data split [HERE](https://drive.google.com/drive/folders/18BVuF_RYJF0NjZpt8SnfzANiakoRMf0o?usp=sharing). Then put tfrecord files into corresponding folders in `data/waymo/waymo_format/` and put the data split txt files into `data/waymo/kitti_format/ImageSets`. Download ground truth bin file for validation set [HERE](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_2_0/validation/ground_truth_objects) and put it into `data/waymo/waymo_format/`. A tip is that you can use `gsutil` to download the large-scale dataset with commands. You can take this [tool](https://github.com/RalphMao/Waymo-Dataset-Tool) as an example for more details. Subsequently, prepare waymo data by running diff --git a/tools/create_data.sh b/tools/create_data.sh index 4007de4095..9a57852f71 100755 --- a/tools/create_data.sh +++ b/tools/create_data.sh @@ -5,8 +5,7 @@ export PYTHONPATH=`pwd`:$PYTHONPATH PARTITION=$1 JOB_NAME=$2 -CONFIG=$3 -WORK_DIR=$4 +DATASET=$3 GPUS=${GPUS:-1} GPUS_PER_NODE=${GPUS_PER_NODE:-1} SRUN_ARGS=${SRUN_ARGS:-""} @@ -19,7 +18,7 @@ srun -p ${PARTITION} \ --ntasks-per-node=${GPUS_PER_NODE} \ --kill-on-bad-exit=1 \ ${SRUN_ARGS} \ - python -u tools/create_data.py kitti \ - --root-path ./data/kitti \ - --out-dir ./data/kitti \ - --extra-tag kitti + python -u tools/create_data.py ${DATASET} \ + --root-path ./data/${DATASET} \ + --out-dir ./data/${DATASET} \ + --extra-tag ${DATASET} diff --git a/tools/update_data_coords.py b/tools/update_data_coords.py new file mode 100644 index 0000000000..87165aa808 --- /dev/null +++ b/tools/update_data_coords.py @@ -0,0 +1,167 @@ +import argparse +import mmcv +import numpy as np +import time +from os import path as osp + +from mmdet3d.core.bbox import limit_period + + +def update_sunrgbd_infos(root_dir, out_dir, pkl_files): + print(f'{pkl_files} will be modified because ' + f'of the refactor of the Depth coordinate system.') + if root_dir == out_dir: + print(f'Warning, you are overwriting ' + f'the original data under {root_dir}.') + time.sleep(3) + for pkl_file in pkl_files: + in_path = osp.join(root_dir, pkl_file) + print(f'Reading from input file: {in_path}.') + a = mmcv.load(in_path) + print('Start updating:') + for item in mmcv.track_iter_progress(a): + if 'rotation_y' in item['annos']: + item['annos']['rotation_y'] = -item['annos']['rotation_y'] + item['annos']['gt_boxes_upright_depth'][:, -1:] = \ + -item['annos']['gt_boxes_upright_depth'][:, -1:] + + out_path = osp.join(out_dir, pkl_file) + print(f'Writing to output file: {out_path}.') + mmcv.dump(a, out_path, 'pkl') + + +def update_outdoor_dbinfos(root_dir, out_dir, pkl_files): + print(f'{pkl_files} will be modified because ' + f'of the refactor of the LIDAR coordinate system.') + if root_dir == out_dir: + print(f'Warning, you are overwriting ' + f'the original data under {root_dir}.') + time.sleep(3) + for pkl_file in pkl_files: + in_path = osp.join(root_dir, pkl_file) + print(f'Reading from input file: {in_path}.') + a = mmcv.load(in_path) + print('Start updating:') + for k in a.keys(): + print(f'Updating samples of class {k}:') + for item in mmcv.track_iter_progress(a[k]): + boxes = item['box3d_lidar'].copy() + # swap l, w (or dx, dy) + item['box3d_lidar'][3] = boxes[4] + item['box3d_lidar'][4] = boxes[3] + # change yaw + item['box3d_lidar'][6] = -boxes[6] - np.pi / 2 + item['box3d_lidar'][6] = limit_period( + item['box3d_lidar'][6], period=np.pi * 2) + + out_path = osp.join(out_dir, pkl_file) + print(f'Writing to output file: {out_path}.') + mmcv.dump(a, out_path, 'pkl') + + +def update_nuscenes_or_lyft_infos(root_dir, out_dir, pkl_files): + + print(f'{pkl_files} will be modified because ' + f'of the refactor of the LIDAR coordinate system.') + if root_dir == out_dir: + print(f'Warning, you are overwriting ' + f'the original data under {root_dir}.') + time.sleep(3) + for pkl_file in pkl_files: + in_path = osp.join(root_dir, pkl_file) + print(f'Reading from input file: {in_path}.') + a = mmcv.load(in_path) + print('Start updating:') + for item in mmcv.track_iter_progress(a['infos']): + boxes = item['gt_boxes'].copy() + # swap l, w (or dx, dy) + item['gt_boxes'][:, 3] = boxes[:, 4] + item['gt_boxes'][:, 4] = boxes[:, 3] + # change yaw + item['gt_boxes'][:, 6] = -boxes[:, 6] - np.pi / 2 + item['gt_boxes'][:, 6] = limit_period( + item['gt_boxes'][:, 6], period=np.pi * 2) + + out_path = osp.join(out_dir, pkl_file) + print(f'Writing to output file: {out_path}.') + mmcv.dump(a, out_path, 'pkl') + + +parser = argparse.ArgumentParser(description='Arg parser for data coords ' + 'update due to coords sys refactor.') +parser.add_argument('dataset', metavar='kitti', help='name of the dataset') +parser.add_argument( + '--root-dir', + type=str, + default='./data/kitti', + help='specify the root dir of dataset') +parser.add_argument( + '--version', + type=str, + default='v1.0', + required=False, + help='specify the dataset version, no need for kitti') +parser.add_argument( + '--out-dir', + type=str, + default=None, + required=False, + help='name of info pkl') +args = parser.parse_args() + +if __name__ == '__main__': + if args.out_dir is None: + args.out_dir = args.root_dir + if args.dataset == 'kitti': + # KITTI infos is in CAM coord sys (unchanged) + # KITTI dbinfos is in LIDAR coord sys (changed) + # so we only update dbinfos + pkl_files = ['kitti_dbinfos_train.pkl'] + update_outdoor_dbinfos( + root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files) + elif args.dataset == 'nuscenes': + # nuScenes infos is in LIDAR coord sys (changed) + # nuScenes dbinfos is in LIDAR coord sys (changed) + # so we update both infos and dbinfos + pkl_files = ['nuscenes_infos_val.pkl'] + if args.version != 'v1.0-mini': + pkl_files.append('nuscenes_infos_train.pkl') + else: + pkl_files.append('nuscenes_infos_train_tiny.pkl') + update_nuscenes_or_lyft_infos( + root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files) + if args.version != 'v1.0-mini': + pkl_files = ['nuscenes_dbinfos_train.pkl'] + update_outdoor_dbinfos( + root_dir=args.root_dir, + out_dir=args.out_dir, + pkl_files=pkl_files) + elif args.dataset == 'lyft': + # Lyft infos is in LIDAR coord sys (changed) + # Lyft has no dbinfos + # so we update infos + pkl_files = ['lyft_infos_train.pkl', 'lyft_infos_val.pkl'] + update_nuscenes_or_lyft_infos( + root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files) + elif args.dataset == 'waymo': + # Waymo infos is in CAM coord sys (unchanged) + # Waymo dbinfos is in LIDAR coord sys (changed) + # so we only update dbinfos + pkl_files = ['waymo_dbinfos_train.pkl'] + update_outdoor_dbinfos( + root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files) + elif args.dataset == 'scannet': + # ScanNet infos is in DEPTH coord sys (changed) + # but bbox is without yaw + # so ScanNet is unaffected + pass + elif args.dataset == 's3dis': + # Segmentation datasets are not affected + pass + elif args.dataset == 'sunrgbd': + # SUNRGBD infos is in DEPTH coord sys (changed) + # and bbox is with yaw + # so we update infos + pkl_files = ['sunrgbd_infos_train.pkl', 'sunrgbd_infos_val.pkl'] + update_sunrgbd_infos( + root_dir=args.root_dir, out_dir=args.out_dir, pkl_files=pkl_files) diff --git a/tools/update_data_coords.sh b/tools/update_data_coords.sh new file mode 100644 index 0000000000..bd8db62838 --- /dev/null +++ b/tools/update_data_coords.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +set -x +export PYTHONPATH=`pwd`:$PYTHONPATH + +PARTITION=$1 +DATASET=$2 +GPUS=${GPUS:-1} +GPUS_PER_NODE=${GPUS_PER_NODE:-1} +SRUN_ARGS=${SRUN_ARGS:-""} +JOB_NAME=update_data_coords + +srun -p ${PARTITION} \ + --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u tools/update_data_coords.py ${DATASET} \ + --root-dir ./data/${DATASET} \ + --out-dir ./data/${DATASET}