From d3483290a8c113a550d2d2f5d53f090e4c7b8673 Mon Sep 17 00:00:00 2001 From: Danila Rukhovich Date: Wed, 4 Aug 2021 16:38:06 +0300 Subject: [PATCH 1/8] add s3dis dataset --- configs/_base_/datasets/s3dis-3d-5class.py | 110 +++++++++++++ mmdet3d/datasets/__init__.py | 13 +- mmdet3d/datasets/s3dis_dataset.py | 174 +++++++++++++++++++-- tools/data_converter/s3dis_data_utils.py | 52 +++++- tools/misc/browse_dataset.py | 2 +- 5 files changed, 326 insertions(+), 25 deletions(-) create mode 100644 configs/_base_/datasets/s3dis-3d-5class.py diff --git a/configs/_base_/datasets/s3dis-3d-5class.py b/configs/_base_/datasets/s3dis-3d-5class.py new file mode 100644 index 0000000000..f45f9d71c5 --- /dev/null +++ b/configs/_base_/datasets/s3dis-3d-5class.py @@ -0,0 +1,110 @@ +# dataset settings +dataset_type = 'S3DISDataset' +data_root = './data/s3dis/' +class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') +train_area = [1, 2, 3, 4, 6] +test_area = 5 + +train_pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=True, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), + dict(type='IndoorPointSample', num_points=40000), + dict( + type='RandomFlip3D', + sync_2d=False, + flip_ratio_bev_horizontal=0.5, + flip_ratio_bev_vertical=0.5), + dict( + type='GlobalRotScaleTrans', + rot_range=[-0.087266, 0.087266], + scale_ratio_range=[1.0, 1.0], + shift_height=True), + dict(type='DefaultFormatBundle3D', class_names=class_names), + dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) +] +test_pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=True, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='MultiScaleFlipAug3D', + img_scale=(1333, 800), + pts_scale_ratio=1, + flip=False, + transforms=[ + dict( + type='GlobalRotScaleTrans', + rot_range=[0, 0], + scale_ratio_range=[1., 1.], + translation_std=[0, 0, 0]), + dict( + type='RandomFlip3D', + sync_2d=False, + flip_ratio_bev_horizontal=0.5, + flip_ratio_bev_vertical=0.5), + dict(type='IndoorPointSample', num_points=40000), + dict( + type='DefaultFormatBundle3D', + class_names=class_names, + with_label=False), + dict(type='Collect3D', keys=['points']) + ]) +] +# construct a pipeline for data and gt loading in show function +# please keep its loading function consistent with test_pipeline (e.g. 
client) +eval_pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='DefaultFormatBundle3D', + class_names=class_names, + with_label=False), + dict(type='Collect3D', keys=['points']) +] + +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=5, + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_files=[ + data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area + ], + pipeline=train_pipeline, + filter_empty_gt=False, + classes=class_names, + box_type_3d='Depth')), + val=dict( + type=dataset_type, + data_root=data_root, + ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', + pipeline=test_pipeline, + classes=class_names, + test_mode=True, + box_type_3d='Depth'), + test=dict( + type=dataset_type, + data_root=data_root, + ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', + pipeline=test_pipeline, + classes=class_names, + test_mode=True, + box_type_3d='Depth')) + +evaluation = dict(pipeline=eval_pipeline) diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py index 6bc252edf9..a8fce6c9ed 100644 --- a/mmdet3d/datasets/__init__.py +++ b/mmdet3d/datasets/__init__.py @@ -15,7 +15,7 @@ ObjectRangeFilter, ObjectSample, PointShuffle, PointsRangeFilter, RandomDropPointsColor, RandomFlip3D, RandomJitterPoints, VoxelBasedPointSampler) -from .s3dis_dataset import S3DISSegDataset +from .s3dis_dataset import S3DISDataset, S3DISSegDataset from .scannet_dataset import ScanNetDataset, ScanNetSegDataset from .semantickitti_dataset import SemanticKITTIDataset from .sunrgbd_dataset import SUNRGBDDataset @@ -23,12 +23,11 @@ from .waymo_dataset import WaymoDataset __all__ = [ - 'KittiDataset', 'KittiMonoDataset', 'GroupSampler', - 'DistributedGroupSampler', 'build_dataloader', 'RepeatFactorDataset', - 'DATASETS', 'build_dataset', 'CocoDataset', 'NuScenesDataset', - 'NuScenesMonoDataset', 'LyftDataset', 'ObjectSample', 'RandomFlip3D', - 'ObjectNoise', 'GlobalRotScaleTrans', 'PointShuffle', 'ObjectRangeFilter', - 'PointsRangeFilter', 'Collect3D', 'LoadPointsFromFile', 'S3DISSegDataset', + 'KittiDataset', 'KittiMonoDataset', 'build_dataloader', 'DATASETS', + 'build_dataset', 'NuScenesDataset', 'NuScenesMonoDataset', 'LyftDataset', + 'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans', + 'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter', + 'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset', 'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset', 'ScanNetDataset', 'ScanNetSegDataset', 'SemanticKITTIDataset', 'Custom3DDataset', diff --git a/mmdet3d/datasets/s3dis_dataset.py b/mmdet3d/datasets/s3dis_dataset.py index 185f35ad2d..51cead6da3 100644 --- a/mmdet3d/datasets/s3dis_dataset.py +++ b/mmdet3d/datasets/s3dis_dataset.py @@ -2,14 +2,14 @@ from os import path as osp from mmdet3d.core import show_seg_result +from mmdet3d.core.bbox import DepthInstance3DBoxes from mmdet.datasets import DATASETS from mmseg.datasets import DATASETS as SEG_DATASETS +from .custom_3d import Custom3DDataset from .custom_3d_seg import Custom3DSegDataset from .pipelines import Compose -@DATASETS.register_module() -@SEG_DATASETS.register_module() class _S3DISSegDataset(Custom3DSegDataset): r"""S3DIS Dataset for Semantic Segmentation Task. 
@@ -237,23 +237,14 @@ def __init__(self, ] # data_infos and scene_idxs need to be concat - self.concat_data_infos([dst.data_infos for dst in datasets]) + self.data_infos = concat_data_infos( + [dst.data_infos for dst in datasets]) self.concat_scene_idxs([dst.scene_idxs for dst in datasets]) # set group flag for the sampler if not self.test_mode: self._set_group_flag() - def concat_data_infos(self, data_infos): - """Concat data_infos from several datasets to form self.data_infos. - - Args: - data_infos (list[list[dict]]) - """ - self.data_infos = [ - info for one_data_infos in data_infos for info in one_data_infos - ] - def concat_scene_idxs(self, scene_idxs): """Concat scene_idxs from several datasets to form self.scene_idxs. @@ -294,3 +285,160 @@ def _check_scene_idxs(self, scene_idx, num): return scene_idx # single idx return self._duplicate_to_list(scene_idx, num) + + +class _S3DISDataset(Custom3DDataset): + """S3DIS Dataset for Detection Task.""" + CLASSES = ('table', 'chair', 'sofa', 'bookcase', 'board') + + def __init__(self, + data_root, + ann_file, + pipeline=None, + classes=None, + modality=None, + box_type_3d='Depth', + filter_empty_gt=True, + test_mode=False): + super().__init__( + data_root=data_root, + ann_file=ann_file, + pipeline=pipeline, + classes=classes, + modality=modality, + box_type_3d=box_type_3d, + filter_empty_gt=filter_empty_gt, + test_mode=test_mode) + + def get_ann_info(self, index): + """Get annotation info according to the given index. + + Args: + index (int): Index of the annotation data to get. + + Returns: + dict: annotation information consists of the following keys: + + - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \ + 3D ground truth bboxes + - gt_labels_3d (np.ndarray): Labels of ground truths. + - pts_instance_mask_path (str): Path of instance masks. + - pts_semantic_mask_path (str): Path of semantic masks. + """ + # Use index to get the annos, thus the evalhook could also use this api + info = self.data_infos[index] + if info['annos']['gt_num'] != 0: + gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype( + np.float32) # k, 6 + gt_labels_3d = info['annos']['class'].astype(np.long) + else: + gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32) + gt_labels_3d = np.zeros((0, ), dtype=np.long) + + # to target box structure + gt_bboxes_3d = DepthInstance3DBoxes( + gt_bboxes_3d, + box_dim=gt_bboxes_3d.shape[-1], + with_yaw=False, + origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d) + + pts_instance_mask_path = osp.join(self.data_root, + info['pts_instance_mask_path']) + pts_semantic_mask_path = osp.join(self.data_root, + info['pts_semantic_mask_path']) + + anns_results = dict( + gt_bboxes_3d=gt_bboxes_3d, + gt_labels_3d=gt_labels_3d, + pts_instance_mask_path=pts_instance_mask_path, + pts_semantic_mask_path=pts_semantic_mask_path) + return anns_results + + def get_data_info(self, index): + """Get data info according to the given index. + + Args: + index (int): Index of the sample data to get. + + Returns: + dict: Data information that will be passed to the data \ + preprocessing pipelines. It includes the following keys: + + - pts_filename (str): Filename of point clouds. + - file_name (str): Filename of point clouds. + - ann_info (dict): Annotation info. 
+ """ + info = self.data_infos[index] + pts_filename = osp.join(self.data_root, info['pts_path']) + input_dict = dict(pts_filename=pts_filename) + + if not self.test_mode: + annos = self.get_ann_info(index) + input_dict['ann_info'] = annos + if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any(): + return None + return input_dict + + def _build_default_pipeline(self): + """Build the default pipeline for this dataset.""" + pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='DefaultFormatBundle3D', + class_names=self.CLASSES, + with_label=False), + dict(type='Collect3D', keys=['points']) + ] + return Compose(pipeline) + + +@DATASETS.register_module() +class S3DISDataset(_S3DISDataset): + """S3DIS Dataset for Detection Task.""" + + def __init__(self, + data_root, + ann_files, + pipeline=None, + classes=None, + modality=None, + box_type_3d='Depth', + filter_empty_gt=True, + test_mode=False): + super().__init__( + data_root=data_root, + ann_file=ann_files[0], + pipeline=pipeline, + classes=classes, + modality=modality, + box_type_3d=box_type_3d, + filter_empty_gt=filter_empty_gt, + test_mode=test_mode) + + datasets = [ + _S3DISDataset( + data_root=data_root, + ann_file=ann_files[i], + pipeline=pipeline, + classes=classes, + modality=modality, + box_type_3d=box_type_3d, + filter_empty_gt=filter_empty_gt, + test_mode=test_mode) for i in range(len(ann_files)) + ] + self.data_infos = concat_data_infos( + [dst.data_infos for dst in datasets]) + + +def concat_data_infos(data_infos): + """Concat data_infos from several datasets to form self.data_infos. + + Args: + data_infos (list[list[dict]]) + """ + return [info for one_data_infos in data_infos for info in one_data_infos] diff --git a/tools/data_converter/s3dis_data_utils.py b/tools/data_converter/s3dis_data_utils.py index b9b809127e..ce92098ab3 100644 --- a/tools/data_converter/s3dis_data_utils.py +++ b/tools/data_converter/s3dis_data_utils.py @@ -24,13 +24,21 @@ def __init__(self, root_path, split='Area_1'): 'ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter' ] - self.cat2label = {cat: self.classes.index(cat) for cat in self.classes} - self.label2cat = {self.cat2label[t]: t for t in self.cat2label} + + # Use all 13 classes for segmentation. self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]) self.cat_ids2class = { cat_id: i for i, cat_id in enumerate(list(self.cat_ids)) } + # Following https://arxiv.org/abs/2006.12356, use 5 furniture classes + # for detection: table, chair, sofa, bookcase, board. 
+ self.furniture_cat_ids = np.array([7, 8, 9, 10, 11]) + self.furniture_cat_ids2class = { + cat_id: i + for i, cat_id in enumerate(list(self.furniture_cat_ids)) + } + assert split in [ 'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6' ] @@ -99,15 +107,51 @@ def process_single_scene(sample_idx): 'instance_mask', f'{self.split}_{sample_idx}.bin') info['pts_semantic_mask_path'] = osp.join( 'semantic_mask', f'{self.split}_{sample_idx}.bin') + info['annos'] = self.get_bboxes(points, pts_instance_mask, + pts_semantic_mask) return info sample_id_list = sample_id_list if sample_id_list is not None \ else self.sample_id_list - with futures.ThreadPoolExecutor(num_workers) as executor: + with futures.ThreadPoolExecutor(1) as executor: infos = executor.map(process_single_scene, sample_id_list) return list(infos) + def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask): + """Convert instance masks to axis-aligned bounding boxes. + + Args: + points (np.array): Scene points of shape (n, 6). + pts_instance_mask (np.array): Instance labels of shape (n,). + pts_semantic_mask (np.array): Semantic labels of shape (n,). + + Returns: + infos (dict): Bounding boxes with labels. + """ + bboxes, labels = [], [] + for i in range(1, pts_instance_mask.max()): + ids = pts_instance_mask == i + # check if all instance points have same semantic label + assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max() + label = pts_semantic_mask[ids][0] + # keep only furniture objects + if label in self.furniture_cat_ids2class: + labels.append( + self.furniture_cat_ids2class[pts_semantic_mask[ids][0]]) + pts = points[:, :3][ids] + min_pts = pts.min(axis=0) + max_pts = pts.max(axis=0) + locations = (min_pts + max_pts) / 2 + dimensions = max_pts - min_pts + bboxes.append(np.concatenate((locations, dimensions))) + annotation = dict() + # follow ScanNet and SUN RGB-D keys + annotation['gt_boxes_upright_depth'] = np.array(bboxes) + annotation['class'] = np.array(labels) + annotation['gt_num'] = len(labels) + return annotation + class S3DISSegData(object): """S3DIS dataset used to generate infos for semantic segmentation task. 
@@ -191,7 +235,7 @@ def get_scene_idxs_and_label_weight(self): num_iter = int(np.sum(num_point_all) / float(self.num_points)) scene_idxs = [] for idx in range(len(self.data_infos)): - scene_idxs.extend([idx] * round(sample_prob[idx] * num_iter)) + scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter))) scene_idxs = np.array(scene_idxs).astype(np.int32) # calculate label weight, adopted from PointNet++ diff --git a/tools/misc/browse_dataset.py b/tools/misc/browse_dataset.py index 007883d837..397bb9e18f 100644 --- a/tools/misc/browse_dataset.py +++ b/tools/misc/browse_dataset.py @@ -200,7 +200,7 @@ def main(): data_path = data_info['point_cloud']['velodyne_path'] elif dataset_type in [ 'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset', - 'S3DISSegDataset' + 'S3DISSegDataset', 'S3DISDataset' ]: data_path = data_info['pts_path'] elif dataset_type in ['NuScenesDataset', 'LyftDataset']: From 20bb60e59eae4a600119c1fe353350204a84d9a3 Mon Sep 17 00:00:00 2001 From: Danila Rukhovich Date: Thu, 5 Aug 2021 10:13:26 +0300 Subject: [PATCH 2/8] fix comments; remove _S3DISDataset if favour of ConcatDataset --- configs/_base_/datasets/s3dis-3d-5class.py | 23 +- mmdet3d/datasets/s3dis_dataset.py | 310 ++++++++++----------- tools/data_converter/s3dis_data_utils.py | 34 +-- tools/misc/browse_dataset.py | 5 + 4 files changed, 183 insertions(+), 189 deletions(-) diff --git a/configs/_base_/datasets/s3dis-3d-5class.py b/configs/_base_/datasets/s3dis-3d-5class.py index f45f9d71c5..b17f20ba95 100644 --- a/configs/_base_/datasets/s3dis-3d-5class.py +++ b/configs/_base_/datasets/s3dis-3d-5class.py @@ -81,19 +81,22 @@ type='RepeatDataset', times=5, dataset=dict( - type=dataset_type, - data_root=data_root, - ann_files=[ - data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area + type='ConcatDataset', + datasets=[ + dict( + type=dataset_type, + data_root=data_root, + ann_file=data_root + f's3dis_infos_Area_{i}.pkl', + pipeline=train_pipeline, + filter_empty_gt=False, + classes=class_names, + box_type_3d='Depth') for i in train_area ], - pipeline=train_pipeline, - filter_empty_gt=False, - classes=class_names, - box_type_3d='Depth')), + separate_eval=False)), val=dict( type=dataset_type, data_root=data_root, - ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', + ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', pipeline=test_pipeline, classes=class_names, test_mode=True, @@ -101,7 +104,7 @@ test=dict( type=dataset_type, data_root=data_root, - ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', + ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', pipeline=test_pipeline, classes=class_names, test_mode=True, diff --git a/mmdet3d/datasets/s3dis_dataset.py b/mmdet3d/datasets/s3dis_dataset.py index 51cead6da3..e4d2efd7b8 100644 --- a/mmdet3d/datasets/s3dis_dataset.py +++ b/mmdet3d/datasets/s3dis_dataset.py @@ -10,6 +10,146 @@ from .pipelines import Compose +@DATASETS.register_module() +class S3DISDataset(Custom3DDataset): + """S3DIS Dataset for Detection Task. + + This class is the inner dataset for S3DIS. Since S3DIS has 6 areas, we + often train on 5 of them and test on the remaining one. The one for + test is Area_5 as suggested in `GSDN `_. + To concatenate 5 areas during training + `mmdet.datasets.dataset_wrappers.ConcatDataset` should be used. + + Args: + data_root (str): Path of dataset root. + ann_file (str): Path of annotation file. + pipeline (list[dict], optional): Pipeline used for data processing. + Defaults to None. 
+ classes (tuple[str], optional): Classes used in the dataset. + Defaults to None. + modality (dict, optional): Modality to specify the sensor data used + as input. Defaults to None. + box_type_3d (str, optional): Type of 3D box of this dataset. + Based on the `box_type_3d`, the dataset will encapsulate the box + to its original format then converted them to `box_type_3d`. + Defaults to 'Depth' in this dataset. Available options includes + + - 'LiDAR': Box in LiDAR coordinates. + - 'Depth': Box in depth coordinates, usually for indoor dataset. + - 'Camera': Box in camera coordinates. + filter_empty_gt (bool, optional): Whether to filter empty GT. + Defaults to True. + test_mode (bool, optional): Whether the dataset is in test mode. + Defaults to False. + """ + CLASSES = ('table', 'chair', 'sofa', 'bookcase', 'board') + + def __init__(self, + data_root, + ann_file, + pipeline=None, + classes=None, + modality=None, + box_type_3d='Depth', + filter_empty_gt=True, + test_mode=False): + super().__init__( + data_root=data_root, + ann_file=ann_file, + pipeline=pipeline, + classes=classes, + modality=modality, + box_type_3d=box_type_3d, + filter_empty_gt=filter_empty_gt, + test_mode=test_mode) + + def get_ann_info(self, index): + """Get annotation info according to the given index. + + Args: + index (int): Index of the annotation data to get. + + Returns: + dict: annotation information consists of the following keys: + + - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \ + 3D ground truth bboxes + - gt_labels_3d (np.ndarray): Labels of ground truths. + - pts_instance_mask_path (str): Path of instance masks. + - pts_semantic_mask_path (str): Path of semantic masks. + """ + # Use index to get the annos, thus the evalhook could also use this api + info = self.data_infos[index] + if info['annos']['gt_num'] != 0: + gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype( + np.float32) # k, 6 + gt_labels_3d = info['annos']['class'].astype(np.long) + else: + gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32) + gt_labels_3d = np.zeros((0, ), dtype=np.long) + + # to target box structure + gt_bboxes_3d = DepthInstance3DBoxes( + gt_bboxes_3d, + box_dim=gt_bboxes_3d.shape[-1], + with_yaw=False, + origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d) + + pts_instance_mask_path = osp.join(self.data_root, + info['pts_instance_mask_path']) + pts_semantic_mask_path = osp.join(self.data_root, + info['pts_semantic_mask_path']) + + anns_results = dict( + gt_bboxes_3d=gt_bboxes_3d, + gt_labels_3d=gt_labels_3d, + pts_instance_mask_path=pts_instance_mask_path, + pts_semantic_mask_path=pts_semantic_mask_path) + return anns_results + + def get_data_info(self, index): + """Get data info according to the given index. + + Args: + index (int): Index of the sample data to get. + + Returns: + dict: Data information that will be passed to the data \ + preprocessing pipelines. It includes the following keys: + + - pts_filename (str): Filename of point clouds. + - file_name (str): Filename of point clouds. + - ann_info (dict): Annotation info. 
+ """ + info = self.data_infos[index] + pts_filename = osp.join(self.data_root, info['pts_path']) + input_dict = dict(pts_filename=pts_filename) + + if not self.test_mode: + annos = self.get_ann_info(index) + input_dict['ann_info'] = annos + if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any(): + return None + return input_dict + + def _build_default_pipeline(self): + """Build the default pipeline for this dataset.""" + pipeline = [ + dict( + type='LoadPointsFromFile', + coord_type='DEPTH', + shift_height=False, + load_dim=6, + use_dim=[0, 1, 2, 3, 4, 5]), + dict( + type='DefaultFormatBundle3D', + class_names=self.CLASSES, + with_label=False), + dict(type='Collect3D', keys=['points']) + ] + return Compose(pipeline) + + class _S3DISSegDataset(Custom3DSegDataset): r"""S3DIS Dataset for Semantic Segmentation Task. @@ -237,14 +377,23 @@ def __init__(self, ] # data_infos and scene_idxs need to be concat - self.data_infos = concat_data_infos( - [dst.data_infos for dst in datasets]) + self.concat_data_infos([dst.data_infos for dst in datasets]) self.concat_scene_idxs([dst.scene_idxs for dst in datasets]) # set group flag for the sampler if not self.test_mode: self._set_group_flag() + def concat_data_infos(self, data_infos): + """Concat data_infos from several datasets to form self.data_infos. + + Args: + data_infos (list[list[dict]]) + """ + self.data_infos = [ + info for one_data_infos in data_infos for info in one_data_infos + ] + def concat_scene_idxs(self, scene_idxs): """Concat scene_idxs from several datasets to form self.scene_idxs. @@ -285,160 +434,3 @@ def _check_scene_idxs(self, scene_idx, num): return scene_idx # single idx return self._duplicate_to_list(scene_idx, num) - - -class _S3DISDataset(Custom3DDataset): - """S3DIS Dataset for Detection Task.""" - CLASSES = ('table', 'chair', 'sofa', 'bookcase', 'board') - - def __init__(self, - data_root, - ann_file, - pipeline=None, - classes=None, - modality=None, - box_type_3d='Depth', - filter_empty_gt=True, - test_mode=False): - super().__init__( - data_root=data_root, - ann_file=ann_file, - pipeline=pipeline, - classes=classes, - modality=modality, - box_type_3d=box_type_3d, - filter_empty_gt=filter_empty_gt, - test_mode=test_mode) - - def get_ann_info(self, index): - """Get annotation info according to the given index. - - Args: - index (int): Index of the annotation data to get. - - Returns: - dict: annotation information consists of the following keys: - - - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \ - 3D ground truth bboxes - - gt_labels_3d (np.ndarray): Labels of ground truths. - - pts_instance_mask_path (str): Path of instance masks. - - pts_semantic_mask_path (str): Path of semantic masks. 
- """ - # Use index to get the annos, thus the evalhook could also use this api - info = self.data_infos[index] - if info['annos']['gt_num'] != 0: - gt_bboxes_3d = info['annos']['gt_boxes_upright_depth'].astype( - np.float32) # k, 6 - gt_labels_3d = info['annos']['class'].astype(np.long) - else: - gt_bboxes_3d = np.zeros((0, 6), dtype=np.float32) - gt_labels_3d = np.zeros((0, ), dtype=np.long) - - # to target box structure - gt_bboxes_3d = DepthInstance3DBoxes( - gt_bboxes_3d, - box_dim=gt_bboxes_3d.shape[-1], - with_yaw=False, - origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d) - - pts_instance_mask_path = osp.join(self.data_root, - info['pts_instance_mask_path']) - pts_semantic_mask_path = osp.join(self.data_root, - info['pts_semantic_mask_path']) - - anns_results = dict( - gt_bboxes_3d=gt_bboxes_3d, - gt_labels_3d=gt_labels_3d, - pts_instance_mask_path=pts_instance_mask_path, - pts_semantic_mask_path=pts_semantic_mask_path) - return anns_results - - def get_data_info(self, index): - """Get data info according to the given index. - - Args: - index (int): Index of the sample data to get. - - Returns: - dict: Data information that will be passed to the data \ - preprocessing pipelines. It includes the following keys: - - - pts_filename (str): Filename of point clouds. - - file_name (str): Filename of point clouds. - - ann_info (dict): Annotation info. - """ - info = self.data_infos[index] - pts_filename = osp.join(self.data_root, info['pts_path']) - input_dict = dict(pts_filename=pts_filename) - - if not self.test_mode: - annos = self.get_ann_info(index) - input_dict['ann_info'] = annos - if self.filter_empty_gt and ~(annos['gt_labels_3d'] != -1).any(): - return None - return input_dict - - def _build_default_pipeline(self): - """Build the default pipeline for this dataset.""" - pipeline = [ - dict( - type='LoadPointsFromFile', - coord_type='DEPTH', - shift_height=False, - load_dim=6, - use_dim=[0, 1, 2, 3, 4, 5]), - dict( - type='DefaultFormatBundle3D', - class_names=self.CLASSES, - with_label=False), - dict(type='Collect3D', keys=['points']) - ] - return Compose(pipeline) - - -@DATASETS.register_module() -class S3DISDataset(_S3DISDataset): - """S3DIS Dataset for Detection Task.""" - - def __init__(self, - data_root, - ann_files, - pipeline=None, - classes=None, - modality=None, - box_type_3d='Depth', - filter_empty_gt=True, - test_mode=False): - super().__init__( - data_root=data_root, - ann_file=ann_files[0], - pipeline=pipeline, - classes=classes, - modality=modality, - box_type_3d=box_type_3d, - filter_empty_gt=filter_empty_gt, - test_mode=test_mode) - - datasets = [ - _S3DISDataset( - data_root=data_root, - ann_file=ann_files[i], - pipeline=pipeline, - classes=classes, - modality=modality, - box_type_3d=box_type_3d, - filter_empty_gt=filter_empty_gt, - test_mode=test_mode) for i in range(len(ann_files)) - ] - self.data_infos = concat_data_infos( - [dst.data_infos for dst in datasets]) - - -def concat_data_infos(data_infos): - """Concat data_infos from several datasets to form self.data_infos. 
- - Args: - data_infos (list[list[dict]]) - """ - return [info for one_data_infos in data_infos for info in one_data_infos] diff --git a/tools/data_converter/s3dis_data_utils.py b/tools/data_converter/s3dis_data_utils.py index ce92098ab3..f144692847 100644 --- a/tools/data_converter/s3dis_data_utils.py +++ b/tools/data_converter/s3dis_data_utils.py @@ -20,24 +20,14 @@ def __init__(self, root_path, split='Area_1'): self.split = split self.data_dir = osp.join(root_path, 'Stanford3dDataset_v1.2_Aligned_Version') - self.classes = [ - 'ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', - 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter' - ] - # Use all 13 classes for segmentation. - self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]) + # Following `GSDN `_, use 5 furniture + # classes for detection: table, chair, sofa, bookcase, board. + self.cat_ids = np.array([7, 8, 9, 10, 11]) self.cat_ids2class = { cat_id: i for i, cat_id in enumerate(list(self.cat_ids)) } - # Following https://arxiv.org/abs/2006.12356, use 5 furniture classes - # for detection: table, chair, sofa, bookcase, board. - self.furniture_cat_ids = np.array([7, 8, 9, 10, 11]) - self.furniture_cat_ids2class = { - cat_id: i - for i, cat_id in enumerate(list(self.furniture_cat_ids)) - } assert split in [ 'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6' @@ -114,7 +104,7 @@ def process_single_scene(sample_idx): sample_id_list = sample_id_list if sample_id_list is not None \ else self.sample_id_list - with futures.ThreadPoolExecutor(1) as executor: + with futures.ThreadPoolExecutor(num_workers) as executor: infos = executor.map(process_single_scene, sample_id_list) return list(infos) @@ -123,11 +113,16 @@ def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask): Args: points (np.array): Scene points of shape (n, 6). - pts_instance_mask (np.array): Instance labels of shape (n,). - pts_semantic_mask (np.array): Semantic labels of shape (n,). + pts_instance_mask (np.ndarray): Instance labels of shape (n,). + pts_semantic_mask (np.ndarray): Semantic labels of shape (n,). Returns: - infos (dict): Bounding boxes with labels. + dict: A dict containing detection infos with following keys: + + - gt_boxes_upright_depth (np.ndarray): Bounding boxes + of shape (n, 6) + - class (np.ndarray): Box labels of shape (n,) + - gt_num (int): Number of boxes. 
""" bboxes, labels = [], [] for i in range(1, pts_instance_mask.max()): @@ -136,9 +131,8 @@ def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask): assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max() label = pts_semantic_mask[ids][0] # keep only furniture objects - if label in self.furniture_cat_ids2class: - labels.append( - self.furniture_cat_ids2class[pts_semantic_mask[ids][0]]) + if label in self.cat_ids2class: + labels.append(self.cat_ids2class[pts_semantic_mask[ids][0]]) pts = points[:, :3][ids] min_pts = pts.min(axis=0) max_pts = pts.max(axis=0) diff --git a/tools/misc/browse_dataset.py b/tools/misc/browse_dataset.py index 397bb9e18f..3e591d4e4e 100644 --- a/tools/misc/browse_dataset.py +++ b/tools/misc/browse_dataset.py @@ -9,6 +9,7 @@ from mmdet3d.core.visualizer import (show_multi_modality_result, show_result, show_seg_result) from mmdet3d.datasets import build_dataset +from mmdet.datasets.dataset_wrappers import ConcatDataset def parse_args(): @@ -189,6 +190,10 @@ def main(): cfg.data.train, default_args=dict(filter_empty_gt=False)) except TypeError: # seg dataset doesn't have `filter_empty_gt` key dataset = build_dataset(cfg.data.train) + + # use only first dataset for ConcatDataset + if isinstance(dataset, ConcatDataset): + dataset = dataset.datasets[0] data_infos = dataset.data_infos dataset_type = cfg.dataset_type From 7effad77e53e3c8e84a2b019fe6ff6646113fd83 Mon Sep 17 00:00:00 2001 From: Danila Rukhovich Date: Thu, 5 Aug 2021 11:19:08 +0300 Subject: [PATCH 3/8] fix comments --- configs/_base_/datasets/s3dis-3d-5class.py | 1 + mmdet3d/datasets/s3dis_dataset.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/configs/_base_/datasets/s3dis-3d-5class.py b/configs/_base_/datasets/s3dis-3d-5class.py index b17f20ba95..e4813ca1b5 100644 --- a/configs/_base_/datasets/s3dis-3d-5class.py +++ b/configs/_base_/datasets/s3dis-3d-5class.py @@ -21,6 +21,7 @@ flip_ratio_bev_vertical=0.5), dict( type='GlobalRotScaleTrans', + # following ScanNet dataset the rotation range is 5 degrees rot_range=[-0.087266, 0.087266], scale_ratio_range=[1.0, 1.0], shift_height=True), diff --git a/mmdet3d/datasets/s3dis_dataset.py b/mmdet3d/datasets/s3dis_dataset.py index e4d2efd7b8..76bc0d6853 100644 --- a/mmdet3d/datasets/s3dis_dataset.py +++ b/mmdet3d/datasets/s3dis_dataset.py @@ -72,7 +72,7 @@ def get_ann_info(self, index): Returns: dict: annotation information consists of the following keys: - - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \ + - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): 3D ground truth bboxes - gt_labels_3d (np.ndarray): Labels of ground truths. - pts_instance_mask_path (str): Path of instance masks. @@ -114,7 +114,7 @@ def get_data_info(self, index): index (int): Index of the sample data to get. Returns: - dict: Data information that will be passed to the data \ + dict: Data information that will be passed to the data preprocessing pipelines. It includes the following keys: - pts_filename (str): Filename of point clouds. @@ -175,7 +175,7 @@ class _S3DISSegDataset(Custom3DSegDataset): as input. Defaults to None. test_mode (bool, optional): Whether the dataset is in test mode. Defaults to False. - ignore_index (int, optional): The label index to be ignored, e.g. \ + ignore_index (int, optional): The label index to be ignored, e.g. unannotated points. If None is given, set to len(self.CLASSES). Defaults to None. 
scene_idxs (np.ndarray | str, optional): Precomputed index to load @@ -328,7 +328,7 @@ class S3DISSegDataset(_S3DISSegDataset): as input. Defaults to None. test_mode (bool, optional): Whether the dataset is in test mode. Defaults to False. - ignore_index (int, optional): The label index to be ignored, e.g. \ + ignore_index (int, optional): The label index to be ignored, e.g. unannotated points. If None is given, set to len(self.CLASSES). Defaults to None. scene_idxs (list[np.ndarray] | list[str], optional): Precomputed index From 08330842dbfa42ad64a99eccce9bd91412b64ea4 Mon Sep 17 00:00:00 2001 From: Danila Rukhovich Date: Thu, 5 Aug 2021 11:45:25 +0300 Subject: [PATCH 4/8] fix comments --- configs/_base_/datasets/s3dis-3d-5class.py | 4 ++-- mmdet3d/datasets/s3dis_dataset.py | 2 +- tools/misc/browse_dataset.py | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/configs/_base_/datasets/s3dis-3d-5class.py b/configs/_base_/datasets/s3dis-3d-5class.py index e4813ca1b5..2422766fa3 100644 --- a/configs/_base_/datasets/s3dis-3d-5class.py +++ b/configs/_base_/datasets/s3dis-3d-5class.py @@ -13,7 +13,7 @@ load_dim=6, use_dim=[0, 1, 2, 3, 4, 5]), dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), - dict(type='IndoorPointSample', num_points=40000), + dict(type='PointSample', num_points=40000), dict( type='RandomFlip3D', sync_2d=False, @@ -51,7 +51,7 @@ sync_2d=False, flip_ratio_bev_horizontal=0.5, flip_ratio_bev_vertical=0.5), - dict(type='IndoorPointSample', num_points=40000), + dict(type='PointSample', num_points=40000), dict( type='DefaultFormatBundle3D', class_names=class_names, diff --git a/mmdet3d/datasets/s3dis_dataset.py b/mmdet3d/datasets/s3dis_dataset.py index 76bc0d6853..69ac95e798 100644 --- a/mmdet3d/datasets/s3dis_dataset.py +++ b/mmdet3d/datasets/s3dis_dataset.py @@ -12,7 +12,7 @@ @DATASETS.register_module() class S3DISDataset(Custom3DDataset): - """S3DIS Dataset for Detection Task. + r"""S3DIS Dataset for Detection Task. This class is the inner dataset for S3DIS. Since S3DIS has 6 areas, we often train on 5 of them and test on the remaining one. 
The one for
diff --git a/tools/misc/browse_dataset.py b/tools/misc/browse_dataset.py
index 3e591d4e4e..5aa592874d 100644
--- a/tools/misc/browse_dataset.py
+++ b/tools/misc/browse_dataset.py
@@ -63,6 +63,9 @@ def build_data_cfg(config_path, skip_type, cfg_options):
     # so we don't need to worry about it later
     if cfg.data.train['type'] == 'RepeatDataset':
         cfg.data.train = cfg.data.train.dataset
+    # use only first dataset for `ConcatDataset`
+    if cfg.data.train['type'] == 'ConcatDataset':
+        cfg.data.train = cfg.data.train.datasets[0]
     train_data_cfg = cfg.data.train
     # eval_pipeline purely consists of loading functions
     # use eval_pipeline for data loading
@@ -190,10 +193,6 @@ def main():
             cfg.data.train, default_args=dict(filter_empty_gt=False))
     except TypeError:  # seg dataset doesn't have `filter_empty_gt` key
         dataset = build_dataset(cfg.data.train)
-
-    # use only first dataset for ConcatDataset
-    if isinstance(dataset, ConcatDataset):
-        dataset = dataset.datasets[0]
     data_infos = dataset.data_infos
 
     dataset_type = cfg.dataset_type

From ea83da94d0b4d08626b7b9f3b1ce280cf78a9417 Mon Sep 17 00:00:00 2001
From: Danila Rukhovich
Date: Thu, 5 Aug 2021 11:56:59 +0300
Subject: [PATCH 5/8] fix link in __init__.py

---
 mmdet3d/datasets/__init__.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mmdet3d/datasets/__init__.py b/mmdet3d/datasets/__init__.py
index 3ea2998417..2fcd650a1d 100644
--- a/mmdet3d/datasets/__init__.py
+++ b/mmdet3d/datasets/__init__.py
@@ -7,14 +7,17 @@ from .lyft_dataset import LyftDataset
 from .nuscenes_dataset import NuScenesDataset
 from .nuscenes_mono_dataset import NuScenesMonoDataset
+# yapf: disable
 from .pipelines import (BackgroundPointsFilter, GlobalAlignment,
                         GlobalRotScaleTrans, IndoorPatchPointSample,
                         IndoorPointSample, LoadAnnotations3D,
                         LoadPointsFromFile, LoadPointsFromMultiSweeps,
                         NormalizePointsColor, ObjectNameFilter, ObjectNoise,
-                        ObjectRangeFilter, ObjectSample, PointShuffle,
-                        PointsRangeFilter, RandomDropPointsColor, RandomFlip3D,
-                        RandomJitterPoints, VoxelBasedPointSampler, PointSample)
+                        ObjectRangeFilter, ObjectSample, PointSample,
+                        PointShuffle, PointsRangeFilter, RandomDropPointsColor,
+                        RandomFlip3D, RandomJitterPoints,
+                        VoxelBasedPointSampler)
+# yapf: enable
 from .s3dis_dataset import S3DISDataset, S3DISSegDataset
 from .scannet_dataset import ScanNetDataset, ScanNetSegDataset
 from .semantickitti_dataset import SemanticKITTIDataset
 from .sunrgbd_dataset import SUNRGBDDataset

From f6cf4719fee9a35fdb1c43f0d00df7adb60beb62 Mon Sep 17 00:00:00 2001
From: Danila Rukhovich
Date: Thu, 5 Aug 2021 14:05:04 +0300
Subject: [PATCH 6/8] remove unused import

---
 tools/misc/browse_dataset.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/misc/browse_dataset.py b/tools/misc/browse_dataset.py
index 5aa592874d..8fece0866b 100644
--- a/tools/misc/browse_dataset.py
+++ b/tools/misc/browse_dataset.py
@@ -9,7 +9,6 @@
 from mmdet3d.core.visualizer import (show_multi_modality_result, show_result,
                                      show_seg_result)
 from mmdet3d.datasets import build_dataset
-from mmdet.datasets.dataset_wrappers import ConcatDataset
 
 
 def parse_args():

From 513c351d79720cf3970ad412a25e6063482c3921 Mon Sep 17 00:00:00 2001
From: Danila Rukhovich
Date: Fri, 6 Aug 2021 19:09:17 +0300
Subject: [PATCH 7/8] add dataset tests

---
 tests/data/s3dis/s3dis_infos.pkl | Bin 280 -> 1013 bytes
 .../test_datasets/test_s3dis_dataset.py | 60 +++++++++++++++++-
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/tests/data/s3dis/s3dis_infos.pkl b/tests/data/s3dis/s3dis_infos.pkl
index
668e277cf44163c5b06f65e73677be654e30accc..99ce10824ffc0e2753fc4deaa57738ed72d8db0f 100644
GIT binary patch
literal 1013
[base85-encoded binary payload omitted]

delta 11
[base85-encoded binary payload omitted]

Date: Fri, 6 Aug 2021 20:29:10 +0300
Subject: [PATCH 8/8] add pytest.skip

---
 tests/test_data/test_datasets/test_s3dis_dataset.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_data/test_datasets/test_s3dis_dataset.py b/tests/test_data/test_datasets/test_s3dis_dataset.py
index 3200dfc174..dd037d25c0 100644
--- a/tests/test_data/test_datasets/test_s3dis_dataset.py
+++ b/tests/test_data/test_datasets/test_s3dis_dataset.py
@@ -42,6 +42,8 @@ def test_getitem():
 
 
 def test_evaluate():
+    if not torch.cuda.is_available():
+        pytest.skip()
     from mmdet3d.core.bbox.structures import DepthInstance3DBoxes
     root_path = './tests/data/s3dis'
     ann_file = './tests/data/s3dis/s3dis_infos.pkl'
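
The converter's S3DISData.get_bboxes above derives axis-aligned detection boxes directly from the per-point instance and semantic masks. Below is a minimal standalone sketch of that conversion; it assumes only NumPy, the toy scene is made up, and the helper name masks_to_boxes is ours rather than part of the patch:

    import numpy as np

    # Detection classes and their S3DIS semantic ids (7: table, 8: chair,
    # 9: sofa, 10: bookcase, 11: board), matching cat_ids in the converter.
    cat_ids = np.array([7, 8, 9, 10, 11])
    cat_ids2class = {cat_id: i for i, cat_id in enumerate(cat_ids)}

    def masks_to_boxes(points, pts_instance_mask, pts_semantic_mask):
        """Axis-aligned (x, y, z, dx, dy, dz) boxes from per-point masks."""
        bboxes, labels = [], []
        for i in range(1, pts_instance_mask.max() + 1):
            ids = pts_instance_mask == i
            if not ids.any():
                continue
            label = pts_semantic_mask[ids][0]
            if label in cat_ids2class:  # keep only the 5 furniture classes
                labels.append(cat_ids2class[label])
                pts = points[ids, :3]
                min_pts, max_pts = pts.min(axis=0), pts.max(axis=0)
                center = (min_pts + max_pts) / 2  # gravity center of the box
                dims = max_pts - min_pts
                bboxes.append(np.concatenate((center, dims)))
        return np.array(bboxes), np.array(labels)

    # Toy scene: 4 points of a table (instance 1), 2 points of a wall (instance 2).
    points = np.array([[0., 0., 0.], [2., 1., .8], [0., 1., .8], [2., 0., 0.],
                       [5., 5., 0.], [5., 5., 3.]])
    boxes, labels = masks_to_boxes(points,
                                   np.array([1, 1, 1, 1, 2, 2]),
                                   np.array([7, 7, 7, 7, 2, 2]))
    # boxes -> [[1., 0.5, 0.4, 2., 1., 0.8]], labels -> [0] (table)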
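
In get_ann_info the converted boxes are gravity-centered (x, y, z, dx, dy, dz), which is why they are wrapped with origin=(0.5, 0.5, 0.5): DepthInstance3DBoxes then shifts them to its internal bottom-center convention. A small sketch with a made-up box, assuming torch and mmdet3d are installed:

    import torch
    from mmdet3d.core.bbox import DepthInstance3DBoxes

    # A 2.0 x 1.0 x 0.8 m table whose gravity center sits at z = 0.4.
    raw = torch.tensor([[1.0, 0.5, 0.4, 2.0, 1.0, 0.8]])
    boxes = DepthInstance3DBoxes(
        raw, box_dim=6, with_yaw=False, origin=(0.5, 0.5, 0.5))
    print(boxes.gravity_center)  # tensor([[1.0, 0.5, 0.4]]), unchanged
    print(boxes.bottom_center)   # tensor([[1.0, 0.5, 0.0]]), z shifted by -dz / 2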
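
With the s3dis_infos_Area_*.pkl files generated under ./data/s3dis, the final detection config can be exercised end to end. A sketch of the intended usage, run from the repository root (paths as in the config; the printed values depend on the generated infos):

    from mmcv import Config
    from mmdet3d.datasets import build_dataset

    cfg = Config.fromfile('configs/_base_/datasets/s3dis-3d-5class.py')

    # Train split: RepeatDataset wrapping a ConcatDataset over Areas 1-4 and 6.
    train_set = build_dataset(cfg.data.train)
    print(type(train_set).__name__, len(train_set))

    # Val/test split: a single S3DISDataset on Area 5.
    val_set = build_dataset(cfg.data.val)
    ann = val_set.get_ann_info(0)
    print(ann['gt_bboxes_3d'], ann['gt_labels_3d'])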
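
After PATCH 7/8 and 8/8, the new unit tests can be run with `pytest tests/test_data/test_datasets/test_s3dis_dataset.py`; as added in PATCH 8/8, test_evaluate calls pytest.skip() when CUDA is not available, so CPU-only machines skip it.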