diff --git a/docs/datasets/scannet_det.md b/docs/datasets/scannet_det.md index 3d775552e1..6e35dfd9d4 100644 --- a/docs/datasets/scannet_det.md +++ b/docs/datasets/scannet_det.md @@ -110,8 +110,8 @@ def export(mesh_file, instance_ids[verts] = object_id if object_id not in object_id_to_label_id: object_id_to_label_id[object_id] = label_ids[verts][0] - # bbox format is [x, y, z, dx, dy, dz, label_id] - # [x, y, z] is gravity center of bbox, [dx, dy, dz] is axis-aligned + # bbox format is [x, y, z, x_size, y_size, z_size, label_id] + # [x, y, z] is gravity center of bbox, [x_size, y_size, z_size] is axis-aligned # [label_id] is semantic label id in 'nyu40id' standard # Note: since 3D bbox is axis-aligned, the yaw is 0. unaligned_bboxes = extract_bbox(mesh_vertices, object_id_to_segs, diff --git a/mmdet3d/apis/inference.py b/mmdet3d/apis/inference.py index bb500b51ce..bd2dcdb385 100644 --- a/mmdet3d/apis/inference.py +++ b/mmdet3d/apis/inference.py @@ -460,15 +460,17 @@ def show_result_meshlab(data, data (dict): Contain data from pipeline. result (dict): Predicted result from model. out_dir (str): Directory to save visualized result. - score_thr (float): Minimum score of bboxes to be shown. Default: 0.0 - show (bool): Visualize the results online. Defaults to False. - snapshot (bool): Whether to save the online results. Defaults to False. - task (str): Distinguish which task result to visualize. Currently we - support 3D detection, multi-modality detection and 3D segmentation. - Defaults to 'det'. - palette (list[list[int]]] | np.ndarray | None): The palette of - segmentation map. If None is given, random palette will be - generated. Defaults to None. + score_thr (float, optional): Minimum score of bboxes to be shown. + Default: 0.0 + show (bool, optional): Visualize the results online. Defaults to False. + snapshot (bool, optional): Whether to save the online results. + Defaults to False. + task (str, optional): Distinguish which task result to visualize. + Currently we support 3D detection, multi-modality detection and + 3D segmentation. Defaults to 'det'. + palette (list[list[int]] | np.ndarray, optional): The palette + of segmentation map. If None is given, random palette will be + generated. Defaults to None. """ assert task in ['det', 'multi_modality-det', 'seg', 'mono-det'], \ f'unsupported visualization task {task}' diff --git a/mmdet3d/apis/test.py b/mmdet3d/apis/test.py index 0c6d7b3325..bb899b93eb 100644 --- a/mmdet3d/apis/test.py +++ b/mmdet3d/apis/test.py @@ -22,9 +22,9 @@ def single_gpu_test(model, Args: model (nn.Module): Model to be tested. data_loader (nn.Dataloader): Pytorch data loader. - show (bool): Whether to save viualization results. + show (bool, optional): Whether to save visualization results. Default: True. - out_dir (str): The path to save visualization results. + out_dir (str, optional): The path to save visualization results. Default: None. Returns: diff --git a/mmdet3d/core/anchor/anchor_3d_generator.py b/mmdet3d/core/anchor/anchor_3d_generator.py index 5717eb7fb1..aa9255b08a 100644 --- a/mmdet3d/core/anchor/anchor_3d_generator.py +++ b/mmdet3d/core/anchor/anchor_3d_generator.py @@ -19,15 +19,21 @@ class Anchor3DRangeGenerator(object): ranges (list[list[float]]): Ranges of different anchors. The ranges are the same across different feature levels. But may vary for different anchor sizes if size_per_range is True. - sizes (list[list[float]]): 3D sizes of anchors. - scales (list[int]): Scales of anchors in different feature levels.
- rotations (list[float]): Rotations of anchors in a feature grid. - custom_values (tuple[float]): Customized values of that anchor. For - example, in nuScenes the anchors have velocities. - reshape_out (bool): Whether to reshape the output into (N x 4). - size_per_range: Whether to use separate ranges for different sizes. - If size_per_range is True, the ranges should have the same length - as the sizes, if not, it will be duplicated. + sizes (list[list[float]], optional): 3D sizes of anchors. + Defaults to [[3.9, 1.6, 1.56]]. + scales (list[int], optional): Scales of anchors in different feature + levels. Defaults to [1]. + rotations (list[float], optional): Rotations of anchors in a feature + grid. Defaults to [0, 1.5707963]. + custom_values (tuple[float], optional): Customized values of that + anchor. For example, in nuScenes the anchors have velocities. + Defaults to (). + reshape_out (bool, optional): Whether to reshape the output into + (N x 4). Defaults to True. + size_per_range (bool, optional): Whether to use separate ranges for + different sizes. If size_per_range is True, the ranges should have + the same length as the sizes; if not, it will be duplicated. + Defaults to True. """ def __init__(self, @@ -86,13 +92,14 @@ def grid_anchors(self, featmap_sizes, device='cuda'): Args: featmap_sizes (list[tuple]): List of feature map sizes in multiple feature levels. - device (str): Device where the anchors will be put on. + device (str, optional): Device where the anchors will be put on. + Defaults to 'cuda'. Returns: - list[torch.Tensor]: Anchors in multiple feature levels. \ - The sizes of each tensor should be [N, 4], where \ - N = width * height * num_base_anchors, width and height \ - are the sizes of the corresponding feature lavel, \ + list[torch.Tensor]: Anchors in multiple feature levels. + The sizes of each tensor should be [N, 4], where + N = width * height * num_base_anchors, width and height + are the sizes of the corresponding feature level, num_base_anchors is the number of anchors for that level. """ assert self.num_levels == len(featmap_sizes) @@ -161,14 +168,18 @@ def anchors_single_range(self, shape [6]. The order is consistent with that of anchors, i.e., (x_min, y_min, z_min, x_max, y_max, z_max). scale (float | int, optional): The scale factor of anchors. - sizes (list[list] | np.ndarray | torch.Tensor): Anchor size with - shape [N, 3], in order of x, y, z. - rotations (list[float] | np.ndarray | torch.Tensor): Rotations of - anchors in a single feature grid. + Defaults to 1. + sizes (list[list] | np.ndarray | torch.Tensor, optional): + Anchor size with shape [N, 3], in order of x, y, z. + Defaults to [[3.9, 1.6, 1.56]]. + rotations (list[float] | np.ndarray | torch.Tensor, optional): + Rotations of anchors in a single feature grid. + Defaults to [0, 1.5707963]. device (str): Devices that the anchors will be put on. + Defaults to 'cuda'. Returns: - torch.Tensor: Anchors with shape \ + torch.Tensor: Anchors with shape [*feature_size, num_sizes, num_rots, 7]. """ if len(feature_size) == 2: @@ -231,10 +242,10 @@ class AlignedAnchor3DRangeGenerator(Anchor3DRangeGenerator): up corner to distribute anchors. Args: - anchor_corner (bool): Whether to align with the corner of the voxel - grid. By default it is False and the anchor's center will be + anchor_corner (bool, optional): Whether to align with the corner of the + voxel grid.
By default it is False and the anchor's center will be the same as the corresponding voxel's center, which is also the - center of the corresponding greature grid. + center of the corresponding feature grid. Defaults to False. """ def __init__(self, align_corner=False, **kwargs): @@ -256,15 +267,18 @@ def anchors_single_range(self, anchor_range (torch.Tensor | list[float]): Range of anchors with shape [6]. The order is consistent with that of anchors, i.e., (x_min, y_min, z_min, x_max, y_max, z_max). - scale (float | int, optional): The scale factor of anchors. - sizes (list[list] | np.ndarray | torch.Tensor): Anchor size with - shape [N, 3], in order of x, y, z. - rotations (list[float] | np.ndarray | torch.Tensor): Rotations of - anchors in a single feature grid. - device (str): Devices that the anchors will be put on. + scale (float | int): The scale factor of anchors. + sizes (list[list] | np.ndarray | torch.Tensor, optional): + Anchor size with shape [N, 3], in order of x, y, z. + Defaults to [[3.9, 1.6, 1.56]]. + rotations (list[float] | np.ndarray | torch.Tensor, optional): + Rotations of anchors in a single feature grid. + Defaults to [0, 1.5707963]. + device (str, optional): Devices that the anchors will be put on. + Defaults to 'cuda'. Returns: - torch.Tensor: Anchors with shape \ + torch.Tensor: Anchors with shape [*feature_size, num_sizes, num_rots, 7]. """ if len(feature_size) == 2: @@ -334,7 +348,7 @@ class AlignedAnchor3DRangeGeneratorPerCls(AlignedAnchor3DRangeGenerator): Note that feature maps of different classes may be different. Args: - kwargs (dict): Arguments are the same as those in \ + kwargs (dict): Arguments are the same as those in :class:`AlignedAnchor3DRangeGenerator`. """ @@ -347,15 +361,16 @@ def grid_anchors(self, featmap_sizes, device='cuda'): """Generate grid anchors in multiple feature levels. Args: - featmap_sizes (list[tuple]): List of feature map sizes for \ + featmap_sizes (list[tuple]): List of feature map sizes for different classes in a single feature level. - device (str): Device where the anchors will be put on. + device (str, optional): Device where the anchors will be put on. + Defaults to 'cuda'. Returns: - list[list[torch.Tensor]]: Anchors in multiple feature levels. \ - Note that in this anchor generator, we currently only \ - support single feature level. The sizes of each tensor \ - should be [num_sizes/ranges*num_rots*featmap_size, \ + list[list[torch.Tensor]]: Anchors in multiple feature levels. + Note that in this anchor generator, we currently only + support single feature level. The sizes of each tensor + should be [num_sizes/ranges*num_rots*featmap_size, box_code_size]. """ multi_level_anchors = [] @@ -371,7 +386,7 @@ def multi_cls_grid_anchors(self, featmap_sizes, scale, device='cuda'): This function is usually called by method ``self.grid_anchors``. Args: - featmap_sizes (list[tuple]): List of feature map sizes for \ + featmap_sizes (list[tuple]): List of feature map sizes for different classes in a single feature level. scale (float): Scale factor of the anchors in the current level. device (str, optional): Device the tensor will be put on. diff --git a/mmdet3d/core/bbox/box_np_ops.py b/mmdet3d/core/bbox/box_np_ops.py index 0a1c062e33..26a43e60ed 100644 --- a/mmdet3d/core/bbox/box_np_ops.py +++ b/mmdet3d/core/bbox/box_np_ops.py @@ -1,6 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved.
# TODO: clean the functions in this file and move the APIs into box structures # in the future +# NOTICE: All functions in this file are valid for LiDAR or depth boxes only +# if we use default parameters. + import numba import numpy as np @@ -47,13 +50,13 @@ def box_camera_to_lidar(data, r_rect, velo2cam): np.ndarray, shape=[N, 3]: Boxes in lidar coordinate. """ xyz = data[:, 0:3] - dx, dy, dz = data[:, 3:4], data[:, 4:5], data[:, 5:6] + x_size, y_size, z_size = data[:, 3:4], data[:, 4:5], data[:, 5:6] r = data[:, 6:7] xyz_lidar = camera_to_lidar(xyz, r_rect, velo2cam) # yaw and dims also needs to be converted r_new = -r - np.pi / 2 r_new = limit_period(r_new, period=np.pi * 2) - return np.concatenate([xyz_lidar, dx, dz, dy, r_new], axis=1) + return np.concatenate([xyz_lidar, x_size, z_size, y_size, r_new], axis=1) def corners_nd(dims, origin=0.5): @@ -92,7 +95,7 @@ def corners_nd(dims, origin=0.5): def center_to_corner_box2d(centers, dims, angles=None, origin=0.5): """Convert kitti locations, dimensions and angles to corners. - format: center(xy), dims(xy), angles(clockwise when positive) + format: center(xy), dims(xy), angles(counterclockwise when positive) Args: centers (np.ndarray): Locations in kitti label file with shape (N, 2). @@ -187,7 +190,7 @@ def center_to_corner_box3d(centers, np.ndarray: Corners with the shape of (N, 8, 3). """ # 'length' in kitti format is in x axis. - # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(wlh)(lidar) + # yzx(hwl)(kitti label file)<->xyz(lhw)(camera)<->z(-x)(-y)(lwh)(lidar) # center in kitti format is [0.5, 1.0, 0.5] in xyz. corners = corners_nd(dims, origin=origin) # corners: [N, 8, 3] @@ -348,7 +351,10 @@ def corner_to_surfaces_3d(corners): def points_in_rbbox(points, rbbox, z_axis=2, origin=(0.5, 0.5, 0)): - """Check points in rotated bbox and return indicces. + """Check points in rotated bbox and return indices. + + Note: + This function is for counterclockwise boxes. Args: points (np.ndarray, shape=[N, 3+dim]): Points to query. @@ -404,7 +410,7 @@ def create_anchors_3d_range(feature_size, rotations (list[float] | np.ndarray | torch.Tensor, optional): Rotations of anchors in a single feature grid. Defaults to (0, np.pi / 2). - dtype (type, optional): Data type. Default to np.float32. + dtype (type, optional): Data type. Defaults to np.float32. Returns: np.ndarray: Range based anchors with shape of @@ -478,6 +484,9 @@ def iou_jit(boxes, query_boxes, mode='iou', eps=0.0): """Calculate box iou. Note that jit version runs ~10x faster than the box_overlaps function in mmdet3d.core.evaluation. + Note: + This function is for counterclockwise boxes. + Args: boxes (np.ndarray): Input bounding boxes with shape of (N, 4). query_boxes (np.ndarray): Query boxes with shape of (K, 4). @@ -515,7 +524,10 @@ def iou_jit(boxes, query_boxes, mode='iou', eps=0.0): def projection_matrix_to_CRT_kitti(proj): - """Split projection matrix of kitti. + """Split projection matrix of KITTI. + + Note: + This function is for KITTI only. P = C @ [R|T] C is upper triangular matrix, so we need to inverse CR and use QR @@ -541,6 +553,9 @@ def projection_matrix_to_CRT_kitti(proj): def remove_outside_points(points, rect, Trv2c, P2, image_shape): """Remove points which are outside of image. + Note: + This function is for KITTI only. + Args: points (np.ndarray, shape=[N, 3+dims]): Total points. 
rect (np.ndarray, shape=[4, 4]): Matrix to project points in @@ -691,7 +706,7 @@ def points_in_convex_polygon_3d_jit(points, @numba.jit -def points_in_convex_polygon_jit(points, polygon, clockwise=True): +def points_in_convex_polygon_jit(points, polygon, clockwise=False): """Check points is in 2d convex polygons. True when point in polygon. Args: @@ -747,10 +762,13 @@ def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True): |/ |/ 2 -------- 1 + Note: + This function is for LiDAR boxes only. + Args: boxes3d (np.ndarray): Boxes with shape of (N, 7) - [x, y, z, dx, dy, dz, ry] in LiDAR coords, see the definition of - ry in KITTI dataset. + [x, y, z, x_size, y_size, z_size, ry] in LiDAR coords, + see the definition of ry in KITTI dataset. bottom_center (bool, optional): Whether z is on the bottom center of object. Defaults to True. @@ -758,25 +776,25 @@ def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True): np.ndarray: Box corners with the shape of [N, 8, 3]. """ boxes_num = boxes3d.shape[0] - dx, dy, dz = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] + x_size, y_size, z_size = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] x_corners = np.array([ - dx / 2., -dx / 2., -dx / 2., dx / 2., dx / 2., -dx / 2., -dx / 2., - dx / 2. + x_size / 2., -x_size / 2., -x_size / 2., x_size / 2., x_size / 2., + -x_size / 2., -x_size / 2., x_size / 2. ], dtype=np.float32).T y_corners = np.array([ - -dy / 2., -dy / 2., dy / 2., dy / 2., -dy / 2., -dy / 2., dy / 2., - dy / 2. + -y_size / 2., -y_size / 2., y_size / 2., y_size / 2., -y_size / 2., + -y_size / 2., y_size / 2., y_size / 2. ], dtype=np.float32).T if bottom_center: z_corners = np.zeros((boxes_num, 8), dtype=np.float32) - z_corners[:, 4:8] = dz.reshape(boxes_num, 1).repeat( + z_corners[:, 4:8] = z_size.reshape(boxes_num, 1).repeat( 4, axis=1) # (N, 8) else: z_corners = np.array([ - -dz / 2., -dz / 2., -dz / 2., -dz / 2., dz / 2., dz / 2., dz / 2., - dz / 2. + -z_size / 2., -z_size / 2., -z_size / 2., -z_size / 2., + z_size / 2., z_size / 2., z_size / 2., z_size / 2. ], dtype=np.float32).T diff --git a/mmdet3d/core/bbox/coders/anchor_free_bbox_coder.py b/mmdet3d/core/bbox/coders/anchor_free_bbox_coder.py index 812cae8573..d64f38b5c9 100644 --- a/mmdet3d/core/bbox/coders/anchor_free_bbox_coder.py +++ b/mmdet3d/core/bbox/coders/anchor_free_bbox_coder.py @@ -25,7 +25,7 @@ def encode(self, gt_bboxes_3d, gt_labels_3d): """Encode ground truth to prediction targets. Args: - gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes \ + gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes with shape (n, 7). gt_labels_3d (torch.Tensor): Ground truth classes. diff --git a/mmdet3d/core/bbox/coders/centerpoint_bbox_coders.py b/mmdet3d/core/bbox/coders/centerpoint_bbox_coders.py index 2af76ca65a..6d43a63d4b 100644 --- a/mmdet3d/core/bbox/coders/centerpoint_bbox_coders.py +++ b/mmdet3d/core/bbox/coders/centerpoint_bbox_coders.py @@ -13,12 +13,12 @@ class CenterPointBBoxCoder(BaseBBoxCoder): pc_range (list[float]): Range of point cloud. out_size_factor (int): Downsample factor of the model. voxel_size (list[float]): Size of voxel. - post_center_range (list[float]): Limit of the center. + post_center_range (list[float], optional): Limit of the center. Default: None. - max_num (int): Max number to be kept. Default: 100. - score_threshold (float): Threshold to filter boxes based on score. - Default: None. - code_size (int): Code size of bboxes. Default: 9 + max_num (int, optional): Max number to be kept. Default: 100. 
+ score_threshold (float, optional): Threshold to filter boxes + based on score. Default: None. + code_size (int, optional): Code size of bboxes. Default: 9 """ def __init__(self, @@ -45,7 +45,8 @@ def _gather_feat(self, feats, inds, feat_masks=None): feats (torch.Tensor): Features to be transposed and gathered with the shape of [B, 2, W, H]. inds (torch.Tensor): Indexes with the shape of [B, N]. - feat_masks (torch.Tensor): Mask of the feats. Default: None. + feat_masks (torch.Tensor, optional): Mask of the feats. + Default: None. Returns: torch.Tensor: Gathered feats. @@ -64,7 +65,7 @@ def _topk(self, scores, K=80): Args: scores (torch.Tensor): scores with the shape of [B, N, W, H]. - K (int): Number to be kept. Defaults to 80. + K (int, optional): Number to be kept. Defaults to 80. Returns: tuple[torch.Tensor] @@ -135,9 +136,9 @@ def decode(self, dim (torch.Tensor): Dim of the boxes with the shape of [B, 1, W, H]. vel (torch.Tensor): Velocity with the shape of [B, 1, W, H]. - reg (torch.Tensor): Regression value of the boxes in 2D with - the shape of [B, 2, W, H]. Default: None. - task_id (int): Index of task. Default: -1. + reg (torch.Tensor, optional): Regression value of the boxes in + 2D with the shape of [B, 2, W, H]. Default: None. + task_id (int, optional): Index of task. Default: -1. Returns: list[dict]: Decoded boxes. diff --git a/mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py b/mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py index 1fe491a3ab..931e839872 100644 --- a/mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py +++ b/mmdet3d/core/bbox/coders/delta_xyzwhlr_bbox_coder.py @@ -19,9 +19,9 @@ def __init__(self, code_size=7): @staticmethod def encode(src_boxes, dst_boxes): - """Get box regression transformation deltas (dx, dy, dz, dw, dh, dl, - dr, dv*) that can be used to transform the `src_boxes` into the - `target_boxes`. + """Get box regression transformation deltas (dx, dy, dz, dx_size, + dy_size, dz_size, dr, dv*) that can be used to transform the + `src_boxes` into the `target_boxes`. Args: src_boxes (torch.Tensor): source boxes, e.g., object proposals. @@ -56,13 +56,13 @@ def encode(src_boxes, dst_boxes): @staticmethod def decode(anchors, deltas): - """Apply transformation `deltas` (dx, dy, dz, dw, dh, dl, dr, dv*) to - `boxes`. + """Apply transformation `deltas` (dx, dy, dz, dx_size, dy_size, + dz_size, dr, dv*) to `boxes`. Args: anchors (torch.Tensor): Parameters of anchors with shape (N, 7). deltas (torch.Tensor): Encoded boxes with shape - (N, 7+n) [x, y, z, w, l, h, r, velo*]. + (N, 7+n) [x, y, z, x_size, y_size, z_size, r, velo*]. Returns: torch.Tensor: Decoded boxes. diff --git a/mmdet3d/core/bbox/coders/groupfree3d_bbox_coder.py b/mmdet3d/core/bbox/coders/groupfree3d_bbox_coder.py index e20de72fb1..08d83e92c7 100644 --- a/mmdet3d/core/bbox/coders/groupfree3d_bbox_coder.py +++ b/mmdet3d/core/bbox/coders/groupfree3d_bbox_coder.py @@ -14,9 +14,10 @@ class GroupFree3DBBoxCoder(PartialBinBasedBBoxCoder): num_dir_bins (int): Number of bins to encode direction angle. num_sizes (int): Number of size clusters. mean_sizes (list[list[int]]): Mean size of bboxes in each class. - with_rot (bool): Whether the bbox is with rotation. Defaults to True. - size_cls_agnostic (bool): Whether the predicted size is class-agnostic. + with_rot (bool, optional): Whether the bbox is with rotation. Defaults to True. + size_cls_agnostic (bool, optional): Whether the predicted size is + class-agnostic. Defaults to True. 
""" def __init__(self, @@ -36,7 +37,7 @@ def encode(self, gt_bboxes_3d, gt_labels_3d): """Encode ground truth to prediction targets. Args: - gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes \ + gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes with shape (n, 7). gt_labels_3d (torch.Tensor): Ground truth classes. @@ -76,7 +77,7 @@ def decode(self, bbox_out, prefix=''): - size_class: predicted bbox size class. - size_res: predicted bbox size residual. - size: predicted class-agnostic bbox size - prefix (str): Decode predictions with specific prefix. + prefix (str, optional): Decode predictions with specific prefix. Defaults to ''. Returns: @@ -122,7 +123,7 @@ def split_pred(self, cls_preds, reg_preds, base_xyz, prefix=''): cls_preds (torch.Tensor): Class predicted features to split. reg_preds (torch.Tensor): Regression predicted features to split. base_xyz (torch.Tensor): Coordinates of points. - prefix (str): Decode predictions with specific prefix. + prefix (str, optional): Decode predictions with specific prefix. Defaults to ''. Returns: diff --git a/mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py b/mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py index 9c22f2f778..ed8020d70d 100644 --- a/mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py +++ b/mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py @@ -29,7 +29,7 @@ def encode(self, gt_bboxes_3d, gt_labels_3d): """Encode ground truth to prediction targets. Args: - gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes \ + gt_bboxes_3d (BaseInstance3DBoxes): Ground truth bboxes with shape (n, 7). gt_labels_3d (torch.Tensor): Ground truth classes. diff --git a/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py b/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py index 157f6673cf..4afdabf2d6 100644 --- a/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py +++ b/mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py @@ -32,16 +32,16 @@ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False): Args: bboxes1 (torch.Tensor): shape (N, 7+N) - [x, y, z, dx, dy, dz, ry, v]. + [x, y, z, x_size, y_size, z_size, ry, v]. bboxes2 (torch.Tensor): shape (M, 7+N) - [x, y, z, dx, dy, dz, ry, v]. + [x, y, z, x_size, y_size, z_size, ry, v]. mode (str): "iou" (intersection over union) or iof (intersection over foreground). is_aligned (bool): Whether the calculation is aligned. Return: - torch.Tensor: If ``is_aligned`` is ``True``, return ious between \ - bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is \ + torch.Tensor: If ``is_aligned`` is ``True``, return ious between + bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is ``False``, return shape is M. """ return bbox_overlaps_nearest_3d(bboxes1, bboxes2, mode, is_aligned, @@ -76,13 +76,15 @@ def __call__(self, bboxes1, bboxes2, mode='iou'): calculate the actual 3D IoUs of boxes. Args: - bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, dx, dy, dz, ry]. - bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, dx, dy, dz, ry]. + bboxes1 (torch.Tensor): with shape (N, 7+C), + (x, y, z, x_size, y_size, z_size, ry, v*). + bboxes2 (torch.Tensor): with shape (M, 7+C), + (x, y, z, x_size, y_size, z_size, ry, v*). mode (str): "iou" (intersection over union) or iof (intersection over foreground). Return: - torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2 \ + torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2 with shape (M, N) (aligned mode is not supported currently). 
""" return bbox_overlaps_3d(bboxes1, bboxes2, mode, self.coordinate) @@ -112,15 +114,17 @@ def bbox_overlaps_nearest_3d(bboxes1, aligned pair of bboxes1 and bboxes2. Args: - bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, dx, dy, dz, ry, v]. - bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, dx, dy, dz, ry, v]. + bboxes1 (torch.Tensor): with shape (N, 7+C), + (x, y, z, x_size, y_size, z_size, ry, v*). + bboxes2 (torch.Tensor): with shape (M, 7+C), + (x, y, z, x_size, y_size, z_size, ry, v*). mode (str): "iou" (intersection over union) or iof (intersection over foreground). is_aligned (bool): Whether the calculation is aligned Return: - torch.Tensor: If ``is_aligned`` is ``True``, return ious between \ - bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is \ + torch.Tensor: If ``is_aligned`` is ``True``, return ious between + bboxes1 and bboxes2 with shape (M, N). If ``is_aligned`` is ``False``, return shape is M. """ assert bboxes1.size(-1) == bboxes2.size(-1) >= 7 @@ -150,14 +154,16 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'): calculate the actual IoUs of boxes. Args: - bboxes1 (torch.Tensor): shape (N, 7+C) [x, y, z, dx, dy, dz, ry]. - bboxes2 (torch.Tensor): shape (M, 7+C) [x, y, z, dx, dy, dz, ry]. + bboxes1 (torch.Tensor): with shape (N, 7+C), + (x, y, z, x_size, y_size, z_size, ry, v*). + bboxes2 (torch.Tensor): with shape (M, 7+C), + (x, y, z, x_size, y_size, z_size, ry, v*). mode (str): "iou" (intersection over union) or iof (intersection over foreground). coordinate (str): 'camera' or 'lidar' coordinate system. Return: - torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2 \ + torch.Tensor: Bbox overlaps results of bboxes1 and bboxes2 with shape (M, N) (aligned mode is not supported currently). """ assert bboxes1.size(-1) == bboxes2.size(-1) >= 7 @@ -187,7 +193,7 @@ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False): mode (str): "iou" (intersection over union) or "giou" (generalized intersection over union). is_aligned (bool, optional): If True, then m and n must be equal. - Default False. + Defaults to False. Returns: Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,) """ @@ -221,9 +227,9 @@ def axis_aligned_bbox_overlaps_3d(bboxes1, mode (str): "iou" (intersection over union) or "giou" (generalized intersection over union). is_aligned (bool, optional): If True, then m and n must be equal. - Default False. + Defaults to False. eps (float, optional): A value added to the denominator for numerical - stability. Default 1e-6. + stability. Defaults to 1e-6. Returns: Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,) diff --git a/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py b/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py index 398c31bf3b..c0de845f68 100644 --- a/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py +++ b/mmdet3d/core/bbox/samplers/iou_neg_piecewise_sampler.py @@ -139,7 +139,7 @@ def sample(self, assign_result (:obj:`AssignResult`): Bbox assigning results. bboxes (torch.Tensor): Boxes to be sampled from. gt_bboxes (torch.Tensor): Ground truth bboxes. - gt_labels (torch.Tensor, optional): Class labels of ground truth \ + gt_labels (torch.Tensor, optional): Class labels of ground truth bboxes. 
Returns: diff --git a/mmdet3d/core/bbox/structures/base_box3d.py b/mmdet3d/core/bbox/structures/base_box3d.py index ebf3b324f6..ec182216b5 100644 --- a/mmdet3d/core/bbox/structures/base_box3d.py +++ b/mmdet3d/core/bbox/structures/base_box3d.py @@ -1,9 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. import numpy as np import torch +import warnings from abc import abstractmethod -from mmdet3d.ops import points_in_boxes_batch, points_in_boxes_gpu +from mmdet3d.ops import points_in_boxes_all, points_in_boxes_part from mmdet3d.ops.iou3d import iou3d_cuda from .utils import limit_period, xywhr2xyxyr @@ -19,12 +20,12 @@ class BaseInstance3DBoxes(object): tensor (torch.Tensor | np.ndarray | list): a N x box_dim matrix. box_dim (int): Number of the dimension of a box. Each row is (x, y, z, x_size, y_size, z_size, yaw). - Default to 7. + Defaults to 7. with_yaw (bool): Whether the box is with yaw rotation. If False, the value of yaw will be set to 0 as minmax boxes. - Default to True. - origin (tuple[float]): The relative position of origin in the box. - Default to (0.5, 0.5, 0). This will guide the box be converted to + Defaults to True. + origin (tuple[float], optional): Relative position of the box origin. + Defaults to (0.5, 0.5, 0). This will guide the box to be converted to (0.5, 0.5, 0) mode. Attributes: @@ -73,27 +74,29 @@ def volume(self): @property def dims(self): - """torch.Tensor: Corners of each box with size (N, 8, 3).""" + """torch.Tensor: Size dimensions of each box in shape (N, 3).""" return self.tensor[:, 3:6] @property def yaw(self): - """torch.Tensor: A vector with yaw of each box.""" + """torch.Tensor: A vector with yaw of each box in shape (N, ).""" return self.tensor[:, 6] @property def height(self): - """torch.Tensor: A vector with height of each box.""" + """torch.Tensor: A vector with height of each box in shape (N, ).""" return self.tensor[:, 5] @property def top_height(self): - """torch.Tensor: A vector with the top height of each box.""" + """torch.Tensor: + A vector with the top height of each box in shape (N, ).""" return self.bottom_height + self.height @property def bottom_height(self): - """torch.Tensor: A vector with bottom's height of each box.""" + """torch.Tensor: + A vector with bottom's height of each box in shape (N, ).""" return self.tensor[:, 2] @property @@ -101,35 +104,84 @@ def center(self): """Calculate the center of all the boxes. Note: - In the MMDetection3D's convention, the bottom center is + In MMDetection3D's convention, the bottom center is usually taken as the default center. The relative position of the centers in different kinds of boxes are different, e.g., the relative center of a boxes is (0.5, 1.0, 0.5) in camera and (0.5, 0.5, 0) in lidar. It is recommended to use ``bottom_center`` or ``gravity_center`` - for more clear usage. + for clearer usage. Returns: - torch.Tensor: A tensor with center of each box. + torch.Tensor: A tensor with center of each box in shape (N, 3).
""" return self.bottom_center @property def bottom_center(self): - """torch.Tensor: A tensor with center of each box.""" + """torch.Tensor: A tensor with center of each box in shape (N, 3).""" return self.tensor[:, :3] @property def gravity_center(self): - """torch.Tensor: A tensor with center of each box.""" + """torch.Tensor: A tensor with center of each box in shape (N, 3).""" pass @property def corners(self): - """torch.Tensor: a tensor with 8 corners of each box.""" + """torch.Tensor: + a tensor with 8 corners of each box in shape (N, 8, 3).""" pass + @property + def bev(self): + """torch.Tensor: 2D BEV box of each box with rotation + in XYWHR format, in shape (N, 5).""" + return self.tensor[:, [0, 1, 3, 4, 6]] + + @property + def nearest_bev(self): + """torch.Tensor: A tensor of 2D BEV box of each box + without rotation.""" + # Obtain BEV boxes with rotation in XYWHR format + bev_rotated_boxes = self.bev + # convert the rotation to a valid range + rotations = bev_rotated_boxes[:, -1] + normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi)) + + # find the center of boxes + conditions = (normed_rotations > np.pi / 4)[..., None] + bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:, + [0, 1, 3, 2]], + bev_rotated_boxes[:, :4]) + + centers = bboxes_xywh[:, :2] + dims = bboxes_xywh[:, 2:] + bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1) + return bev_boxes + + def in_range_bev(self, box_range): + """Check whether the boxes are in the given range. + + Args: + box_range (list | torch.Tensor): the range of box + (x_min, y_min, x_max, y_max) + + Note: + The original implementation of SECOND checks whether boxes in + a range by checking whether the points are in a convex + polygon, we reduce the burden for simpler cases. + + Returns: + torch.Tensor: Whether each box is inside the reference range. + """ + in_range_flags = ((self.bev[:, 0] > box_range[0]) + & (self.bev[:, 1] > box_range[1]) + & (self.bev[:, 0] < box_range[2]) + & (self.bev[:, 1] < box_range[3])) + return in_range_flags + @abstractmethod def rotate(self, angle, points=None): """Rotate boxes with points (optional) with the given angle or rotation @@ -138,21 +190,28 @@ def rotate(self, angle, points=None): Args: angle (float | torch.Tensor | np.ndarray): Rotation angle or rotation matrix. - points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional): + points (torch.Tensor | numpy.ndarray | + :obj:`BasePoints`, optional): Points to rotate. Defaults to None. """ pass @abstractmethod def flip(self, bev_direction='horizontal'): - """Flip the boxes in BEV along given BEV direction.""" + """Flip the boxes in BEV along given BEV direction. + + Args: + bev_direction (str, optional): Direction by which to flip. + Can be chosen from 'horizontal' and 'vertical'. + Defaults to 'horizontal'. + """ pass def translate(self, trans_vector): """Translate boxes with the given translation vector. Args: - trans_vector (torch.Tensor): Translation vector of size 1x3. + trans_vector (torch.Tensor): Translation vector of size (1, 3). """ if not isinstance(trans_vector, torch.Tensor): trans_vector = self.tensor.new_tensor(trans_vector) @@ -182,28 +241,15 @@ def in_range_3d(self, box_range): & (self.tensor[:, 2] < box_range[5])) return in_range_flags - @abstractmethod - def in_range_bev(self, box_range): - """Check whether the boxes are in the given range. - - Args: - box_range (list | torch.Tensor): The range of box - in order of (x_min, y_min, x_max, y_max). 
- - Returns: - torch.Tensor: Indicating whether each box is inside - the reference range. - """ - pass - @abstractmethod def convert_to(self, dst, rt_mat=None): """Convert self to ``dst`` mode. Args: dst (:obj:`Box3DMode`): The target Box mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. @@ -221,25 +267,26 @@ def scale(self, scale_factor): scale_factors (float): Scale factors to scale the boxes. """ self.tensor[:, :6] *= scale_factor - self.tensor[:, 7:] *= scale_factor + self.tensor[:, 7:] *= scale_factor # velocity def limit_yaw(self, offset=0.5, period=np.pi): """Limit the yaw to a given period and offset. Args: - offset (float): The offset of the yaw. - period (float): The expected period. + offset (float, optional): The offset of the yaw. Defaults to 0.5. + period (float, optional): The expected period. Defaults to np.pi. """ self.tensor[:, 6] = limit_period(self.tensor[:, 6], offset, period) - def nonempty(self, threshold: float = 0.0): + def nonempty(self, threshold=0.0): """Find boxes that are non-empty. A box is considered empty, if either of its side is no larger than threshold. Args: - threshold (float): The threshold of minimal sizes. + threshold (float, optional): The threshold of minimal sizes. + Defaults to 0.0. Returns: torch.Tensor: A binary vector which represents whether each @@ -364,7 +411,7 @@ def height_overlaps(cls, boxes1, boxes2, mode='iou'): Args: boxes1 (:obj:`BaseInstance3DBoxes`): Boxes 1 contain N boxes. boxes2 (:obj:`BaseInstance3DBoxes`): Boxes 2 contain M boxes. - mode (str, optional): Mode of iou calculation. Defaults to 'iou'. + mode (str, optional): Mode of IoU calculation. Defaults to 'iou'. Returns: torch.Tensor: Calculated iou of boxes. @@ -461,34 +508,49 @@ def new_box(self, data): return original_type( new_tensor, box_dim=self.box_dim, with_yaw=self.with_yaw) - def points_in_boxes(self, points, boxes_override=None): - """Find the box which the points are in. + def points_in_boxes_part(self, points, boxes_override=None): + """Find the box in which each point is. Args: - points (torch.Tensor): Points in shape (N, 3). + points (torch.Tensor): Points in shape (1, M, 3) or (M, 3), + 3 dimensions are (x, y, z) in LiDAR or depth coordinate. + boxes_override (torch.Tensor, optional): Boxes to override + `self.tensor`. Defaults to None. Returns: - torch.Tensor: The index of box where each point are in. + torch.Tensor: The index of the first box that each point + is in, in shape (M, ). Default value is -1 + (if the point is not enclosed by any box). + + Note: + If a point is enclosed by multiple boxes, the index of the + first box will be returned. """ if boxes_override is not None: boxes = boxes_override else: boxes = self.tensor - box_idx = points_in_boxes_gpu( - points.unsqueeze(0), - boxes.unsqueeze(0).to(points.device)).squeeze(0) + if points.dim() == 2: + points = points.unsqueeze(0) + box_idx = points_in_boxes_part(points, + boxes.unsqueeze(0).to( + points.device)).squeeze(0) return box_idx - def points_in_boxes_batch(self, points, boxes_override=None): - """Find points that are in boxes (CUDA). 
+ def points_in_boxes_all(self, points, boxes_override=None): + """Find all boxes in which each point is. Args: - points (torch.Tensor): Points in shape [1, M, 3] or [M, 3], - 3 dimensions are [x, y, z] in LiDAR coordinate. + points (torch.Tensor): Points in shape (1, M, 3) or (M, 3), + 3 dimensions are (x, y, z) in LiDAR or depth coordinate. + boxes_override (torch.Tensor, optional): Boxes to override + `self.tensor`. Defaults to None. Returns: - torch.Tensor: The index of boxes each point lies in with shape - of (B, M, T). + torch.Tensor: A tensor indicating whether a point is in a box, + in shape (M, T). T is the number of boxes. Denote this + tensor as A; if the m^th point is in the t^th box, then + `A[m, t] == 1`, otherwise `A[m, t] == 0`. """ if boxes_override is not None: boxes = boxes_override @@ -502,6 +564,18 @@ def points_in_boxes_batch(self, points, boxes_override=None): assert points_clone.dim() == 3 and points_clone.shape[0] == 1 boxes = boxes.to(points_clone.device).unsqueeze(0) - box_idxs_of_pts = points_in_boxes_batch(points_clone, boxes) + box_idxs_of_pts = points_in_boxes_all(points_clone, boxes) return box_idxs_of_pts.squeeze(0) + + def points_in_boxes(self, points, boxes_override=None): + warnings.warn('DeprecationWarning: points_in_boxes is a ' + 'deprecated method, please consider using ' + 'points_in_boxes_part.') + return self.points_in_boxes_part(points, boxes_override) + + def points_in_boxes_batch(self, points, boxes_override=None): + warnings.warn('DeprecationWarning: points_in_boxes_batch is a ' + 'deprecated method, please consider using ' + 'points_in_boxes_all.') + return self.points_in_boxes_all(points, boxes_override) diff --git a/mmdet3d/core/bbox/structures/box_3d_mode.py b/mmdet3d/core/bbox/structures/box_3d_mode.py index f8a131b0c4..72ecc360b1 100644 --- a/mmdet3d/core/bbox/structures/box_3d_mode.py +++ b/mmdet3d/core/bbox/structures/box_3d_mode.py @@ -71,12 +71,13 @@ def convert(box, src, dst, rt_mat=None, with_yaw=True): Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7. src (:obj:`Box3DMode`): The src Box mode. dst (:obj:`Box3DMode`): The target Box mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. - with_yaw (bool): If `box` is an instance of + with_yaw (bool, optional): If `box` is an instance of :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle. Defaults to True.
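Reviewer note: a minimal usage sketch of the renamed point-in-box methods above (not part of the patch; the box and point values are invented, and the tensors are moved to GPU because the underlying `points_in_boxes_part`/`points_in_boxes_all` ops are CUDA kernels):

import torch
from mmdet3d.core.bbox import LiDARInstance3DBoxes

# one box of size 4 x 2 x 1.5 whose bottom center sits at the origin
boxes = LiDARInstance3DBoxes(
    torch.tensor([[0., 0., 0., 4., 2., 1.5, 0.]]).cuda())
# one point inside the box and one far outside, in LiDAR coordinates
points = torch.tensor([[0., 0., 0.5], [10., 10., 10.]]).cuda()

part_idx = boxes.points_in_boxes_part(points)  # (M, ): first enclosing box index, -1 if none
all_mask = boxes.points_in_boxes_all(points)   # (M, T): entry [m, t] is 1 iff point m is in box t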
@@ -128,13 +129,13 @@ def convert(box, src, dst, rt_mat=None, with_yaw=True): yaw = limit_period(yaw, period=np.pi * 2) elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM: if rt_mat is None: - rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]]) + rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) xyz_size = torch.cat([x_size, z_size, y_size], dim=-1) if with_yaw: yaw = -yaw elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH: if rt_mat is None: - rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) + rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]]) xyz_size = torch.cat([x_size, z_size, y_size], dim=-1) if with_yaw: yaw = -yaw diff --git a/mmdet3d/core/bbox/structures/cam_box3d.py b/mmdet3d/core/bbox/structures/cam_box3d.py index 5b286ca332..dd2a8eed19 100644 --- a/mmdet3d/core/bbox/structures/cam_box3d.py +++ b/mmdet3d/core/bbox/structures/cam_box3d.py @@ -4,7 +4,7 @@ from ...points import BasePoints from .base_box3d import BaseInstance3DBoxes -from .utils import limit_period, rotation_3d_in_axis +from .utils import rotation_3d_in_axis class CameraInstance3DBoxes(BaseInstance3DBoxes): @@ -28,15 +28,12 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes): The yaw is 0 at the positive direction of x axis, and decreases from the positive direction of x to the positive direction of z. - A refactor is ongoing to make the three coordinate systems - easier to understand and convert between each other. - Attributes: - tensor (torch.Tensor): Float matrix of N x box_dim. - box_dim (int): Integer indicates the dimension of a box + tensor (torch.Tensor): Float matrix in shape (N, box_dim). + box_dim (int): Integer indicating the dimension of a box Each row is (x, y, z, x_size, y_size, z_size, yaw, ...). - with_yaw (bool): If True, the value of yaw will be set to 0 as minmax - boxes. + with_yaw (bool): If False, the value of yaw will be set to 0 as + axis-aligned boxes tightly enclosing the original boxes.
""" YAW_AXIS = 1 @@ -77,23 +74,25 @@ def __init__(self, @property def height(self): - """torch.Tensor: A vector with height of each box.""" + """torch.Tensor: A vector with height of each box in shape (N, ).""" return self.tensor[:, 4] @property def top_height(self): - """torch.Tensor: A vector with the top height of each box.""" + """torch.Tensor: + A vector with the top height of each box in shape (N, ).""" # the positive direction is down rather than up return self.bottom_height - self.height @property def bottom_height(self): - """torch.Tensor: A vector with bottom's height of each box.""" + """torch.Tensor: + A vector with bottom's height of each box in shape (N, ).""" return self.tensor[:, 1] @property def gravity_center(self): - """torch.Tensor: A tensor with center of each box.""" + """torch.Tensor: A tensor with center of each box in shape (N, 3).""" bottom_center = self.bottom_center gravity_center = torch.zeros_like(bottom_center) gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]] @@ -148,8 +147,8 @@ def corners(self): @property def bev(self): - """torch.Tensor: A n x 5 tensor of 2D BEV box of each box - with rotation in XYWHR format.""" + """torch.Tensor: 2D BEV box of each box with rotation + in XYWHR format, in shape (N, 5).""" bev = self.tensor[:, [0, 2, 3, 5, 6]].clone() # positive direction of the gravity axis # in cam coord system points to the earth @@ -157,27 +156,6 @@ def bev(self): bev[:, -1] = -bev[:, -1] return bev - @property - def nearest_bev(self): - """torch.Tensor: A tensor of 2D BEV box of each box - without rotation.""" - # Obtain BEV boxes with rotation in XZWHR format - bev_rotated_boxes = self.bev - # convert the rotation to a valid range - rotations = bev_rotated_boxes[:, -1] - normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi)) - - # find the center of boxes - conditions = (normed_rotations > np.pi / 4)[..., None] - bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:, - [0, 1, 3, 2]], - bev_rotated_boxes[:, :4]) - - centers = bboxes_xywh[:, :2] - dims = bboxes_xywh[:, 2:] - bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1) - return bev_boxes - def rotate(self, angle, points=None): """Rotate boxes with points (optional) with the given angle or rotation matrix. @@ -185,7 +163,7 @@ def rotate(self, angle, points=None): Args: angle (float | torch.Tensor | np.ndarray): Rotation angle or rotation matrix. - points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional): + points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional): Points to rotate. Defaults to None. Returns: @@ -237,7 +215,7 @@ def flip(self, bev_direction='horizontal', points=None): Args: bev_direction (str): Flip direction (horizontal or vertical). - points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, None): + points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional): Points to flip. Defaults to None. Returns: @@ -264,28 +242,6 @@ def flip(self, bev_direction='horizontal', points=None): points.flip(bev_direction) return points - def in_range_bev(self, box_range): - """Check whether the boxes are in the given range. - - Args: - box_range (list | torch.Tensor): The range of box - (x_min, z_min, x_max, z_max). - - Note: - The original implementation of SECOND checks whether boxes in - a range by checking whether the points are in a convex - polygon, we reduce the burden for simpler cases. - - Returns: - torch.Tensor: Indicating whether each box is inside - the reference range. 
- """ - in_range_flags = ((self.tensor[:, 0] > box_range[0]) - & (self.tensor[:, 2] > box_range[1]) - & (self.tensor[:, 0] < box_range[2]) - & (self.tensor[:, 2] < box_range[3])) - return in_range_flags - @classmethod def height_overlaps(cls, boxes1, boxes2, mode='iou'): """Calculate height overlaps of two boxes. @@ -322,8 +278,9 @@ def convert_to(self, dst, rt_mat=None): Args: dst (:obj:`Box3DMode`): The target Box mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from ``src`` coordinates to ``dst`` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. @@ -336,43 +293,55 @@ def convert_to(self, dst, rt_mat=None): return Box3DMode.convert( box=self, src=Box3DMode.CAM, dst=dst, rt_mat=rt_mat) - def points_in_boxes(self, points): - """Find the box which the points are in. + def points_in_boxes_part(self, points, boxes_override=None): + """Find the box in which each point is. Args: - points (torch.Tensor): Points in shape (N, 3). + points (torch.Tensor): Points in shape (1, M, 3) or (M, 3), + 3 dimensions are (x, y, z) in LiDAR or depth coordinate. + boxes_override (torch.Tensor, optional): Boxes to override + `self.tensor `. Defaults to None. Returns: - torch.Tensor: The index of box where each point are in. + torch.Tensor: The index of the box in which + each point is, in shape (M, ). Default value is -1 + (if the point is not enclosed by any box). """ from .coord_3d_mode import Coord3DMode points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM, Coord3DMode.LIDAR) - boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM, - Coord3DMode.LIDAR) + if boxes_override is not None: + boxes_lidar = boxes_override + else: + boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM, + Coord3DMode.LIDAR) - box_idx = super().points_in_boxes(self, points_lidar, boxes_lidar) + box_idx = super().points_in_boxes_part(points_lidar, boxes_lidar) return box_idx - def points_in_boxes_batch(self, points): - """Find points that are in boxes (CUDA). + def points_in_boxes_all(self, points, boxes_override=None): + """Find all boxes in which each point is. Args: - points (torch.Tensor): Points in shape [1, M, 3] or [M, 3], - 3 dimensions are [x, y, z] in LiDAR coordinate. + points (torch.Tensor): Points in shape (1, M, 3) or (M, 3), + 3 dimensions are (x, y, z) in LiDAR or depth coordinate. + boxes_override (torch.Tensor, optional): Boxes to override + `self.tensor `. Defaults to None. Returns: - torch.Tensor: The index of boxes each point lies in with shape - of (B, M, T). + torch.Tensor: The index of all boxes in which each point is, + in shape (B, M, T). 
""" from .coord_3d_mode import Coord3DMode points_lidar = Coord3DMode.convert(points, Coord3DMode.CAM, Coord3DMode.LIDAR) - boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM, - Coord3DMode.LIDAR) + if boxes_override is not None: + boxes_lidar = boxes_override + else: + boxes_lidar = Coord3DMode.convert(self.tensor, Coord3DMode.CAM, + Coord3DMode.LIDAR) - box_idx = super().points_in_boxes_batch(self, points_lidar, - boxes_lidar) + box_idx = super().points_in_boxes_all(points_lidar, boxes_lidar) return box_idx diff --git a/mmdet3d/core/bbox/structures/coord_3d_mode.py b/mmdet3d/core/bbox/structures/coord_3d_mode.py index c8fdd7ca88..44dba26acb 100644 --- a/mmdet3d/core/bbox/structures/coord_3d_mode.py +++ b/mmdet3d/core/bbox/structures/coord_3d_mode.py @@ -70,8 +70,9 @@ def convert(input, src, dst, rt_mat=None, with_yaw=True, is_point=True): Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7. src (:obj:`Box3DMode` | :obj:`Coord3DMode`): The source mode. dst (:obj:`Box3DMode` | :obj:`Coord3DMode`): The target mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. @@ -113,8 +114,9 @@ def convert_box(box, src, dst, rt_mat=None, with_yaw=True): Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7. src (:obj:`Box3DMode`): The src Box mode. dst (:obj:`Box3DMode`): The target Box mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. @@ -139,8 +141,9 @@ def convert_point(point, src, dst, rt_mat=None): Can be a k-tuple, k-list or an Nxk array/tensor. src (:obj:`CoordMode`): The src Point mode. dst (:obj:`CoordMode`): The target Point mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. 
diff --git a/mmdet3d/core/bbox/structures/depth_box3d.py b/mmdet3d/core/bbox/structures/depth_box3d.py index da54c8a76e..38c1ec7437 100644 --- a/mmdet3d/core/bbox/structures/depth_box3d.py +++ b/mmdet3d/core/bbox/structures/depth_box3d.py @@ -4,7 +4,7 @@ from mmdet3d.core.points import BasePoints from .base_box3d import BaseInstance3DBoxes -from .utils import limit_period, rotation_3d_in_axis +from .utils import rotation_3d_in_axis class DepthInstance3DBoxes(BaseInstance3DBoxes): @@ -41,7 +41,7 @@ class DepthInstance3DBoxes(BaseInstance3DBoxes): @property def gravity_center(self): - """torch.Tensor: A tensor with center of each box.""" + """torch.Tensor: A tensor with center of each box in shape (N, 3).""" bottom_center = self.bottom_center gravity_center = torch.zeros_like(bottom_center) gravity_center[:, :2] = bottom_center[:, :2] @@ -71,8 +71,6 @@ def corners(self): (x0, y0, z0) + ----------- + --------> right x (x1, y0, z0) """ - # TODO: rotation_3d_in_axis function do not support - # empty tensor currently. assert len(self.tensor) != 0 dims = self.dims corners_norm = torch.from_numpy( @@ -90,33 +88,6 @@ def corners(self): corners += self.tensor[:, :3].view(-1, 1, 3) return corners - @property - def bev(self): - """torch.Tensor: A n x 5 tensor of 2D BEV box of each box - in XYWHR format.""" - return self.tensor[:, [0, 1, 3, 4, 6]] - - @property - def nearest_bev(self): - """torch.Tensor: A tensor of 2D BEV box of each box - without rotation.""" - # Obtain BEV boxes with rotation in XYWHR format - bev_rotated_boxes = self.bev - # convert the rotation to a valid range - rotations = bev_rotated_boxes[:, -1] - normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi)) - - # find the center of boxes - conditions = (normed_rotations > np.pi / 4)[..., None] - bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:, - [0, 1, 3, 2]], - bev_rotated_boxes[:, :4]) - - centers = bboxes_xywh[:, :2] - dims = bboxes_xywh[:, 2:] - bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1) - return bev_boxes - def rotate(self, angle, points=None): """Rotate boxes with points (optional) with the given angle or rotation matrix. @@ -124,7 +95,7 @@ def rotate(self, angle, points=None): Args: angle (float | torch.Tensor | np.ndarray): Rotation angle or rotation matrix. - points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional): + points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional): Points to rotate. Defaults to None. Returns: @@ -154,6 +125,8 @@ def rotate(self, angle, points=None): if self.with_yaw: self.tensor[:, 6] += angle else: + # for axis-aligned boxes, we take the new + # enclosing axis-aligned boxes after rotation corners_rot = self.corners @ rot_mat_T new_x_size = corners_rot[..., 0].max( dim=1, keepdim=True)[0] - corners_rot[..., 0].min( @@ -181,8 +154,9 @@ def flip(self, bev_direction='horizontal', points=None): In Depth coordinates, it flips x (horizontal) or y (vertical) axis. Args: - bev_direction (str): Flip direction (horizontal or vertical). - points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, None): + bev_direction (str, optional): Flip direction + (horizontal or vertical). Defaults to 'horizontal'. + points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional): Points to flip. Defaults to None. Returns: @@ -209,35 +183,14 @@ def flip(self, bev_direction='horizontal', points=None): points.flip(bev_direction) return points - def in_range_bev(self, box_range): - """Check whether the boxes are in the given range. 
- - Args: - box_range (list | torch.Tensor): The range of box - (x_min, y_min, x_max, y_max). - - Note: - In the original implementation of SECOND, checking whether - a box in the range checks whether the points are in a convex - polygon, we try to reduce the burdun for simpler cases. - - Returns: - torch.Tensor: Indicating whether each box is inside - the reference range. - """ - in_range_flags = ((self.tensor[:, 0] > box_range[0]) - & (self.tensor[:, 1] > box_range[1]) - & (self.tensor[:, 0] < box_range[2]) - & (self.tensor[:, 1] < box_range[3])) - return in_range_flags - def convert_to(self, dst, rt_mat=None): """Convert self to ``dst`` mode. Args: dst (:obj:`Box3DMode`): The target Box mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from ``src`` coordinates to ``dst`` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. @@ -257,7 +210,7 @@ def enlarged_box(self, extra_width): extra_width (float | torch.Tensor): Extra width to enlarge the box. Returns: - :obj:`LiDARInstance3DBoxes`: Enlarged boxes. + :obj:`DepthInstance3DBoxes`: Enlarged boxes. """ enlarged_boxes = self.tensor.clone() enlarged_boxes[:, 3:6] += extra_width * 2 diff --git a/mmdet3d/core/bbox/structures/lidar_box3d.py b/mmdet3d/core/bbox/structures/lidar_box3d.py index bda2b9b99c..0d14655c63 100644 --- a/mmdet3d/core/bbox/structures/lidar_box3d.py +++ b/mmdet3d/core/bbox/structures/lidar_box3d.py @@ -4,7 +4,7 @@ from mmdet3d.core.points import BasePoints from .base_box3d import BaseInstance3DBoxes -from .utils import limit_period, rotation_3d_in_axis +from .utils import rotation_3d_in_axis class LiDARInstance3DBoxes(BaseInstance3DBoxes): @@ -39,7 +39,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes): @property def gravity_center(self): - """torch.Tensor: A tensor with center of each box.""" + """torch.Tensor: A tensor with center of each box in shape (N, 3).""" bottom_center = self.bottom_center gravity_center = torch.zeros_like(bottom_center) gravity_center[:, :2] = bottom_center[:, :2] @@ -88,33 +88,6 @@ def corners(self): corners += self.tensor[:, :3].view(-1, 1, 3) return corners - @property - def bev(self): - """torch.Tensor: 2D BEV box of each box with rotation - in XYWHR format.""" - return self.tensor[:, [0, 1, 3, 4, 6]] - - @property - def nearest_bev(self): - """torch.Tensor: A tensor of 2D BEV box of each box - without rotation.""" - # Obtain BEV boxes with rotation in XYWHR format - bev_rotated_boxes = self.bev - # convert the rotation to a valid range - rotations = bev_rotated_boxes[:, -1] - normed_rotations = torch.abs(limit_period(rotations, 0.5, np.pi)) - - # find the center of boxes - conditions = (normed_rotations > np.pi / 4)[..., None] - bboxes_xywh = torch.where(conditions, bev_rotated_boxes[:, - [0, 1, 3, 2]], - bev_rotated_boxes[:, :4]) - - centers = bboxes_xywh[:, :2] - dims = bboxes_xywh[:, 2:] - bev_boxes = torch.cat([centers - dims / 2, centers + dims / 2], dim=-1) - return bev_boxes - def rotate(self, angle, points=None): """Rotate boxes with points (optional) with the given angle or rotation matrix. @@ -122,7 +95,7 @@ def rotate(self, angle, points=None): Args: angles (float | torch.Tensor | np.ndarray): Rotation angle or rotation matrix. 
- points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, optional): + points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional): Points to rotate. Defaults to None. Returns: @@ -174,7 +147,7 @@ def flip(self, bev_direction='horizontal', points=None): Args: bev_direction (str): Flip direction (horizontal or vertical). - points (torch.Tensor, numpy.ndarray, :obj:`BasePoints`, None): + points (torch.Tensor | np.ndarray | :obj:`BasePoints`, optional): Points to flip. Defaults to None. Returns: @@ -201,34 +174,14 @@ def flip(self, bev_direction='horizontal', points=None): points.flip(bev_direction) return points - def in_range_bev(self, box_range): - """Check whether the boxes are in the given range. - - Args: - box_range (list | torch.Tensor): the range of box - (x_min, y_min, x_max, y_max) - - Note: - The original implementation of SECOND checks whether boxes in - a range by checking whether the points are in a convex - polygon, we reduce the burden for simpler cases. - - Returns: - torch.Tensor: Whether each box is inside the reference range. - """ - in_range_flags = ((self.tensor[:, 0] > box_range[0]) - & (self.tensor[:, 1] > box_range[1]) - & (self.tensor[:, 0] < box_range[2]) - & (self.tensor[:, 1] < box_range[3])) - return in_range_flags - def convert_to(self, dst, rt_mat=None): """Convert self to ``dst`` mode. Args: dst (:obj:`Box3DMode`): the target Box mode - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from ``src`` coordinates to ``dst`` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. diff --git a/mmdet3d/core/bbox/transforms.py b/mmdet3d/core/bbox/transforms.py index 13015556ac..8a2eb90f54 100644 --- a/mmdet3d/core/bbox/transforms.py +++ b/mmdet3d/core/bbox/transforms.py @@ -51,10 +51,10 @@ def bbox3d2result(bboxes, scores, labels, attrs=None): """Convert detection results to a list of numpy arrays. Args: - bboxes (torch.Tensor): Bounding boxes with shape of (n, 5). - labels (torch.Tensor): Labels with shape of (n, ). - scores (torch.Tensor): Scores with shape of (n, ). - attrs (torch.Tensor, optional): Attributes with shape of (n, ). + bboxes (torch.Tensor): Bounding boxes with shape (N, 5). + labels (torch.Tensor): Labels with shape (N, ). + scores (torch.Tensor): Scores with shape (N, ). + attrs (torch.Tensor, optional): Attributes with shape (N, ). Defaults to None. Returns: diff --git a/mmdet3d/core/evaluation/indoor_eval.py b/mmdet3d/core/evaluation/indoor_eval.py index 50d758655a..2ff9877329 100644 --- a/mmdet3d/core/evaluation/indoor_eval.py +++ b/mmdet3d/core/evaluation/indoor_eval.py @@ -9,9 +9,9 @@ def average_precision(recalls, precisions, mode='area'): """Calculate average precision (for single or multiple scales). Args: - recalls (np.ndarray): Recalls with shape of (num_scales, num_dets) \ + recalls (np.ndarray): Recalls with shape of (num_scales, num_dets) or (num_dets, ). - precisions (np.ndarray): Precisions with shape of \ + precisions (np.ndarray): Precisions with shape of (num_scales, num_dets) or (num_dets, ). mode (str): 'area' or '11points', 'area' means calculating the area under precision-recall curve, '11points' means calculating @@ -58,13 +58,13 @@ def eval_det_cls(pred, gt, iou_thr=None): single class. 
Args: - pred (dict): Predictions mapping from image id to bounding boxes \ + pred (dict): Predictions mapping from image id to bounding boxes and scores. gt (dict): Ground truths mapping from image id to bounding boxes. iou_thr (list[float]): A list of iou thresholds. Return: - tuple (np.ndarray, np.ndarray, float): Recalls, precisions and \ + tuple (np.ndarray, np.ndarray, float): Recalls, precisions and average precision. """ @@ -170,10 +170,9 @@ def eval_map_recall(pred, gt, ovthresh=None): Args: pred (dict): Information of detection results, which maps class_id and predictions. - gt (dict): Information of ground truths, which maps class_id and \ + gt (dict): Information of ground truths, which maps class_id and ground truths. - ovthresh (list[float]): iou threshold. - Default: None. + ovthresh (list[float], optional): iou threshold. Default: None. Return: tuple[dict]: dict results of recall, AP, and precision for all classes. @@ -218,12 +217,12 @@ def indoor_eval(gt_annos, includes the following keys - labels_3d (torch.Tensor): Labels of boxes. - - boxes_3d (:obj:`BaseInstance3DBoxes`): \ + - boxes_3d (:obj:`BaseInstance3DBoxes`): 3D bounding boxes in Depth coordinate. - scores_3d (torch.Tensor): Scores of boxes. metric (list[float]): IoU thresholds for computing average precisions. label2cat (dict): Map from label to category. - logger (logging.Logger | str | None): The way to print the mAP + logger (logging.Logger | str, optional): The way to print the mAP summary. See `mmdet.utils.print_log()` for details. Default: None. Return: diff --git a/mmdet3d/core/evaluation/kitti_utils/rotate_iou.py b/mmdet3d/core/evaluation/kitti_utils/rotate_iou.py index 2f0c9c8e50..bd9cdad135 100644 --- a/mmdet3d/core/evaluation/kitti_utils/rotate_iou.py +++ b/mmdet3d/core/evaluation/kitti_utils/rotate_iou.py @@ -291,7 +291,8 @@ def rotate_iou_kernel_eval(N, dev_query_boxes, dev_iou, criterion=-1): - """Kernel of computing rotated iou. + """Kernel of computing rotated IoU. This function is for bev boxes in + camera coordinate system ONLY (the rotation is clockwise). Args: N (int): The number of boxes. @@ -343,10 +344,14 @@ def rotate_iou_gpu_eval(boxes, query_boxes, criterion=-1, device_id=0): in one example with numba.cuda code). convert from [this project]( https://github.com/hongzhenwang/RRPN-revise/tree/master/lib/rotation). + This function is for bev boxes in camera coordinate system ONLY + (the rotation is clockwise). + Args: boxes (torch.Tensor): rbboxes. format: centers, dims, angles(clockwise when positive) with the shape of [N, 5]. - query_boxes (float tensor: [K, 5]): rbboxes to compute iou with boxes. + query_boxes (torch.FloatTensor, shape=(K, 5)): + rbboxes to compute iou with boxes. device_id (int, optional): Defaults to 0. Device to use. criterion (int, optional): Indicate different type of iou. -1 indicate `area_inter / (area1 + area2 - area_inter)`, diff --git a/mmdet3d/core/evaluation/lyft_eval.py b/mmdet3d/core/evaluation/lyft_eval.py index b6aa775ef9..6e4f21e4b8 100644 --- a/mmdet3d/core/evaluation/lyft_eval.py +++ b/mmdet3d/core/evaluation/lyft_eval.py @@ -18,7 +18,7 @@ def load_lyft_gts(lyft, data_root, eval_split, logger=None): lyft (:obj:`LyftDataset`): Lyft class in the sdk. data_root (str): Root of data for reading splits. eval_split (str): Name of the split for evaluation. - logger (logging.Logger | str | None): Logger used for printing + logger (logging.Logger | str, optional): Logger used for printing related information during evaluation. Default: None. 
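To make the AP convention from `indoor_eval` above concrete, here is a hedged sketch of calling `average_precision` from `mmdet3d.core.evaluation.indoor_eval` on a toy recall/precision curve (the values are made up for illustration):

```python
import numpy as np
from mmdet3d.core.evaluation.indoor_eval import average_precision

# one scale, four detections: recalls and precisions are paired columns
recalls = np.array([[0.25, 0.50, 0.75, 1.00]])
precisions = np.array([[1.00, 1.00, 0.67, 0.50]])

# 'area' integrates the PR curve; '11points' samples recall at 0.0..1.0
ap = average_precision(recalls, precisions, mode='area')
print(ap)  # one AP value per scale
```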
Returns: @@ -96,7 +96,7 @@ def lyft_eval(lyft, data_root, res_path, eval_set, output_dir, logger=None): res_path (str): Path of result json file recording detections. eval_set (str): Name of the split for evaluation. output_dir (str): Output directory for output json files. - logger (logging.Logger | str | None): Logger used for printing + logger (logging.Logger | str, optional): Logger used for printing related information during evaluation. Default: None. Returns: @@ -202,9 +202,9 @@ def get_single_class_aps(gt, predictions, iou_thresholds): Args: gt (list[dict]): list of dictionaries in the format described above. - predictions (list[dict]): list of dictionaries in the format \ + predictions (list[dict]): list of dictionaries in the format described below. - iou_thresholds (list[float]): IOU thresholds used to calculate \ + iou_thresholds (list[float]): IOU thresholds used to calculate TP / FN Returns: diff --git a/mmdet3d/core/evaluation/seg_eval.py b/mmdet3d/core/evaluation/seg_eval.py index ab26de0241..4a3166d685 100644 --- a/mmdet3d/core/evaluation/seg_eval.py +++ b/mmdet3d/core/evaluation/seg_eval.py @@ -77,7 +77,7 @@ def seg_eval(gt_labels, seg_preds, label2cat, ignore_index, logger=None): seg_preds (list[torch.Tensor]): Predictions. label2cat (dict): Map from label to category name. ignore_index (int): Index that will be ignored in evaluation. - logger (logging.Logger | str | None): The way to print the mAP + logger (logging.Logger | str, optional): The way to print the mAP summary. See `mmdet.utils.print_log()` for details. Default: None. Returns: diff --git a/mmdet3d/core/evaluation/waymo_utils/prediction_kitti_to_waymo.py b/mmdet3d/core/evaluation/waymo_utils/prediction_kitti_to_waymo.py index 72a3883a8a..32d6eb640e 100644 --- a/mmdet3d/core/evaluation/waymo_utils/prediction_kitti_to_waymo.py +++ b/mmdet3d/core/evaluation/waymo_utils/prediction_kitti_to_waymo.py @@ -114,7 +114,7 @@ def parse_one_object(instance_idx): instance_idx (int): Index of the instance to be converted. Returns: - :obj:`Object`: Predicted instance in waymo dataset \ + :obj:`Object`: Predicted instance in waymo dataset Object proto. """ cls = kitti_result['name'][instance_idx] diff --git a/mmdet3d/core/points/base_points.py b/mmdet3d/core/points/base_points.py index b3a0e6508b..e740f4a6e3 100644 --- a/mmdet3d/core/points/base_points.py +++ b/mmdet3d/core/points/base_points.py @@ -12,17 +12,17 @@ class BasePoints(object): Args: tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. - points_dim (int): Number of the dimension of a point. - Each row is (x, y, z). Default to 3. - attribute_dims (dict): Dictionary to indicate the meaning of extra - dimension. Default to None. + points_dim (int, optional): Number of the dimension of a point. + Each row is (x, y, z). Defaults to 3. + attribute_dims (dict, optional): Dictionary to indicate the + meaning of extra dimension. Defaults to None. Attributes: tensor (torch.Tensor): Float matrix of N x points_dim. points_dim (int): Integer indicating the dimension of a point. Each row is (x, y, z, ...). attribute_dims (bool): Dictionary to indicate the meaning of extra - dimension. Default to None. + dimension. Defaults to None. rotation_axis (int): Default rotation axis for points rotation. 
""" @@ -47,7 +47,7 @@ def __init__(self, tensor, points_dim=3, attribute_dims=None): @property def coord(self): - """torch.Tensor: Coordinates of each point with size (N, 3).""" + """torch.Tensor: Coordinates of each point in shape (N, 3).""" return self.tensor[:, :3] @coord.setter @@ -63,7 +63,8 @@ def coord(self, tensor): @property def height(self): - """torch.Tensor: A vector with height of each point.""" + """torch.Tensor: + A vector with height of each point in shape (N, 1), or None.""" if self.attribute_dims is not None and \ 'height' in self.attribute_dims.keys(): return self.tensor[:, self.attribute_dims['height']] @@ -93,7 +94,8 @@ def height(self, tensor): @property def color(self): - """torch.Tensor: A vector with color of each point.""" + """torch.Tensor: + A vector with color of each point in shape (N, 3), or None.""" if self.attribute_dims is not None and \ 'color' in self.attribute_dims.keys(): return self.tensor[:, self.attribute_dims['color']] @@ -145,7 +147,7 @@ def rotate(self, rotation, axis=None): Args: rotation (float | np.ndarray | torch.Tensor): Rotation matrix or angle. - axis (int): Axis to rotate at. Defaults to None. + axis (int, optional): Axis to rotate at. Defaults to None. """ if not isinstance(rotation, torch.Tensor): rotation = self.tensor.new_tensor(rotation) @@ -169,7 +171,11 @@ def rotate(self, rotation, axis=None): @abstractmethod def flip(self, bev_direction='horizontal'): - """Flip the points in BEV along given BEV direction.""" + """Flip the points along given BEV direction. + + Args: + bev_direction (str): Flip direction (horizontal or vertical). + """ pass def translate(self, trans_vector): @@ -206,7 +212,7 @@ def in_range_3d(self, point_range): polygon, we try to reduce the burden for simpler cases. Returns: - torch.Tensor: A binary vector indicating whether each point is \ + torch.Tensor: A binary vector indicating whether each point is inside the reference range. """ in_range_flags = ((self.tensor[:, 0] > point_range[0]) @@ -217,7 +223,11 @@ def in_range_3d(self, point_range): & (self.tensor[:, 2] < point_range[5])) return in_range_flags - @abstractmethod + @property + def bev(self): + """torch.Tensor: BEV of the points in shape (N, 2).""" + return self.tensor[:, [0, 1]] + def in_range_bev(self, point_range): """Check whether the points are in the given range. @@ -226,10 +236,14 @@ def in_range_bev(self, point_range): in order of (x_min, y_min, x_max, y_max). Returns: - torch.Tensor: Indicating whether each point is inside \ + torch.Tensor: Indicating whether each point is inside the reference range. """ - pass + in_range_flags = ((self.bev[:, 0] > point_range[0]) + & (self.bev[:, 1] > point_range[1]) + & (self.bev[:, 1] < point_range[2]) + & (self.bev[:, 1] < point_range[3])) + return in_range_flags @abstractmethod def convert_to(self, dst, rt_mat=None): @@ -237,14 +251,15 @@ def convert_to(self, dst, rt_mat=None): Args: dst (:obj:`CoordMode`): The target Box mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. Returns: - :obj:`BasePoints`: The converted box of the same type \ + :obj:`BasePoints`: The converted box of the same type in the `dst` mode. 
""" pass @@ -276,7 +291,7 @@ def __getitem__(self, item): subject to Pytorch's indexing semantics. Returns: - :obj:`BasePoints`: A new object of \ + :obj:`BasePoints`: A new object of :class:`BasePoints` after indexing. """ original_type = type(self) @@ -367,7 +382,7 @@ def to(self, device): device (str | :obj:`torch.device`): The name of the device. Returns: - :obj:`BasePoints`: A new boxes object on the \ + :obj:`BasePoints`: A new boxes object on the specific device. """ original_type = type(self) @@ -380,7 +395,7 @@ def clone(self): """Clone the Points. Returns: - :obj:`BasePoints`: Box object with the same properties \ + :obj:`BasePoints`: Box object with the same properties as self. """ original_type = type(self) @@ -405,14 +420,14 @@ def __iter__(self): def new_point(self, data): """Create a new point object with data. - The new point and its tensor has the similar properties \ + The new point and its tensor has the similar properties as self and self.tensor, respectively. Args: data (torch.Tensor | numpy.array | list): Data to be copied. Returns: - :obj:`BasePoints`: A new point object with ``data``, \ + :obj:`BasePoints`: A new point object with ``data``, the object's other properties are similar to ``self``. """ new_tensor = self.tensor.new_tensor(data) \ diff --git a/mmdet3d/core/points/cam_points.py b/mmdet3d/core/points/cam_points.py index cb866ddcfb..a57c3db1e8 100644 --- a/mmdet3d/core/points/cam_points.py +++ b/mmdet3d/core/points/cam_points.py @@ -7,17 +7,17 @@ class CameraPoints(BasePoints): Args: tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. - points_dim (int): Number of the dimension of a point. - Each row is (x, y, z). Default to 3. - attribute_dims (dict): Dictionary to indicate the meaning of extra - dimension. Default to None. + points_dim (int, optional): Number of the dimension of a point. + Each row is (x, y, z). Defaults to 3. + attribute_dims (dict, optional): Dictionary to indicate the + meaning of extra dimension. Defaults to None. Attributes: tensor (torch.Tensor): Float matrix of N x points_dim. points_dim (int): Integer indicating the dimension of a point. Each row is (x, y, z, ...). attribute_dims (bool): Dictionary to indicate the meaning of extra - dimension. Default to None. + dimension. Defaults to None. rotation_axis (int): Default rotation axis for points rotation. """ @@ -27,42 +27,35 @@ def __init__(self, tensor, points_dim=3, attribute_dims=None): self.rotation_axis = 1 def flip(self, bev_direction='horizontal'): - """Flip the boxes in BEV along given BEV direction.""" + """Flip the points along given BEV direction. + + Args: + bev_direction (str): Flip direction (horizontal or vertical). + """ if bev_direction == 'horizontal': self.tensor[:, 0] = -self.tensor[:, 0] elif bev_direction == 'vertical': self.tensor[:, 2] = -self.tensor[:, 2] - def in_range_bev(self, point_range): - """Check whether the points are in the given range. - - Args: - point_range (list | torch.Tensor): The range of point - in order of (x_min, y_min, x_max, y_max). - - Returns: - torch.Tensor: Indicating whether each point is inside \ - the reference range. - """ - in_range_flags = ((self.tensor[:, 0] > point_range[0]) - & (self.tensor[:, 2] > point_range[1]) - & (self.tensor[:, 0] < point_range[2]) - & (self.tensor[:, 2] < point_range[3])) - return in_range_flags + @property + def bev(self): + """torch.Tensor: BEV of the points in shape (N, 2).""" + return self.tensor[:, [0, 2]] def convert_to(self, dst, rt_mat=None): """Convert self to ``dst`` mode. 
Args: dst (:obj:`CoordMode`): The target Point mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. Returns: - :obj:`BasePoints`: The converted point of the same type \ + :obj:`BasePoints`: The converted point of the same type in the `dst` mode. """ from mmdet3d.core.bbox import Coord3DMode diff --git a/mmdet3d/core/points/depth_points.py b/mmdet3d/core/points/depth_points.py index 3dfd5f7cd8..2d9221fb25 100644 --- a/mmdet3d/core/points/depth_points.py +++ b/mmdet3d/core/points/depth_points.py @@ -7,17 +7,17 @@ class DepthPoints(BasePoints): Args: tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. - points_dim (int): Number of the dimension of a point. - Each row is (x, y, z). Default to 3. - attribute_dims (dict): Dictionary to indicate the meaning of extra - dimension. Default to None. + points_dim (int, optional): Number of the dimension of a point. + Each row is (x, y, z). Defaults to 3. + attribute_dims (dict, optional): Dictionary to indicate the + meaning of extra dimension. Defaults to None. Attributes: tensor (torch.Tensor): Float matrix of N x points_dim. points_dim (int): Integer indicating the dimension of a point. Each row is (x, y, z, ...). attribute_dims (bool): Dictionary to indicate the meaning of extra - dimension. Default to None. + dimension. Defaults to None. rotation_axis (int): Default rotation axis for points rotation. """ @@ -27,42 +27,30 @@ def __init__(self, tensor, points_dim=3, attribute_dims=None): self.rotation_axis = 2 def flip(self, bev_direction='horizontal'): - """Flip the boxes in BEV along given BEV direction.""" + """Flip the points along given BEV direction. + + Args: + bev_direction (str): Flip direction (horizontal or vertical). + """ if bev_direction == 'horizontal': self.tensor[:, 0] = -self.tensor[:, 0] elif bev_direction == 'vertical': self.tensor[:, 1] = -self.tensor[:, 1] - def in_range_bev(self, point_range): - """Check whether the points are in the given range. - - Args: - point_range (list | torch.Tensor): The range of point - in order of (x_min, y_min, x_max, y_max). - - Returns: - torch.Tensor: Indicating whether each point is inside \ - the reference range. - """ - in_range_flags = ((self.tensor[:, 0] > point_range[0]) - & (self.tensor[:, 1] > point_range[1]) - & (self.tensor[:, 0] < point_range[2]) - & (self.tensor[:, 1] < point_range[3])) - return in_range_flags - def convert_to(self, dst, rt_mat=None): """Convert self to ``dst`` mode. Args: dst (:obj:`CoordMode`): The target Point mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. Returns: - :obj:`BasePoints`: The converted point of the same type \ + :obj:`BasePoints`: The converted point of the same type in the `dst` mode. 
""" from mmdet3d.core.bbox import Coord3DMode diff --git a/mmdet3d/core/points/lidar_points.py b/mmdet3d/core/points/lidar_points.py index ec0c98e273..ff4f57ab0e 100644 --- a/mmdet3d/core/points/lidar_points.py +++ b/mmdet3d/core/points/lidar_points.py @@ -7,17 +7,17 @@ class LiDARPoints(BasePoints): Args: tensor (torch.Tensor | np.ndarray | list): a N x points_dim matrix. - points_dim (int): Number of the dimension of a point. - Each row is (x, y, z). Default to 3. - attribute_dims (dict): Dictionary to indicate the meaning of extra - dimension. Default to None. + points_dim (int, optional): Number of the dimension of a point. + Each row is (x, y, z). Defaults to 3. + attribute_dims (dict, optional): Dictionary to indicate the + meaning of extra dimension. Defaults to None. Attributes: tensor (torch.Tensor): Float matrix of N x points_dim. points_dim (int): Integer indicating the dimension of a point. Each row is (x, y, z, ...). attribute_dims (bool): Dictionary to indicate the meaning of extra - dimension. Default to None. + dimension. Defaults to None. rotation_axis (int): Default rotation axis for points rotation. """ @@ -27,42 +27,30 @@ def __init__(self, tensor, points_dim=3, attribute_dims=None): self.rotation_axis = 2 def flip(self, bev_direction='horizontal'): - """Flip the boxes in BEV along given BEV direction.""" + """Flip the points along given BEV direction. + + Args: + bev_direction (str): Flip direction (horizontal or vertical). + """ if bev_direction == 'horizontal': self.tensor[:, 1] = -self.tensor[:, 1] elif bev_direction == 'vertical': self.tensor[:, 0] = -self.tensor[:, 0] - def in_range_bev(self, point_range): - """Check whether the points are in the given range. - - Args: - point_range (list | torch.Tensor): The range of point - in order of (x_min, y_min, x_max, y_max). - - Returns: - torch.Tensor: Indicating whether each point is inside \ - the reference range. - """ - in_range_flags = ((self.tensor[:, 0] > point_range[0]) - & (self.tensor[:, 1] > point_range[1]) - & (self.tensor[:, 0] < point_range[2]) - & (self.tensor[:, 1] < point_range[3])) - return in_range_flags - def convert_to(self, dst, rt_mat=None): """Convert self to ``dst`` mode. Args: dst (:obj:`CoordMode`): The target Point mode. - rt_mat (np.ndarray | torch.Tensor): The rotation and translation - matrix between different coordinates. Defaults to None. + rt_mat (np.ndarray | torch.Tensor, optional): The rotation and + translation matrix between different coordinates. + Defaults to None. The conversion from `src` coordinates to `dst` coordinates usually comes along the change of sensors, e.g., from camera to LiDAR. This requires a transformation matrix. Returns: - :obj:`BasePoints`: The converted point of the same type \ + :obj:`BasePoints`: The converted point of the same type in the `dst` mode. """ from mmdet3d.core.bbox import Coord3DMode diff --git a/mmdet3d/core/post_processing/box3d_nms.py b/mmdet3d/core/post_processing/box3d_nms.py index 638c9aaae0..e9d65d1bed 100644 --- a/mmdet3d/core/post_processing/box3d_nms.py +++ b/mmdet3d/core/post_processing/box3d_nms.py @@ -15,13 +15,15 @@ def box3d_multiclass_nms(mlvl_bboxes, mlvl_dir_scores=None, mlvl_attr_scores=None, mlvl_bboxes2d=None): - """Multi-class nms for 3D boxes. + """Multi-class NMS for 3D boxes. The IoU used for NMS is defined as the 2D + IoU between BEV boxes. Args: mlvl_bboxes (torch.Tensor): Multi-level boxes with shape (N, M). M is the dimensions of boxes. 
mlvl_bboxes_for_nms (torch.Tensor): Multi-level boxes with shape (N, 5) ([x1, y1, x2, y2, ry]). N is the number of boxes. + The coordinate system of the BEV boxes is counterclockwise. mlvl_scores (torch.Tensor): Multi-level scores with shape (N, C + 1). N is the number of boxes. C is the number of classes. score_thr (float): Score threshold to filter boxes with low @@ -36,8 +38,8 @@ def box3d_multiclass_nms(mlvl_bboxes, boxes. Defaults to None. Returns: - tuple[torch.Tensor]: Return results after nms, including 3D \ - bounding boxes, scores, labels, direction scores, attribute \ + tuple[torch.Tensor]: Return results after nms, including 3D + bounding boxes, scores, labels, direction scores, attribute scores (optional) and 2D bounding boxes (optional). """ # do multi class nms @@ -128,13 +130,13 @@ def box3d_multiclass_nms(mlvl_bboxes, def aligned_3d_nms(boxes, scores, classes, thresh): - """3d nms for aligned boxes. + """3D NMS for aligned boxes. Args: boxes (torch.Tensor): Aligned box with shape [n, 6]. scores (torch.Tensor): Scores of each box. classes (torch.Tensor): Class of each box. - thresh (float): Iou threshold for nms. + thresh (float): IoU threshold for nms. Returns: torch.Tensor: Indices of selected boxes. @@ -188,8 +190,8 @@ def circle_nms(dets, thresh, post_max_size=83): Args: dets (torch.Tensor): Detection results with the shape of [N, 3]. thresh (float): Value of threshold. - post_max_size (int): Max number of prediction to be kept. Defaults - to 83 + post_max_size (int, optional): Max number of predictions to be kept. + Defaults to 83. Returns: torch.Tensor: Indexes of the detections to be kept. diff --git a/mmdet3d/core/utils/array_converter.py b/mmdet3d/core/utils/array_converter.py index eeb3699973..fa623afee4 100644 --- a/mmdet3d/core/utils/array_converter.py +++ b/mmdet3d/core/utils/array_converter.py @@ -11,19 +11,20 @@ def array_converter(to_torch=True, """Wrapper function for data-type agnostic processing. First converts input arrays to PyTorch tensors or NumPy ndarrays - for middle calculation, then convert output to original data-type. + for middle calculation, then converts output to original data-type if + `recover=True`. Args: - to_torch (Bool): Whether convert to PyTorch tensors + to_torch (bool, optional): Whether convert to PyTorch tensors for middle calculation. Defaults to True. - apply_to (tuple[str]): The arguments to which we apply data-type - conversion. Defaults to an empty tuple. - template_arg_name_ (str): Argument serving as the template ( + apply_to (tuple[str], optional): The arguments to which we apply + data-type conversion. Defaults to an empty tuple. + template_arg_name_ (str, optional): Argument serving as the template ( return arrays should have the same dtype and device as the template). Defaults to None. If None, we will use the first argument in `apply_to` as the template argument. - recover (Bool): Whether or not recover the wrapped function outputs - to the `template_arg_name_` type. Defaults to True. + recover (bool, optional): Whether or not recover the wrapped function + outputs to the `template_arg_name_` type. Defaults to True. Raises: ValueError: When template_arg_name_ is not among all args, or @@ -254,9 +255,10 @@ def convert(self, input_array, target_type=None, target_array=None): input_array (tuple | list | np.ndarray | torch.Tensor | int | float ): Input array. Defaults to None. - target_type (np.ndarray | torch.Tensor): + target_type (np.ndarray | torch.Tensor, + optional): Type to which input array is converted. Defaults to None.
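As an illustration of the decorator documented above, a hedged sketch (the function `shift_points` is hypothetical): arrays named in `apply_to` are converted to tensors for the computation, and the output is converted back to the template argument's type when `recover=True`:

```python
import numpy as np
from mmdet3d.core.utils.array_converter import array_converter

@array_converter(apply_to=('pts',))
def shift_points(pts, offset=1.0):
    # inside the wrapper, `pts` has been converted to a torch.Tensor
    return pts + offset

out = shift_points(np.zeros((4, 3), dtype=np.float32))
print(type(out))  # <class 'numpy.ndarray'>, recovered from the template
```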
- target_array (np.ndarray | torch.Tensor): + target_array (np.ndarray | torch.Tensor, optional): Template array to which input array is converted. Defaults to None. diff --git a/mmdet3d/core/utils/gaussian.py b/mmdet3d/core/utils/gaussian.py index a07963e151..da9e3bb77e 100644 --- a/mmdet3d/core/utils/gaussian.py +++ b/mmdet3d/core/utils/gaussian.py @@ -8,7 +8,7 @@ def gaussian_2d(shape, sigma=1): Args: shape (list[int]): Shape of the map. - sigma (float): Sigma to generate gaussian map. + sigma (float, optional): Sigma to generate gaussian map. Defaults to 1. Returns: @@ -29,7 +29,7 @@ def draw_heatmap_gaussian(heatmap, center, radius, k=1): heatmap (torch.Tensor): Heatmap to be masked. center (torch.Tensor): Center coord of the heatmap. radius (int): Radius of gaussian. - K (int): Multiple of masked_gaussian. Defaults to 1. + k (int, optional): Multiple of masked_gaussian. Defaults to 1. Returns: torch.Tensor: Masked heatmap. @@ -59,7 +59,7 @@ def gaussian_radius(det_size, min_overlap=0.5): Args: det_size (tuple[torch.Tensor]): Size of the detection result. - min_overlap (float): Gaussian_overlap. Defaults to 0.5. + min_overlap (float, optional): Gaussian overlap. Defaults to 0.5. Returns: torch.Tensor: Computed radius. diff --git a/mmdet3d/core/visualizer/image_vis.py b/mmdet3d/core/visualizer/image_vis.py index 891fb549c0..4144bb7e5f 100644 --- a/mmdet3d/core/visualizer/image_vis.py +++ b/mmdet3d/core/visualizer/image_vis.py @@ -18,7 +18,7 @@ def project_pts_on_img(points, raw_img (numpy.array): The numpy array of image. lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix according to the camera intrinsic parameters. - max_distance (float): the max distance of the points cloud. + max_distance (float, optional): the max distance of the point cloud. Default: 70. thickness (int, optional): The thickness of 2D points. Default: -1. """ @@ -69,7 +69,8 @@ def plot_rect3d_on_img(img, num_rects (int): Number of 3D rectangulars. rect_corners (numpy.array): Coordinates of the corners of 3D rectangulars. Should be in the shape of [num_rect, 8, 2]. - color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0). + color (tuple[int], optional): The color to draw bboxes. + Default: (0, 255, 0). thickness (int, optional): The thickness of bboxes. Default: 1. """ line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7), @@ -99,7 +100,8 @@ def draw_lidar_bbox3d_on_img(bboxes3d, lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix according to the camera intrinsic parameters. img_metas (dict): Useless here. - color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0). + color (tuple[int], optional): The color to draw bboxes. + Default: (0, 255, 0). thickness (int, optional): The thickness of bboxes. Default: 1. """ img = raw_img.copy() @@ -136,7 +138,8 @@ def draw_depth_bbox3d_on_img(bboxes3d, raw_img (numpy.array): The numpy array of image. calibs (dict): Camera calibration information, Rt and K. img_metas (dict): Used in coordinates transformation. - color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0). + color (tuple[int], optional): The color to draw bboxes. + Default: (0, 255, 0). thickness (int, optional): The thickness of bboxes. Default: 1. """ from mmdet3d.core.bbox import points_cam2img @@ -176,7 +179,8 @@ def draw_camera_bbox3d_on_img(bboxes3d, cam2img (dict): Camera intrinsic matrix, denoted as `K` in depth bbox coordinate system. img_metas (dict): Useless here. - color (tuple[int]): The color to draw bboxes.
Default: (0, 255, 0). + color (tuple[int], optional): The color to draw bboxes. + Default: (0, 255, 0). thickness (int, optional): The thickness of bboxes. Default: 1. """ from mmdet3d.core.bbox import points_cam2img diff --git a/mmdet3d/core/visualizer/open3d_vis.py b/mmdet3d/core/visualizer/open3d_vis.py index 0790ee483a..7a286b3c8a 100644 --- a/mmdet3d/core/visualizer/open3d_vis.py +++ b/mmdet3d/core/visualizer/open3d_vis.py @@ -22,12 +22,12 @@ def _draw_points(points, points (numpy.array | torch.tensor, shape=[N, 3+C]): points to visualize. vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer. - points_size (int): the size of points to show on visualizer. + points_size (int, optional): the size of points to show on visualizer. Default: 2. - point_color (tuple[float]): the color of points. + point_color (tuple[float], optional): the color of points. Default: (0.5, 0.5, 0.5). - mode (str): indicate type of the input points, avaliable mode - ['xyz', 'xyzrgb']. Default: 'xyz'. + mode (str, optional): indicate type of the input points, + available mode ['xyz', 'xyzrgb']. Default: 'xyz'. Returns: tuple: points, color of each point. @@ -69,19 +69,21 @@ def _draw_bboxes(bbox3d, Args: bbox3d (numpy.array | torch.tensor, shape=[M, 7]): - 3d bbox (x, y, z, dx, dy, dz, yaw) to visualize. + 3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize. vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer. points_colors (numpy.array): color of each points. - pcd (:obj:`open3d.geometry.PointCloud`): point cloud. Default: None. - bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0). - points_in_box_color (tuple[float]): + pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud. + Default: None. + bbox_color (tuple[float], optional): the color of bbox. + Default: (0, 1, 0). + points_in_box_color (tuple[float], optional): the color of points inside bbox3d. Default: (1, 0, 0). - rot_axis (int): rotation axis of bbox. Default: 2. - center_mode (bool): indicate the center of bbox is bottom center - or gravity center. avaliable mode + rot_axis (int, optional): rotation axis of bbox. Default: 2. + center_mode (str, optional): indicate the center of bbox is + bottom center or gravity center. available mode + ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'. - mode (str): indicate type of the input points, avaliable mode - ['xyz', 'xyzrgb']. Default: 'xyz'. + mode (str, optional): indicate type of the input points, + available mode ['xyz', 'xyzrgb']. Default: 'xyz'. """ if isinstance(bbox3d, torch.Tensor): bbox3d = bbox3d.cpu().numpy() @@ -135,23 +137,27 @@ def show_pts_boxes(points, Args: points (numpy.array | torch.tensor, shape=[N, 3+C]): points to visualize. - bbox3d (numpy.array | torch.tensor, shape=[M, 7]): - 3d bbox (x, y, z, dx, dy, dz, yaw) to visualize. Default: None. - show (bool): whether to show the visualization results. Default: True. - save_path (str): path to save visualized results. Default: None. - points_size (int): the size of points to show on visualizer. + bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional): + 3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize. + Defaults to None. + show (bool, optional): whether to show the visualization results. + Default: True. + save_path (str, optional): path to save visualized results. + Default: None. + points_size (int, optional): the size of points to show on visualizer. Default: 2. - point_color (tuple[float]): the color of points.
+ point_color (tuple[float], optional): the color of points. Default: (0.5, 0.5, 0.5). - bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0). - points_in_box_color (tuple[float]): + bbox_color (tuple[float], optional): the color of bbox. + Default: (0, 1, 0). + points_in_box_color (tuple[float], optional): the color of points which are in bbox3d. Default: (1, 0, 0). - rot_axis (int): rotation axis of bbox. Default: 2. - center_mode (bool): indicate the center of bbox is bottom center - or gravity center. avaliable mode + rot_axis (int, optional): rotation axis of bbox. Default: 2. + center_mode (str, optional): indicate the center of bbox is bottom + center or gravity center. available mode ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'. - mode (str): indicate type of the input points, avaliable mode - ['xyz', 'xyzrgb']. Default: 'xyz'. + mode (str, optional): indicate type of the input points, available + mode ['xyz', 'xyzrgb']. Default: 'xyz'. """ # TODO: support score and class info assert 0 <= rot_axis <= 2 @@ -196,21 +202,23 @@ def _draw_bboxes_ind(bbox3d, Args: bbox3d (numpy.array | torch.tensor, shape=[M, 7]): - 3d bbox (x, y, z, dx, dy, dz, yaw) to visualize. + 3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize. vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer. indices (numpy.array | torch.tensor, shape=[N, M]): indicate which bbox3d that each point lies in. points_colors (numpy.array): color of each points. - pcd (:obj:`open3d.geometry.PointCloud`): point cloud. Default: None. - bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0). - points_in_box_color (tuple[float]): + pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud. + Default: None. + bbox_color (tuple[float], optional): the color of bbox. + Default: (0, 1, 0). + points_in_box_color (tuple[float], optional): the color of points which are in bbox3d. Default: (1, 0, 0). - rot_axis (int): rotation axis of bbox. Default: 2. - center_mode (bool): indicate the center of bbox is bottom center - or gravity center. avaliable mode + rot_axis (int, optional): rotation axis of bbox. Default: 2. + center_mode (str, optional): indicate the center of bbox is + bottom center or gravity center. available mode + ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'. - mode (str): indicate type of the input points, avaliable mode - ['xyz', 'xyzrgb']. Default: 'xyz'. + mode (str, optional): indicate type of the input points, + available mode ['xyz', 'xyzrgb']. Default: 'xyz'. """ if isinstance(bbox3d, torch.Tensor): bbox3d = bbox3d.cpu().numpy() @@ -270,24 +278,28 @@ def show_pts_index_boxes(points, points (numpy.array | torch.tensor, shape=[N, 3+C]): points to visualize. bbox3d (numpy.array | torch.tensor, shape=[M, 7]): - 3d bbox (x, y, z, dx, dy, dz, yaw) to visualize. Default: None. - show (bool): whether to show the visualization results. Default: True. - indices (numpy.array | torch.tensor, shape=[N, M]): + 3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize. + Defaults to None. + show (bool, optional): whether to show the visualization results. + Default: True. + indices (numpy.array | torch.tensor, shape=[N, M], optional): indicate which bbox3d that each point lies in. Default: None. - save_path (str): path to save visualized results. Default: None. - points_size (int): the size of points to show on visualizer. + save_path (str, optional): path to save visualized results. + Default: None.
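For reference, a hedged sketch of driving the `show_pts_boxes` helper documented above (requires Open3D and a display; the box values are made up):

```python
import numpy as np
from mmdet3d.core.visualizer.open3d_vis import show_pts_boxes

points = np.random.rand(1000, 3).astype(np.float32)
# one box: (x, y, z, x_size, y_size, z_size, yaw); with the default
# center_mode='lidar_bottom', z is the bottom center of the box
bbox3d = np.array([[0.5, 0.5, 0.0, 0.4, 0.4, 0.4, 0.0]])
show_pts_boxes(points, bbox3d)  # opens an Open3D window
```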
+ points_size (int, optional): the size of points to show on visualizer. Default: 2. - point_color (tuple[float]): the color of points. + point_color (tuple[float], optional): the color of points. Default: (0.5, 0.5, 0.5). - bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0). - points_in_box_color (tuple[float]): + bbox_color (tuple[float], optional): the color of bbox. + Default: (0, 1, 0). + points_in_box_color (tuple[float], optional): the color of points which are in bbox3d. Default: (1, 0, 0). - rot_axis (int): rotation axis of bbox. Default: 2. - center_mode (bool): indicate the center of bbox is bottom center - or gravity center. avaliable mode + rot_axis (int, optional): rotation axis of bbox. Default: 2. + center_mode (str, optional): indicate the center of bbox is + bottom center or gravity center. available mode + ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'. - mode (str): indicate type of the input points, avaliable mode - ['xyz', 'xyzrgb']. Default: 'xyz'. + mode (str, optional): indicate type of the input points, + available mode ['xyz', 'xyzrgb']. Default: 'xyz'. """ # TODO: support score and class info assert 0 <= rot_axis <= 2 @@ -324,24 +336,27 @@ class Visualizer(object): points (numpy.array, shape=[N, 3+C]): Points to visualize. The point cloud is in mode of Coord3DMode.DEPTH (please refer to core.structures.coord_3d_mode). - bbox3d (numpy.array, shape=[M, 7]): 3d bbox (x, y, z, dx, dy, dz, yaw) - to visualize. The 3d bbox is in mode of Box3DMode.DEPTH with + bbox3d (numpy.array, shape=[M, 7], optional): 3D bbox + (x, y, z, x_size, y_size, z_size, yaw) to visualize. + The 3D bbox is in mode of Box3DMode.DEPTH with gravity_center (please refer to core.structures.box_3d_mode). Default: None. - save_path (str): path to save visualized results. Default: None. - points_size (int): the size of points to show on visualizer. + save_path (str, optional): path to save visualized results. + Default: None. + points_size (int, optional): the size of points to show on visualizer. Default: 2. - point_color (tuple[float]): the color of points. + point_color (tuple[float], optional): the color of points. Default: (0.5, 0.5, 0.5). - bbox_color (tuple[float]): the color of bbox. Default: (0, 1, 0). - points_in_box_color (tuple[float]): + bbox_color (tuple[float], optional): the color of bbox. + Default: (0, 1, 0). + points_in_box_color (tuple[float], optional): the color of points which are in bbox3d. Default: (1, 0, 0). - rot_axis (int): rotation axis of bbox. Default: 2. - center_mode (bool): indicate the center of bbox is bottom center - or gravity center. avaliable mode + rot_axis (int, optional): rotation axis of bbox. Default: 2. + center_mode (str, optional): indicate the center of bbox is + bottom center or gravity center. available mode + ['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'. - mode (str): indicate type of the input points, avaliable mode - ['xyz', 'xyzrgb']. Default: 'xyz'. + mode (str, optional): indicate type of the input points, + available mode ['xyz', 'xyzrgb']. Default: 'xyz'. """ def __init__(self, @@ -390,9 +405,10 @@ def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None): Args: bbox3d (numpy.array, shape=[M, 7]): - 3D bbox (x, y, z, dx, dy, dz, yaw) to be visualized. - The 3d bbox is in mode of Box3DMode.DEPTH with - gravity_center (please refer to core.structures.box_3d_mode). + 3D bbox (x, y, z, x_size, y_size, z_size, yaw) + to be visualized.
The 3d bbox is in mode of + Box3DMode.DEPTH with gravity_center (please refer to + core.structures.box_3d_mode). bbox_color (tuple[float]): the color of bbox. Default: None. points_in_box_color (tuple[float]): the color of points which are in bbox3d. Default: None. @@ -431,7 +447,7 @@ def show(self, save_path=None): """Visualize the point cloud. Args: - save_path (str): path to save image. Default: None. + save_path (str, optional): path to save image. Default: None. """ self.o3d_visualizer.run() diff --git a/mmdet3d/core/visualizer/show_result.py b/mmdet3d/core/visualizer/show_result.py index 3329f6013e..b809bf755f 100644 --- a/mmdet3d/core/visualizer/show_result.py +++ b/mmdet3d/core/visualizer/show_result.py @@ -35,7 +35,7 @@ def _write_oriented_bbox(scene_bbox, out_filename): Args: scene_bbox(list[ndarray] or ndarray): xyz pos of center and - 3 lengths (dx,dy,dz) and heading angle around Z axis. + 3 lengths (x_size, y_size, z_size) and heading angle around Z axis. Y forward, X right, Z upward. heading angle of positive X is 0, heading angle of positive Y is 90 degrees. out_filename(str): Filename. @@ -87,8 +87,10 @@ def show_result(points, pred_bboxes (np.ndarray): Predicted boxes. out_dir (str): Path of output directory filename (str): Filename of the current frame. - show (bool): Visualize the results online. Defaults to False. - snapshot (bool): Whether to save the online results. Defaults to False. + show (bool, optional): Visualize the results online. + Defaults to False. + snapshot (bool, optional): Whether to save the online results. + Defaults to False. """ result_path = osp.join(out_dir, filename) mmcv.mkdir_or_exist(result_path) @@ -141,10 +143,10 @@ def show_seg_result(points, out_dir (str): Path of output directory filename (str): Filename of the current frame. palette (np.ndarray): Mapping between class labels and colors. - ignore_index (int, optional): The label index to be ignored, e.g. \ + ignore_index (int, optional): The label index to be ignored, e.g. unannotated points. Defaults to None. show (bool, optional): Visualize the results online. Defaults to False. - snapshot (bool, optional): Whether to save the online results. \ + snapshot (bool, optional): Whether to save the online results. Defaults to False. """ # we need 3D coordinates to visualize segmentation mask @@ -219,14 +221,16 @@ def show_multi_modality_result(img, according to the camera intrinsic parameters. out_dir (str): Path of output directory. filename (str): Filename of the current frame. - box_mode (str): Coordinate system the boxes are in. Should be one of - 'depth', 'lidar' and 'camera'. Defaults to 'lidar'. - img_metas (dict): Used in projecting depth bbox. - show (bool): Visualize the results online. Defaults to False. - gt_bbox_color (str or tuple(int)): Color of bbox lines. - The tuple of color should be in BGR order. Default: (255, 102, 61) - pred_bbox_color (str or tuple(int)): Color of bbox lines. - The tuple of color should be in BGR order. Default: (72, 101, 241) + box_mode (str, optional): Coordinate system the boxes are in. + Should be one of 'depth', 'lidar' and 'camera'. + Defaults to 'lidar'. + img_metas (dict, optional): Used in projecting depth bbox. + Defaults to None. + show (bool, optional): Visualize the results online. Defaults to False. + gt_bbox_color (str or tuple(int), optional): Color of bbox lines. + The tuple of color should be in BGR order. Default: (255, 102, 61). + pred_bbox_color (str or tuple(int), optional): Color of bbox lines.
+ The tuple of color should be in BGR order. Default: (72, 101, 241). """ if box_mode == 'depth': draw_bbox = draw_depth_bbox3d_on_img diff --git a/mmdet3d/core/voxel/voxel_generator.py b/mmdet3d/core/voxel/voxel_generator.py index 615b749558..404f2cdc9b 100644 --- a/mmdet3d/core/voxel/voxel_generator.py +++ b/mmdet3d/core/voxel/voxel_generator.py @@ -82,18 +82,18 @@ def points_to_voxel(points, """convert kitti points(N, >=3) to voxels. Args: - points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and \ + points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and points[:, 3:] contain other information such as reflectivity. voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size - coors_range (list[float | tuple[float] | ndarray]): Voxel range. \ + coors_range (list[float | tuple[float] | ndarray]): Voxel range. format: xyzxyz, minmax max_points (int): Indicate maximum points contained in a voxel. - reverse_index (bool): Whether return reversed coordinates. \ - if points has xyz format and reverse_index is True, output \ - coordinates will be zyx format, but points in features always \ + reverse_index (bool): Whether return reversed coordinates. + if points has xyz format and reverse_index is True, output + coordinates will be zyx format, but points in features always xyz format. - max_voxels (int): Maximum number of voxels this function creates. \ - For second, 20000 is a good choice. Points should be shuffled for \ + max_voxels (int): Maximum number of voxels this function creates. + For second, 20000 is a good choice. Points should be shuffled for randomness before this function because max_voxels drops points. Returns: @@ -147,20 +147,20 @@ def _points_to_voxel_reverse_kernel(points, """convert kitti points(N, >=3) to voxels. Args: - points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and \ + points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and points[:, 3:] contain other information such as reflectivity. - voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size \ - coors_range (list[float | tuple[float] | ndarray]): Range of voxels. \ + voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size + coors_range (list[float | tuple[float] | ndarray]): Range of voxels. format: xyzxyz, minmax num_points_per_voxel (int): Number of points per voxel. - coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W), \ - which has the same shape as the complete voxel map. It indicates \ + coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W), + which has the same shape as the complete voxel map. It indicates the index of each corresponding voxel. voxels (np.ndarray): Created empty voxels. coors (np.ndarray): Created coordinates of each voxel. max_points (int): Indicate maximum points contained in a voxel. - max_voxels (int): Maximum number of voxels this function create. \ - for second, 20000 is a good choice. Points should be shuffled for \ + max_voxels (int): Maximum number of voxels this function creates. + For second, 20000 is a good choice. Points should be shuffled for randomness before this function because max_voxels drops points. Returns: @@ -221,20 +221,20 @@ def _points_to_voxel_kernel(points, """convert kitti points(N, >=3) to voxels. Args: - points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and \ + points (np.ndarray): [N, ndim]. points[:, :3] contain xyz points and points[:, 3:] contain other information such as reflectivity.
voxel_size (list, tuple, np.ndarray): [3] xyz, indicate voxel size. - coors_range (list[float | tuple[float] | ndarray]): Range of voxels. \ + coors_range (list[float | tuple[float] | ndarray]): Range of voxels. format: xyzxyz, minmax num_points_per_voxel (int): Number of points per voxel. - coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W), \ - which has the same shape as the complete voxel map. It indicates \ + coor_to_voxel_idx (np.ndarray): A voxel grid of shape (D, H, W), + which has the same shape as the complete voxel map. It indicates the index of each corresponding voxel. voxels (np.ndarray): Created empty voxels. coors (np.ndarray): Created coordinates of each voxel. max_points (int): Indicate maximum points contained in a voxel. - max_voxels (int): Maximum number of voxels this function create. \ - for second, 20000 is a good choice. Points should be shuffled for \ + max_voxels (int): Maximum number of voxels this function creates. + For second, 20000 is a good choice. Points should be shuffled for randomness before this function because max_voxels drops points. Returns: diff --git a/mmdet3d/datasets/custom_3d.py b/mmdet3d/datasets/custom_3d.py index 3d17aeb7ca..939390713c 100644 --- a/mmdet3d/datasets/custom_3d.py +++ b/mmdet3d/datasets/custom_3d.py @@ -88,7 +88,7 @@ def get_data_info(self, index): index (int): Index of the sample data to get. Returns: - dict: Data information that will be passed to the data \ + dict: Data information that will be passed to the data preprocessing pipelines. It includes the following keys: - sample_idx (str): Sample index. @@ -177,7 +177,7 @@ def get_classes(cls, classes=None): """Get class names of current dataset. Args: - classes (Sequence[str] | str | None): If classes is None, use + classes (Sequence[str] | str): If classes is None, use default CLASSES defined by builtin dataset. If classes is a string, take it as a file name. The file contains the name of classes where each line contains one class name. If classes is @@ -207,13 +207,13 @@ def format_results(self, Args: outputs (list[dict]): Testing results of the dataset. - pklfile_prefix (str | None): The prefix of pkl files. It includes + pklfile_prefix (str): The prefix of pkl files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Default: None. Returns: - tuple: (outputs, tmp_dir), outputs is the detection results, \ - tmp_dir is the temporal directory created for saving json \ + tuple: (outputs, tmp_dir), outputs is the detection results, + tmp_dir is the temporal directory created for saving json files when ``jsonfile_prefix`` is not specified. """ if pklfile_prefix is None: @@ -237,11 +237,14 @@ def evaluate(self, Args: results (list[dict]): List of results. - metric (str | list[str]): Metrics to be evaluated. - iou_thr (list[float]): AP IoU thresholds. - show (bool): Whether to visualize. + metric (str | list[str], optional): Metrics to be evaluated. + Defaults to None. + iou_thr (list[float]): AP IoU thresholds. Defaults to (0.25, 0.5). + logger (logging.Logger | str, optional): Logger used for printing + related information during evaluation. Defaults to None. + show (bool, optional): Whether to visualize. Default: False. - out_dir (str): Path to save the visualization results. + out_dir (str, optional): Path to save the visualization results. Default: None. pipeline (list[dict], optional): raw data loading for showing. Default: None.
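Tying the voxelization docstrings above together, a hedged sketch of a `points_to_voxel` call with typical KITTI-style settings (illustrative values; assumes the SECOND-style return of `(voxels, coors, num_points_per_voxel)`):

```python
import numpy as np
from mmdet3d.core.voxel.voxel_generator import points_to_voxel

# shuffle first: max_voxels silently drops points once the budget is hit
n = 20000
points = np.stack([
    np.random.uniform(0, 69.12, n),       # x
    np.random.uniform(-39.68, 39.68, n),  # y
    np.random.uniform(-3, 1, n),          # z
    np.random.uniform(0, 1, n),           # reflectivity
], axis=1).astype(np.float32)
np.random.shuffle(points)

voxels, coors, num_points_per_voxel = points_to_voxel(
    points,
    voxel_size=[0.16, 0.16, 4.0],
    coors_range=[0, -39.68, -3, 69.12, 39.68, 1],
    max_points=35,
    reverse_index=True,   # returned coordinates are in (z, y, x) order
    max_voxels=20000)
```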
@@ -281,7 +284,7 @@ def _get_pipeline(self, pipeline): """Get data loading pipeline in self.show/evaluate function. Args: - pipeline (list[dict] | None): Input pipeline. If None is given, \ + pipeline (list[dict]): Input pipeline. If None is given, get from self.pipeline. """ if pipeline is None: diff --git a/mmdet3d/datasets/custom_3d_seg.py b/mmdet3d/datasets/custom_3d_seg.py index c07aa675a4..52b5d0b37f 100644 --- a/mmdet3d/datasets/custom_3d_seg.py +++ b/mmdet3d/datasets/custom_3d_seg.py @@ -32,7 +32,7 @@ class Custom3DSegDataset(Dataset): as input. Defaults to None. test_mode (bool, optional): Whether the dataset is in test mode. Defaults to False. - ignore_index (int, optional): The label index to be ignored, e.g. \ + ignore_index (int, optional): The label index to be ignored, e.g. unannotated points. If None is given, set to len(self.CLASSES) to be consistent with PointSegClassMapping function in pipeline. Defaults to None. @@ -102,7 +102,7 @@ def get_data_info(self, index): index (int): Index of the sample data to get. Returns: - dict: Data information that will be passed to the data \ + dict: Data information that will be passed to the data preprocessing pipelines. It includes the following keys: - sample_idx (str): Sample index. @@ -179,13 +179,13 @@ def get_classes_and_palette(self, classes=None, palette=None): This function is taken from MMSegmentation. Args: - classes (Sequence[str] | str | None): If classes is None, use + classes (Sequence[str] | str): If classes is None, use default CLASSES defined by builtin dataset. If classes is a string, take it as a file name. The file contains the name of classes where each line contains one class name. If classes is a tuple or list, override the CLASSES defined by the dataset. Defaults to None. - palette (Sequence[Sequence[int]]] | np.ndarray | None): + palette (Sequence[Sequence[int]]] | np.ndarray): The palette of segmentation map. If None is given, random palette will be generated. Defaults to None. """ @@ -276,13 +276,13 @@ def format_results(self, Args: outputs (list[dict]): Testing results of the dataset. - pklfile_prefix (str | None): The prefix of pkl files. It includes + pklfile_prefix (str): The prefix of pkl files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Default: None. Returns: - tuple: (outputs, tmp_dir), outputs is the detection results, \ - tmp_dir is the temporal directory created for saving json \ + tuple: (outputs, tmp_dir), outputs is the detection results, + tmp_dir is the temporal directory created for saving json files when ``jsonfile_prefix`` is not specified. """ if pklfile_prefix is None: @@ -306,7 +306,7 @@ def evaluate(self, Args: results (list[dict]): List of results. metric (str | list[str]): Metrics to be evaluated. - logger (logging.Logger | None | str): Logger used for printing + logger (logging.Logger | str, optional): Logger used for printing related information during evaluation. Defaults to None. show (bool, optional): Whether to visualize. Defaults to False. @@ -364,7 +364,7 @@ def _get_pipeline(self, pipeline): """Get data loading pipeline in self.show/evaluate function. Args: - pipeline (list[dict] | None): Input pipeline. If None is given, \ + pipeline (list[dict]): Input pipeline. If None is given, get from self.pipeline. 
""" if pipeline is None: diff --git a/mmdet3d/datasets/kitti2d_dataset.py b/mmdet3d/datasets/kitti2d_dataset.py index 9254bcd6b8..65d08eca11 100644 --- a/mmdet3d/datasets/kitti2d_dataset.py +++ b/mmdet3d/datasets/kitti2d_dataset.py @@ -206,7 +206,8 @@ def reformat_bbox(self, outputs, out=None): Args: outputs (list[np.ndarray]): List of arrays storing the inferenced bounding boxes and scores. - out (str | None): The prefix of output file. Default: None. + out (str, optional): The prefix of output file. + Default: None. Returns: list[dict]: A list of dictionaries with the kitti 2D format. @@ -222,7 +223,7 @@ def evaluate(self, result_files, eval_types=None): Args: result_files (str): Path of result files. - eval_types (str): Types of evaluation. Default: None. + eval_types (str, optional): Types of evaluation. Default: None. KITTI dataset only support 'bbox' evaluation type. Returns: diff --git a/mmdet3d/datasets/kitti_dataset.py b/mmdet3d/datasets/kitti_dataset.py index 66ca3a7d96..01fc8ae1c6 100644 --- a/mmdet3d/datasets/kitti_dataset.py +++ b/mmdet3d/datasets/kitti_dataset.py @@ -47,8 +47,9 @@ class KittiDataset(Custom3DDataset): Defaults to True. test_mode (bool, optional): Whether the dataset is in test mode. Defaults to False. - pcd_limit_range (list): The range of point cloud used to filter - invalid predicted boxes. Default: [0, -40, -3, 70.4, 40, 0.0]. + pcd_limit_range (list, optional): The range of point cloud used to + filter invalid predicted boxes. + Default: [0, -40, -3, 70.4, 40, 0.0]. """ CLASSES = ('car', 'pedestrian', 'cyclist') @@ -100,14 +101,14 @@ def get_data_info(self, index): index (int): Index of the sample data to get. Returns: - dict: Data information that will be passed to the data \ + dict: Data information that will be passed to the data preprocessing pipelines. It includes the following keys: - sample_idx (str): Sample index. - pts_filename (str): Filename of point clouds. - - img_prefix (str | None): Prefix of image files. + - img_prefix (str): Prefix of image files. - img_info (dict): Image info. - - lidar2img (list[np.ndarray], optional): Transformations \ + - lidar2img (list[np.ndarray], optional): Transformations from lidar to different cameras. - ann_info (dict): Annotation info. """ @@ -145,7 +146,7 @@ def get_ann_info(self, index): Returns: dict: annotation information consists of the following keys: - - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \ + - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): 3D ground truth bboxes. - gt_labels_3d (np.ndarray): Labels of ground truths. - gt_bboxes (np.ndarray): 2D ground truth bboxes. @@ -248,17 +249,17 @@ def format_results(self, Args: outputs (list[dict]): Testing results of the dataset. - pklfile_prefix (str | None): The prefix of pkl files. It includes + pklfile_prefix (str): The prefix of pkl files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Default: None. - submission_prefix (str | None): The prefix of submitted files. It + submission_prefix (str): The prefix of submitted files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Default: None. 
         Returns:
-            tuple: (result_files, tmp_dir), result_files is a dict containing \
-                the json filepaths, tmp_dir is the temporal directory created \
+            tuple: (result_files, tmp_dir), result_files is a dict containing
+                the json filepaths, tmp_dir is the temporary directory created
                 for saving json files when jsonfile_prefix is not specified.
         """
         if pklfile_prefix is None:
@@ -308,17 +309,18 @@ def evaluate(self,
         Args:
             results (list[dict]): Testing results of the dataset.
-            metric (str | list[str]): Metrics to be evaluated.
-            logger (logging.Logger | str | None): Logger used for printing
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Default: None.
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            pklfile_prefix (str | None): The prefix of pkl files. It includes
+            pklfile_prefix (str, optional): The prefix of pkl files, including
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            submission_prefix (str | None): The prefix of submission datas.
+            submission_prefix (str, optional): The prefix of submission data.
                 If not specified, the submission data will not be generated.
-            show (bool): Whether to visualize.
+            show (bool, optional): Whether to visualize.
                 Default: False.
-            out_dir (str): Path to save the visualization results.
+            out_dir (str, optional): Path to save the visualization results.
                 Default: None.
             pipeline (list[dict], optional): raw data loading for showing.
                 Default: None.
@@ -371,11 +373,11 @@ def bbox2result_kitti(self,
         submission.

         Args:
-            net_outputs (list[np.ndarray]): List of array storing the \
+            net_outputs (list[np.ndarray]): List of array storing the
                 inferenced bounding boxes and scores.
             class_names (list[String]): A list of class names.
-            pklfile_prefix (str | None): The prefix of pkl file.
-            submission_prefix (str | None): The prefix of submission file.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.

         Returns:
             list[dict]: A list of dictionaries with the kitti format.
@@ -486,11 +488,11 @@ def bbox2result_kitti2d(self,
         submission.

         Args:
-            net_outputs (list[np.ndarray]): List of array storing the \
+            net_outputs (list[np.ndarray]): List of array storing the
                 inferenced bounding boxes and scores.
             class_names (list[String]): A list of class names.
-            pklfile_prefix (str | None): The prefix of pkl file.
-            submission_prefix (str | None): The prefix of submission file.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.

         Returns:
             list[dict]: A list of dictionaries have the kitti format
@@ -604,9 +606,9 @@ def convert_valid_bboxes(self, box_dict, info):
             dict: Valid predicted boxes.

                 - bbox (np.ndarray): 2D bounding boxes.
-                - box3d_camera (np.ndarray): 3D bounding boxes in \
+                - box3d_camera (np.ndarray): 3D bounding boxes in
                     camera coordinate.
-                - box3d_lidar (np.ndarray): 3D bounding boxes in \
+                - box3d_lidar (np.ndarray): 3D bounding boxes in
                     LiDAR coordinate.
                 - scores (np.ndarray): Scores of boxes.
                 - label_preds (np.ndarray): Class label predictions.
diff --git a/mmdet3d/datasets/kitti_mono_dataset.py b/mmdet3d/datasets/kitti_mono_dataset.py
index ba77690f5f..9b8e262f7b 100644
--- a/mmdet3d/datasets/kitti_mono_dataset.py
+++ b/mmdet3d/datasets/kitti_mono_dataset.py
@@ -57,8 +57,8 @@ def _parse_ann_info(self, img_info, ann_info):
             with_mask (bool): Whether to parse mask annotations.
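For orientation, a hedged sketch of how the KITTI format_results/evaluate pair documented in the hunks above is typically driven. Here `dataset` and `outputs` are assumed to be an already-built KittiDataset and its test-time results, the prefix paths are purely illustrative, and the cleanup call assumes the returned handle behaves like a tempfile.TemporaryDirectory:

import os

# Write KITTI-style result files; per the Returns section above, tmp_dir is
# only non-None when no prefix was given and a temp file had to be created.
result_files, tmp_dir = dataset.format_results(
    outputs,
    pklfile_prefix=os.path.join('work_dirs', 'results', 'prefix'),
    submission_prefix=os.path.join('work_dirs', 'submission', 'prefix'))

# Evaluate with defaults; out_dir enables saved visualizations.
metrics = dataset.evaluate(outputs, out_dir='work_dirs/vis')

if tmp_dir is not None:
    tmp_dir.cleanup()  # assumption: a TemporaryDirectory-like handle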
         Returns:
-            dict: A dict containing the following keys: bboxes, bboxes_ignore,\
-                labels, masks, seg_map. "masks" are raw annotations and not \
+            dict: A dict containing the following keys: bboxes, bboxes_ignore,
+                labels, masks, seg_map. "masks" are raw annotations and not
                 decoded into binary masks.
         """
         gt_bboxes = []
@@ -147,17 +147,17 @@ def format_results(self,
         Args:
             outputs (list[dict]): Testing results of the dataset.
-            pklfile_prefix (str | None): The prefix of pkl files. It includes
+            pklfile_prefix (str): The prefix of pkl files. It includes
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            submission_prefix (str | None): The prefix of submitted files. It
+            submission_prefix (str): The prefix of submitted files. It
                 includes the file path and the prefix of filename, e.g.,
                 "a/b/prefix". If not specified, a temp file will be created.
                 Default: None.

         Returns:
-            tuple: (result_files, tmp_dir), result_files is a dict containing \
-                the json filepaths, tmp_dir is the temporal directory created \
+            tuple: (result_files, tmp_dir), result_files is a dict containing
+                the json filepaths, tmp_dir is the temporary directory created
                 for saving json files when jsonfile_prefix is not specified.
         """
         if pklfile_prefix is None:
@@ -207,17 +207,18 @@ def evaluate(self,
         Args:
             results (list[dict]): Testing results of the dataset.
-            metric (str | list[str]): Metrics to be evaluated.
-            logger (logging.Logger | str | None): Logger used for printing
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Defaults to None.
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            pklfile_prefix (str | None): The prefix of pkl files. It includes
+            pklfile_prefix (str, optional): The prefix of pkl files, including
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            submission_prefix (str | None): The prefix of submission datas.
+            submission_prefix (str, optional): The prefix of submission data.
                 If not specified, the submission data will not be generated.
-            show (bool): Whether to visualize.
+            show (bool, optional): Whether to visualize.
                 Default: False.
-            out_dir (str): Path to save the visualization results.
+            out_dir (str, optional): Path to save the visualization results.
                 Default: None.

         Returns:
@@ -268,11 +269,11 @@ def bbox2result_kitti(self,
         submission.

         Args:
-            net_outputs (list[np.ndarray]): List of array storing the \
+            net_outputs (list[np.ndarray]): List of array storing the
                 inferenced bounding boxes and scores.
             class_names (list[String]): A list of class names.
-            pklfile_prefix (str | None): The prefix of pkl file.
-            submission_prefix (str | None): The prefix of submission file.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.

         Returns:
             list[dict]: A list of dictionaries with the kitti format.
@@ -383,11 +384,11 @@ def bbox2result_kitti2d(self,
         submission.

         Args:
-            net_outputs (list[np.ndarray]): List of array storing the \
+            net_outputs (list[np.ndarray]): List of array storing the
                 inferenced bounding boxes and scores.
             class_names (list[String]): A list of class names.
-            pklfile_prefix (str | None): The prefix of pkl file.
-            submission_prefix (str | None): The prefix of submission file.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.
         Returns:
             list[dict]: A list of dictionaries have the kitti format
@@ -498,7 +499,7 @@ def convert_valid_bboxes(self, box_dict, info):
         Returns:
             dict: Valid predicted boxes.

                 - bbox (np.ndarray): 2D bounding boxes.
-                - box3d_camera (np.ndarray): 3D bounding boxes in \
+                - box3d_camera (np.ndarray): 3D bounding boxes in
                     camera coordinate.
                 - scores (np.ndarray): Scores of boxes.
                 - label_preds (np.ndarray): Class label predictions.
diff --git a/mmdet3d/datasets/lyft_dataset.py b/mmdet3d/datasets/lyft_dataset.py
index 7d366be46d..b1966bb428 100644
--- a/mmdet3d/datasets/lyft_dataset.py
+++ b/mmdet3d/datasets/lyft_dataset.py
@@ -129,7 +129,7 @@ def get_data_info(self, index):
             index (int): Index of the sample data to get.

         Returns:
-            dict: Data information that will be passed to the data \
+            dict: Data information that will be passed to the data
                 preprocessing pipelines. It includes the following keys:

                 - sample_idx (str): sample index
@@ -137,7 +137,7 @@ def get_data_info(self, index):
                 - sweeps (list[dict]): infos of sweeps
                 - timestamp (float): sample timestamp
                 - img_filename (str, optional): image filename
-                - lidar2img (list[np.ndarray], optional): transformations \
+                - lidar2img (list[np.ndarray], optional): transformations
                     from lidar to different cameras
                 - ann_info (dict): annotation info
         """
@@ -190,7 +190,7 @@ def get_ann_info(self, index):
         Returns:
             dict: Annotation information consists of the following keys:

-                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \
+                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
                     3D ground truth bboxes.
                 - gt_labels_3d (np.ndarray): Labels of ground truths.
                 - gt_names (list[str]): Class names of ground truths.
@@ -275,10 +275,11 @@ def _evaluate_single(self,
         Args:
             result_path (str): Path of the result file.
-            logger (logging.Logger | str | None): Logger used for printing
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            metric (str): Metric name used for evaluation. Default: 'bbox'.
-            result_name (str): Result name in the metric prefix.
+            metric (str, optional): Metric name used for evaluation.
+                Default: 'bbox'.
+            result_name (str, optional): Result name in the metric prefix.
                 Default: 'pts_bbox'.

         Returns:
@@ -312,18 +313,18 @@ def format_results(self, results, jsonfile_prefix=None, csv_savepath=None):
         Args:
             results (list[dict]): Testing results of the dataset.
-            jsonfile_prefix (str | None): The prefix of json files. It includes
+            jsonfile_prefix (str): The prefix of json files. It includes
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            csv_savepath (str | None): The path for saving csv files.
+            csv_savepath (str): The path for saving csv files.
                 It includes the file path and the csv filename,
                 e.g., "a/b/filename.csv". If not specified, the result will
                 not be converted to csv file.

         Returns:
-            tuple: Returns (result_files, tmp_dir), where `result_files` is a \
-                dict containing the json filepaths, `tmp_dir` is the temporal \
-                directory created for saving json files when \
+            tuple: Returns (result_files, tmp_dir), where `result_files` is a
+                dict containing the json filepaths, `tmp_dir` is the temporary
+                directory created for saving json files when
                 `jsonfile_prefix` is not specified.
         """
         assert isinstance(results, list), 'results must be a list'
@@ -372,19 +373,22 @@ def evaluate(self,
         Args:
             results (list[dict]): Testing results of the dataset.
-            metric (str | list[str]): Metrics to be evaluated.
-            logger (logging.Logger | str | None): Logger used for printing
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Default: 'bbox'.
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            jsonfile_prefix (str | None): The prefix of json files. It includes
+            jsonfile_prefix (str, optional): The prefix of json files, including
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            csv_savepath (str | None): The path for saving csv files.
+            csv_savepath (str, optional): The path for saving csv files.
                 It includes the file path and the csv filename,
                 e.g., "a/b/filename.csv". If not specified, the result will
                 not be converted to csv file.
-            show (bool): Whether to visualize.
+            result_names (list[str], optional): Result names in the
+                metric prefix. Default: ['pts_bbox'].
+            show (bool, optional): Whether to visualize.
                 Default: False.
-            out_dir (str): Path to save the visualization results.
+            out_dir (str, optional): Path to save the visualization results.
                 Default: None.
             pipeline (list[dict], optional): raw data loading for showing.
                 Default: None.
diff --git a/mmdet3d/datasets/nuscenes_dataset.py b/mmdet3d/datasets/nuscenes_dataset.py
index 8381ee081c..f5dae641e1 100644
--- a/mmdet3d/datasets/nuscenes_dataset.py
+++ b/mmdet3d/datasets/nuscenes_dataset.py
@@ -48,8 +48,9 @@ class NuScenesDataset(Custom3DDataset):
         Defaults to False.
     eval_version (bool, optional): Configuration version of evaluation.
         Defaults to 'detection_cvpr_2019'.
-    use_valid_flag (bool): Whether to use `use_valid_flag` key in the info
-        file as mask to filter gt_boxes and gt_names. Defaults to False.
+    use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
+        in the info file as mask to filter gt_boxes and gt_names.
+        Defaults to False.
    """
    NameMapping = {
        'movable_object.barrier': 'barrier',
@@ -196,7 +197,7 @@ def get_data_info(self, index):
             index (int): Index of the sample data to get.

         Returns:
-            dict: Data information that will be passed to the data \
+            dict: Data information that will be passed to the data
                 preprocessing pipelines. It includes the following keys:

                 - sample_idx (str): Sample index.
@@ -204,7 +205,7 @@ def get_data_info(self, index):
                 - sweeps (list[dict]): Infos of sweeps.
                 - timestamp (float): Sample timestamp.
                 - img_filename (str, optional): Image filename.
-                - lidar2img (list[np.ndarray], optional): Transformations \
+                - lidar2img (list[np.ndarray], optional): Transformations
                     from lidar to different cameras.
                 - ann_info (dict): Annotation info.
         """
@@ -256,7 +257,7 @@ def get_ann_info(self, index):
         Returns:
             dict: Annotation information consists of the following keys:

-                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): \
+                - gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`):
                     3D ground truth bboxes
                 - gt_labels_3d (np.ndarray): Labels of ground truths.
                 - gt_names (list[str]): Class names of ground truths.
@@ -374,10 +375,11 @@ def _evaluate_single(self,
         Args:
             result_path (str): Path of the result file.
-            logger (logging.Logger | str | None): Logger used for printing
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            metric (str): Metric name used for evaluation. Default: 'bbox'.
-            result_name (str): Result name in the metric prefix.
+            metric (str, optional): Metric name used for evaluation.
+                Default: 'bbox'.
+            result_name (str, optional): Result name in the metric prefix.
                Default: 'pts_bbox'.

         Returns:
@@ -427,14 +429,14 @@ def format_results(self, results, jsonfile_prefix=None):
         Args:
             results (list[dict]): Testing results of the dataset.
-            jsonfile_prefix (str | None): The prefix of json files. It includes
+            jsonfile_prefix (str): The prefix of json files. It includes
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.

         Returns:
-            tuple: Returns (result_files, tmp_dir), where `result_files` is a \
-                dict containing the json filepaths, `tmp_dir` is the temporal \
-                directory created for saving json files when \
+            tuple: Returns (result_files, tmp_dir), where `result_files` is a
+                dict containing the json filepaths, `tmp_dir` is the temporary
+                directory created for saving json files when
                 `jsonfile_prefix` is not specified.
         """
         assert isinstance(results, list), 'results must be a list'
@@ -480,15 +482,16 @@ def evaluate(self,
         Args:
             results (list[dict]): Testing results of the dataset.
-            metric (str | list[str]): Metrics to be evaluated.
-            logger (logging.Logger | str | None): Logger used for printing
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Default: 'bbox'.
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            jsonfile_prefix (str | None): The prefix of json files. It includes
+            jsonfile_prefix (str, optional): The prefix of json files, including
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            show (bool): Whether to visualize.
+            show (bool, optional): Whether to visualize.
                 Default: False.
-            out_dir (str): Path to save the visualization results.
+            out_dir (str, optional): Path to save the visualization results.
                 Default: None.
             pipeline (list[dict], optional): raw data loading for showing.
                 Default: None.
@@ -624,7 +627,7 @@ def lidar_nusc_box_to_global(info,
         boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
         classes (list[str]): Mapped classes in the evaluation.
         eval_configs (object): Evaluation configuration object.
-        eval_version (str): Evaluation version.
+        eval_version (str, optional): Evaluation version.
             Default: 'detection_cvpr_2019'

     Returns:
diff --git a/mmdet3d/datasets/nuscenes_mono_dataset.py b/mmdet3d/datasets/nuscenes_mono_dataset.py
index e361b4ae70..f288e31950 100644
--- a/mmdet3d/datasets/nuscenes_mono_dataset.py
+++ b/mmdet3d/datasets/nuscenes_mono_dataset.py
@@ -44,8 +44,9 @@ class NuScenesMonoDataset(CocoDataset):
        - 'Camera': Box in camera coordinates.
     eval_version (str, optional): Configuration version of evaluation.
         Defaults to 'detection_cvpr_2019'.
-    use_valid_flag (bool): Whether to use `use_valid_flag` key in the info
-        file as mask to filter gt_boxes and gt_names. Defaults to False.
+    use_valid_flag (bool, optional): Whether to use `use_valid_flag` key
+        in the info file as mask to filter gt_boxes and gt_names.
+        Defaults to False.
     version (str, optional): Dataset version. Defaults to 'v1.0-trainval'.
    """
    CLASSES = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
@@ -140,8 +141,8 @@ def _parse_ann_info(self, img_info, ann_info):
             ann_info (list[dict]): Annotation info of an image.
         Returns:
-            dict: A dict containing the following keys: bboxes, labels, \
-                gt_bboxes_3d, gt_labels_3d, attr_labels, centers2d, \
+            dict: A dict containing the following keys: bboxes, labels,
+                gt_bboxes_3d, gt_labels_3d, attr_labels, centers2d,
                 depths, bboxes_ignore, masks, seg_map
         """
         gt_bboxes = []
@@ -394,10 +395,11 @@ def _evaluate_single(self,
         Args:
             result_path (str): Path of the result file.
-            logger (logging.Logger | str | None): Logger used for printing
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            metric (str): Metric name used for evaluation. Default: 'bbox'.
-            result_name (str): Result name in the metric prefix.
+            metric (str, optional): Metric name used for evaluation.
+                Default: 'bbox'.
+            result_name (str, optional): Result name in the metric prefix.
                 Default: 'img_bbox'.

         Returns:
@@ -448,13 +450,13 @@ def format_results(self, results, jsonfile_prefix=None, **kwargs):
         Args:
             results (list[tuple | numpy.ndarray]): Testing results of
                 the dataset.
-            jsonfile_prefix (str | None): The prefix of json files. It includes
+            jsonfile_prefix (str): The prefix of json files. It includes
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.

         Returns:
-            tuple: (result_files, tmp_dir), result_files is a dict containing \
-                the json filepaths, tmp_dir is the temporal directory created \
+            tuple: (result_files, tmp_dir), result_files is a dict containing
+                the json filepaths, tmp_dir is the temporary directory created
                 for saving json files when jsonfile_prefix is not specified.
         """
         assert isinstance(results, list), 'results must be a list'
@@ -504,15 +506,18 @@ def evaluate(self,
         Args:
             results (list[dict]): Testing results of the dataset.
-            metric (str | list[str]): Metrics to be evaluated.
-            logger (logging.Logger | str | None): Logger used for printing
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Default: 'bbox'.
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            jsonfile_prefix (str | None): The prefix of json files. It includes
+            jsonfile_prefix (str): The prefix of json files. It includes
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            show (bool): Whether to visualize.
+            result_names (list[str], optional): Result names in the
+                metric prefix. Default: ['img_bbox'].
+            show (bool, optional): Whether to visualize.
                 Default: False.
-            out_dir (str): Path to save the visualization results.
+            out_dir (str, optional): Path to save the visualization results.
                 Default: None.
             pipeline (list[dict], optional): raw data loading for showing.
                 Default: None.
@@ -576,7 +581,7 @@ def _get_pipeline(self, pipeline):
         """Get data loading pipeline in self.show/evaluate function.

         Args:
-            pipeline (list[dict] | None): Input pipeline. If None is given, \
+            pipeline (list[dict]): Input pipeline. If None is given,
                 get from self.pipeline.
         """
         if pipeline is None:
@@ -696,7 +701,7 @@ def cam_nusc_box_to_global(info,
         boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
         classes (list[str]): Mapped classes in the evaluation.
         eval_configs (object): Evaluation configuration object.
-        eval_version (str): Evaluation version.
+        eval_version (str, optional): Evaluation version.
            Default: 'detection_cvpr_2019'

     Returns:
@@ -736,7 +741,7 @@ def global_nusc_box_to_cam(info,
         boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
         classes (list[str]): Mapped classes in the evaluation.
         eval_configs (object): Evaluation configuration object.
-        eval_version (str): Evaluation version.
+        eval_version (str, optional): Evaluation version.
            Default: 'detection_cvpr_2019'

     Returns:
@@ -769,7 +774,7 @@ def nusc_box_to_cam_box3d(boxes):
         boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.

     Returns:
-        tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor): \
+        tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
            Converted 3D bounding boxes, scores and labels.
    """
    locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)
diff --git a/mmdet3d/datasets/pipelines/data_augment_utils.py b/mmdet3d/datasets/pipelines/data_augment_utils.py
index 6bee549bfd..b68ca0c245 100644
--- a/mmdet3d/datasets/pipelines/data_augment_utils.py
+++ b/mmdet3d/datasets/pipelines/data_augment_utils.py
@@ -34,8 +34,8 @@ def box_collision_test(boxes, qboxes, clockwise=True):
     Args:
         boxes (np.ndarray): Corners of current boxes.
         qboxes (np.ndarray): Boxes to be avoid colliding.
-        clockwise (bool): Whether the corners are in clockwise order.
-            Default: True.
+        clockwise (bool, optional): Whether the corners are in
+            clockwise order. Default: True.
    """
    N = boxes.shape[0]
    K = qboxes.shape[0]
@@ -317,7 +317,7 @@ def box3d_transform_(boxes, loc_transform, rot_transform, valid_mask):
         boxes (np.ndarray): 3D boxes to be transformed.
         loc_transform (np.ndarray): Location transform to be applied.
         rot_transform (np.ndarray): Rotation transform to be applied.
-        valid_mask (np.ndarray | None): Mask to indicate which boxes are valid.
+        valid_mask (np.ndarray): Mask to indicate which boxes are valid.
    """
    num_box = boxes.shape[0]
    for i in range(num_box):
@@ -338,16 +338,17 @@ def noise_per_object_v3_(gt_boxes,
     Args:
         gt_boxes (np.ndarray): Ground truth boxes with shape (N, 7).
-        points (np.ndarray | None): Input point cloud with shape (M, 4).
-            Default: None.
-        valid_mask (np.ndarray | None): Mask to indicate which boxes are valid.
-            Default: None.
-        rotation_perturb (float): Rotation perturbation. Default: pi / 4.
-        center_noise_std (float): Center noise standard deviation.
+        points (np.ndarray, optional): Input point cloud with
+            shape (M, 4). Default: None.
+        valid_mask (np.ndarray, optional): Mask to indicate which
+            boxes are valid. Default: None.
+        rotation_perturb (float, optional): Rotation perturbation.
+            Default: pi / 4.
+        center_noise_std (float, optional): Center noise standard deviation.
             Default: 1.0.
-        global_random_rot_range (float): Global random rotation range.
-            Default: pi/4.
-        num_try (int): Number of try. Default: 100.
+        global_random_rot_range (float, optional): Global random rotation
+            range. Default: pi / 4.
+        num_try (int, optional): Number of tries. Default: 100.
    """
    num_boxes = gt_boxes.shape[0]
    if not isinstance(rotation_perturb, (list, tuple, np.ndarray)):
diff --git a/mmdet3d/datasets/pipelines/dbsampler.py b/mmdet3d/datasets/pipelines/dbsampler.py
index 8a2455532e..82e9829db7 100644
--- a/mmdet3d/datasets/pipelines/dbsampler.py
+++ b/mmdet3d/datasets/pipelines/dbsampler.py
@@ -15,10 +15,10 @@ class BatchSampler:

     Args:
         sample_list (list[dict]): List of samples.
-        name (str | None): The category of samples. Default: None.
-        epoch (int | None): Sampling epoch. Default: None.
-        shuffle (bool): Whether to shuffle indices. Default: False.
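A quick, hedged sketch of how the per-object noise utility documented above might be invoked. Shapes follow the docstring; the array values are made up, and the in-place behaviour is inferred from the trailing-underscore naming convention shared with box3d_transform_:

import numpy as np

from mmdet3d.datasets.pipelines.data_augment_utils import noise_per_object_v3_

# One ground-truth box of shape (N, 7) and a small cloud of shape (M, 4),
# matching the shapes documented above; values are illustrative only.
gt_boxes = np.array([[10.0, 2.0, -1.0, 3.9, 1.6, 1.56, 0.0]],
                    dtype=np.float32)
points = np.random.rand(128, 4).astype(np.float32)

# Perturbs the boxes (and the points inside them) in place; assumption
# based on the trailing-underscore convention noted above.
noise_per_object_v3_(gt_boxes, points, rotation_perturb=np.pi / 4,
                     center_noise_std=1.0, num_try=100)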
-        drop_reminder (bool): Drop reminder. Default: False.
+        name (str, optional): The category of samples. Default: None.
+        epoch (int, optional): Sampling epoch. Default: None.
+        shuffle (bool, optional): Whether to shuffle indices. Default: False.
+        drop_reminder (bool, optional): Whether to drop the remaining
+            samples that cannot form a complete batch. Default: False.
    """

    def __init__(self,
@@ -87,9 +87,9 @@ class DataBaseSampler(object):
         rate (float): Rate of actual sampled over maximum sampled number.
         prepare (dict): Name of preparation functions and the input value.
         sample_groups (dict): Sampled classes and numbers.
-        classes (list[str]): List of classes. Default: None.
-        points_loader(dict): Config of points loader. Default: dict(
-            type='LoadPointsFromFile', load_dim=4, use_dim=[0,1,2,3])
+        classes (list[str], optional): List of classes. Default: None.
+        points_loader (dict, optional): Config of points loader. Default:
+            dict(type='LoadPointsFromFile', load_dim=4, use_dim=[0,1,2,3])
    """

    def __init__(self,
@@ -198,9 +198,9 @@ def sample_all(self, gt_bboxes, gt_labels, img=None):
         Returns:
             dict: Dict of sampled 'pseudo ground truths'.

-                - gt_labels_3d (np.ndarray): ground truths labels \
+                - gt_labels_3d (np.ndarray): ground truth labels
                     of sampled objects.
-                - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): \
+                - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`):
                     sampled ground truth 3D bounding boxes
                 - points (np.ndarray): sampled points
                 - group_ids (np.ndarray): ids of sampled ground truths
diff --git a/mmdet3d/datasets/pipelines/formating.py b/mmdet3d/datasets/pipelines/formating.py
index ea019e40d9..2738992314 100644
--- a/mmdet3d/datasets/pipelines/formating.py
+++ b/mmdet3d/datasets/pipelines/formating.py
@@ -24,7 +24,7 @@ class DefaultFormatBundle(object):
    - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
    - gt_labels: (1)to tensor, (2)to DataContainer
    - gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)
-    - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, \
+    - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
      (3)to DataContainer (stack=True)
    """

@@ -92,8 +92,8 @@ class Collect3D(object):
    The "img_meta" item is always populated. The contents of the "img_meta"
    dictionary depends on "meta_keys". By default this includes:

-        - 'img_shape': shape of the image input to the network as a tuple \
-            (h, w, c). Note that images may be zero padded on the \
+        - 'img_shape': shape of the image input to the network as a tuple
+            (h, w, c). Note that images may be zero padded on the
            bottom/right if the batch tensor is larger than this shape.
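For orientation, a hedged sampler config assembled only from the DataBaseSampler arguments documented above. The class names, counts, and the preparation function name filter_by_min_points are illustrative assumptions, and the real sampler may require additional keys (such as an info file path) not shown in this excerpt:

# Illustrative values only; argument names mirror the docstring above.
db_sampler = dict(
    rate=1.0,
    prepare=dict(filter_by_min_points=dict(Car=5)),  # assumed prepare fn
    sample_groups=dict(Car=15),
    classes=['Car', 'Pedestrian', 'Cyclist'],
    points_loader=dict(
        type='LoadPointsFromFile',
        load_dim=4,
        use_dim=[0, 1, 2, 3]))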
        - 'scale_factor': a float indicating the preprocessing scale
        - 'flip': a boolean indicating if image flip transform was used
@@ -103,9 +103,9 @@ class Collect3D(object):
        - 'lidar2img': transform from lidar to image
        - 'depth2img': transform from depth to image
        - 'cam2img': transform from camera to image
-        - 'pcd_horizontal_flip': a boolean indicating if point cloud is \
+        - 'pcd_horizontal_flip': a boolean indicating if point cloud is
            flipped horizontally
-        - 'pcd_vertical_flip': a boolean indicating if point cloud is \
+        - 'pcd_vertical_flip': a boolean indicating if point cloud is
            flipped vertically
        - 'box_mode_3d': 3D box mode
        - 'box_type_3d': 3D box type
@@ -130,16 +130,15 @@ class Collect3D(object):
            'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename')
    """

-    def __init__(self,
-                 keys,
-                 meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
-                            'depth2img', 'cam2img', 'pad_shape',
-                            'scale_factor', 'flip', 'pcd_horizontal_flip',
-                            'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d',
-                            'img_norm_cfg', 'pcd_trans', 'sample_idx',
-                            'pcd_scale_factor', 'pcd_rotation',
-                            'pcd_rotation_angle', 'pts_filename',
-                            'transformation_3d_flow')):
+    def __init__(
+        self,
+        keys,
+        meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
+                   'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 'flip',
+                   'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d',
+                   'box_type_3d', 'img_norm_cfg', 'pcd_trans', 'sample_idx',
+                   'pcd_scale_factor', 'pcd_rotation', 'pcd_rotation_angle',
+                   'pts_filename', 'transformation_3d_flow')):
        self.keys = keys
        self.meta_keys = meta_keys
diff --git a/mmdet3d/datasets/pipelines/loading.py b/mmdet3d/datasets/pipelines/loading.py
index 58f5a86fa3..f628b5041e 100644
--- a/mmdet3d/datasets/pipelines/loading.py
+++ b/mmdet3d/datasets/pipelines/loading.py
@@ -14,9 +14,10 @@ class LoadMultiViewImageFromFiles(object):
    Expects results['img_filename'] to be a list of filenames.

    Args:
-        to_float32 (bool): Whether to convert the img to float32.
+        to_float32 (bool, optional): Whether to convert the img to float32.
            Defaults to False.
-        color_type (str): Color type of the file. Defaults to 'unchanged'.
+        color_type (str, optional): Color type of the file.
+            Defaults to 'unchanged'.
    """

    def __init__(self, to_float32=False, color_type='unchanged'):
@@ -30,7 +31,7 @@ def __call__(self, results):
             results (dict): Result dict containing multi-view image filenames.

         Returns:
-            dict: The result dict containing the multi-view image data. \
+            dict: The result dict containing the multi-view image data.
                 Added keys and values are described below.

                 - filename (str): Multi-view image filenames.
@@ -77,7 +78,7 @@ class LoadImageFromFileMono3D(LoadImageFromFile):
    detection, additional camera parameters need to be loaded.

    Args:
-        kwargs (dict): Arguments are the same as those in \
+        kwargs (dict): Arguments are the same as those in
            :class:`LoadImageFromFile`.
    """

@@ -102,17 +103,20 @@ class LoadPointsFromMultiSweeps(object):
    This is usually used for nuScenes dataset to utilize previous sweeps.

    Args:
-        sweeps_num (int): Number of sweeps. Defaults to 10.
-        load_dim (int): Dimension number of the loaded points. Defaults to 5.
-        use_dim (list[int]): Which dimension to use. Defaults to [0, 1, 2, 4].
-        file_client_args (dict): Config dict of file clients, refer to
+        sweeps_num (int, optional): Number of sweeps. Defaults to 10.
+        load_dim (int, optional): Dimension number of the loaded points.
+            Defaults to 5.
+        use_dim (list[int], optional): Which dimension to use.
+            Defaults to [0, 1, 2, 4].
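A sketch of the formatting tail of a pipeline built from the two classes documented above. The chosen keys are illustrative; they mirror the key names used elsewhere in these docstrings rather than any one config:

# Typical tail of a training pipeline: bundle the loaded data into tensors
# and DataContainers, then collect the keys the model consumes.
format_pipeline = [
    dict(type='DefaultFormatBundle'),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]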
+        file_client_args (dict, optional): Config dict of file clients,
+            refer to
            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
            for more details. Defaults to dict(backend='disk').
-        pad_empty_sweeps (bool): Whether to repeat keyframe when
+        pad_empty_sweeps (bool, optional): Whether to repeat keyframe when
            sweeps is empty. Defaults to False.
-        remove_close (bool): Whether to remove close points.
+        remove_close (bool, optional): Whether to remove close points.
            Defaults to False.
-        test_mode (bool): If test_model=True used for testing, it will not
+        test_mode (bool, optional): If `test_mode=True`, it will not
            randomly sample sweeps but select the nearest N frames.
            Defaults to False.
    """
@@ -161,7 +165,7 @@ def _remove_close(self, points, radius=1.0):
         Args:
             points (np.ndarray | :obj:`BasePoints`): Sweep points.
-            radius (float): Radius below which points are removed.
+            radius (float, optional): Radius below which points are removed.
                 Defaults to 1.0.

         Returns:
@@ -182,14 +186,14 @@ def __call__(self, results):
         """Call function to load multi-sweep point clouds from files.

         Args:
-            results (dict): Result dict containing multi-sweep point cloud \
+            results (dict): Result dict containing multi-sweep point cloud
                 filenames.

         Returns:
-            dict: The result dict containing the multi-sweep points data. \
+            dict: The result dict containing the multi-sweep points data.
                 Added key and value are described below.

-                - points (np.ndarray | :obj:`BasePoints`): Multi-sweep point \
+                - points (np.ndarray | :obj:`BasePoints`): Multi-sweep point
                     cloud arrays.
         """
         points = results['points']
@@ -243,8 +247,8 @@ class PointSegClassMapping(object):

    Args:
        valid_cat_ids (tuple[int]): A tuple of valid category.
-        max_cat_id (int): The max possible cat_id in input segmentation mask.
-            Defaults to 40.
+        max_cat_id (int, optional): The max possible cat_id in input
+            segmentation mask. Defaults to 40.
    """

    def __init__(self, valid_cat_ids, max_cat_id=40):
@@ -268,7 +272,7 @@ def __call__(self, results):
             results (dict): Result dict containing point semantic masks.

         Returns:
-            dict: The result dict containing the mapped category ids. \
+            dict: The result dict containing the mapped category ids.
                 Updated key and value are described below.

                 - pts_semantic_mask (np.ndarray): Mapped semantic masks.
@@ -307,7 +311,7 @@ def __call__(self, results):
             results (dict): Result dict containing point clouds data.

         Returns:
-            dict: The result dict containing the normalized points. \
+            dict: The result dict containing the normalized points.
                 Updated key and value are described below.

                 - points (:obj:`BasePoints`): Points after color normalization.
@@ -342,14 +346,17 @@ class LoadPointsFromFile(object):
        - 'LIDAR': Points in LiDAR coordinates.
        - 'DEPTH': Points in depth coordinates, usually for indoor dataset.
        - 'CAMERA': Points in camera coordinates.
-        load_dim (int): The dimension of the loaded points.
+        load_dim (int, optional): The dimension of the loaded points.
            Defaults to 6.
-        use_dim (list[int]): Which dimensions of the points to be used.
+        use_dim (list[int], optional): Which dimensions of the points to use.
            Defaults to [0, 1, 2]. For KITTI dataset, set use_dim=4
            or use_dim=[0, 1, 2, 3] to use the intensity dimension.
-        shift_height (bool): Whether to use shifted height. Defaults to False.
-        use_color (bool): Whether to use color features. Defaults to False.
-        file_client_args (dict): Config dict of file clients, refer to
+        shift_height (bool, optional): Whether to use shifted height.
+            Defaults to False.
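Putting the loading options above together, a hedged nuScenes-style sketch; the exact dim choices are illustrative and only use arguments documented in this excerpt:

# Load 5-dim LiDAR points, then aggregate 10 past sweeps.
load_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=[0, 1, 2, 3],
        file_client_args=dict(backend='disk')),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        load_dim=5,
        use_dim=[0, 1, 2, 4],
        pad_empty_sweeps=True,
        remove_close=True),
]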
+        use_color (bool, optional): Whether to use color features.
+            Defaults to False.
+        file_client_args (dict, optional): Config dict of file clients,
+            refer to
            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
            for more details. Defaults to dict(backend='disk').
    """
@@ -405,7 +412,7 @@ def __call__(self, results):
             results (dict): Result dict containing point clouds data.

         Returns:
-            dict: The result dict containing the point clouds data. \
+            dict: The result dict containing the point clouds data.
                 Added key and value are described below.

                 - points (:obj:`BasePoints`): Point clouds data.
diff --git a/mmdet3d/datasets/pipelines/test_time_aug.py b/mmdet3d/datasets/pipelines/test_time_aug.py
index 790476c58b..398afc1339 100644
--- a/mmdet3d/datasets/pipelines/test_time_aug.py
+++ b/mmdet3d/datasets/pipelines/test_time_aug.py
@@ -16,18 +16,19 @@ class MultiScaleFlipAug3D(object):
        img_scale (tuple | list[tuple]: Images scales for resizing.
        pts_scale_ratio (float | list[float]): Points scale ratios for
            resizing.
-        flip (bool): Whether apply flip augmentation. Defaults to False.
-        flip_direction (str | list[str]): Flip augmentation directions
-            for images, options are "horizontal" and "vertical".
+        flip (bool, optional): Whether to apply flip augmentation.
+            Defaults to False.
+        flip_direction (str | list[str], optional): Flip augmentation
+            directions for images, options are "horizontal" and "vertical".
            If flip_direction is list, multiple flip augmentations will
            be applied. It has no effect when ``flip == False``.
            Defaults to "horizontal".
-        pcd_horizontal_flip (bool): Whether apply horizontal flip augmentation
-            to point cloud. Defaults to True. Note that it works only when
-            'flip' is turned on.
-        pcd_vertical_flip (bool): Whether apply vertical flip augmentation
-            to point cloud. Defaults to True. Note that it works only when
-            'flip' is turned on.
+        pcd_horizontal_flip (bool, optional): Whether to apply horizontal
+            flip augmentation to point cloud. Defaults to True.
+            Note that it works only when 'flip' is turned on.
+        pcd_vertical_flip (bool, optional): Whether to apply vertical flip
+            augmentation to point cloud. Defaults to True.
+            Note that it works only when 'flip' is turned on.
    """

    def __init__(self,
@@ -70,7 +71,7 @@ def __call__(self, results):
             results (dict): Result dict contains the data to augment.

         Returns:
-            dict: The result dict contains the data that is augmented with \
+            dict: The result dict contains the data that is augmented with
                 different scales and flips.
         """
         aug_data = []
diff --git a/mmdet3d/datasets/pipelines/transforms_3d.py b/mmdet3d/datasets/pipelines/transforms_3d.py
index fb57f19b0d..ad675dadef 100644
--- a/mmdet3d/datasets/pipelines/transforms_3d.py
+++ b/mmdet3d/datasets/pipelines/transforms_3d.py
@@ -22,7 +22,7 @@ class RandomDropPointsColor(object):
    util/transform.py#L223>`_ for more details.

    Args:
-        drop_ratio (float): The probability of dropping point colors.
+        drop_ratio (float, optional): The probability of dropping point colors.
            Defaults to 0.2.
    """

@@ -38,7 +38,7 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after color dropping, \
+            dict: Results after color dropping,
                 'points' key is updated in the result dict.
         """
         points = input_dict['points']
@@ -105,10 +105,11 @@ def random_flip_data_3d(self, input_dict, direction='horizontal'):
         Args:
             input_dict (dict): Result dict from loading pipeline.
-            direction (str): Flip direction. Default: horizontal.
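A hedged test-time-augmentation sketch using the options documented above. The nested `transforms` list is an assumption about how the wrapper is normally composed; it is not part of the excerpted docstring:

tta_pipeline = [
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1.0,
        flip=False,
        # Assumed wrapper argument holding the inner per-view transforms.
        transforms=[
            dict(type='DefaultFormatBundle'),
            dict(type='Collect3D', keys=['points']),
        ]),
]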
+            direction (str, optional): Flip direction.
+                Default: 'horizontal'.

         Returns:
-            dict: Flipped results, 'points', 'bbox3d_fields' keys are \
+            dict: Flipped results, 'points', 'bbox3d_fields' keys are
                 updated in the result dict.
         """
         assert direction in ['horizontal', 'vertical']
@@ -137,15 +138,15 @@ def random_flip_data_3d(self, input_dict, direction='horizontal'):
                 input_dict['cam2img'][0][2] = w - input_dict['cam2img'][0][2]

     def __call__(self, input_dict):
-        """Call function to flip points, values in the ``bbox3d_fields`` and \
+        """Call function to flip points, values in the ``bbox3d_fields`` and
         also flip 2D image and its annotations.

         Args:
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Flipped results, 'flip', 'flip_direction', \
-                'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added \
+            dict: Flipped results, 'flip', 'flip_direction',
+                'pcd_horizontal_flip' and 'pcd_vertical_flip' keys are added
                 into result dict.
         """
         # filp 2D image and its annotations
@@ -187,20 +188,20 @@ def __repr__(self):
 class RandomJitterPoints(object):
     """Randomly jitter point coordinates.

-    Different from the global translation in ``GlobalRotScaleTrans``, here we \
+    Different from the global translation in ``GlobalRotScaleTrans``, here we
    apply different noises to each point in a scene.

    Args:
        jitter_std (list[float]): The standard deviation of jittering noise.
-            This applies random noise to all points in a 3D scene, which is \
-            sampled from a gaussian distribution whose standard deviation is \
+            This applies random noise to all points in a 3D scene, which is
+            sampled from a gaussian distribution whose standard deviation is
            set by ``jitter_std``. Defaults to [0.01, 0.01, 0.01]
-        clip_range (list[float] | None): Clip the randomly generated jitter \
+        clip_range (list[float]): Clip the randomly generated jitter
            noise into this range. If None is given, don't perform clipping.
            Defaults to [-0.05, 0.05]

    Note:
-        This transform should only be used in point cloud segmentation tasks \
+        This transform should only be used in point cloud segmentation tasks
        because we don't transform ground-truth bboxes accordingly.
        For similar transform in detection task, please refer to `ObjectNoise`.
    """
@@ -229,7 +230,7 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after adding noise to each point, \
+            dict: Results after adding noise to each point,
                 'points' key is updated in the result dict.
         """
         points = input_dict['points']
@@ -291,8 +292,8 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after object sampling augmentation, \
-                'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated \
+            dict: Results after object sampling augmentation,
+                'points', 'gt_bboxes_3d', 'gt_labels_3d' keys are updated
                 in the result dict.
         """
         gt_bboxes_3d = input_dict['gt_bboxes_3d']
@@ -388,7 +389,7 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after adding noise to each object, \
+            dict: Results after adding noise to each object,
                 'points', 'gt_bboxes_3d' keys are updated in the result dict.
         """
         gt_bboxes_3d = input_dict['gt_bboxes_3d']
@@ -428,10 +429,10 @@ class GlobalAlignment(object):
        rotation_axis (int): Rotation axis for points and bboxes rotation.

    Note:
-        We do not record the applied rotation and translation as in \
-        GlobalRotScaleTrans. Because usually, we do not need to reverse \
+        We do not record the applied rotation and translation as in
+        GlobalRotScaleTrans. Because usually, we do not need to reverse
        the alignment step.
-        For example, ScanNet 3D detection task uses aligned ground-truth \
+        For example, ScanNet 3D detection task uses aligned ground-truth
        bounding boxes for evaluation.
    """

@@ -483,7 +484,7 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after global alignment, 'points' and keys in \
+            dict: Results after global alignment, 'points' and keys in
                 input_dict['bbox3d_fields'] are updated in the result dict.
         """
         assert 'axis_align_matrix' in input_dict['ann_info'].keys(), \
@@ -512,15 +513,15 @@ class GlobalRotScaleTrans(object):
     """Apply global rotation, scaling and translation to a 3D scene.

    Args:
-        rot_range (list[float]): Range of rotation angle.
+        rot_range (list[float], optional): Range of rotation angle.
            Defaults to [-0.78539816, 0.78539816] (close to [-pi/4, pi/4]).
-        scale_ratio_range (list[float]): Range of scale ratio.
+        scale_ratio_range (list[float], optional): Range of scale ratio.
            Defaults to [0.95, 1.05].
-        translation_std (list[float]): The standard deviation of translation
-            noise. This applies random translation to a scene by a noise, which
+        translation_std (list[float], optional): The standard deviation of
+            translation noise applied to a scene, which
            is sampled from a gaussian distribution whose standard deviation
            is set by ``translation_std``. Defaults to [0, 0, 0]
-        shift_height (bool): Whether to shift height.
+        shift_height (bool, optional): Whether to shift height.
            (the fourth dimension of indoor points) when scaling.
            Defaults to False.
    """
@@ -559,8 +560,8 @@ def _trans_bbox_points(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after translation, 'points', 'pcd_trans' \
-                and keys in input_dict['bbox3d_fields'] are updated \
+            dict: Results after translation, 'points', 'pcd_trans'
+                and keys in input_dict['bbox3d_fields'] are updated
                 in the result dict.
         """
         translation_std = np.array(self.translation_std, dtype=np.float32)
@@ -578,8 +579,8 @@ def _rot_bbox_points(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after rotation, 'points', 'pcd_rotation' \
-                and keys in input_dict['bbox3d_fields'] are updated \
+            dict: Results after rotation, 'points', 'pcd_rotation'
+                and keys in input_dict['bbox3d_fields'] are updated
                 in the result dict.
         """
         rotation = self.rot_range
@@ -608,7 +609,7 @@ def _scale_bbox_points(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after scaling, 'points'and keys in \
+            dict: Results after scaling, 'points' and keys in
                 input_dict['bbox3d_fields'] are updated in the result dict.
         """
         scale = input_dict['pcd_scale_factor']
@@ -630,7 +631,7 @@ def _random_scale(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after scaling, 'pcd_scale_factor' are updated \
+            dict: Results after scaling, 'pcd_scale_factor' is updated
                 in the result dict.
         """
         scale_factor = np.random.uniform(self.scale_ratio_range[0],
                                          self.scale_ratio_range[1])
         input_dict['pcd_scale_factor'] = scale_factor

     def __call__(self, input_dict):
-        """Private function to rotate, scale and translate bounding boxes and \
+        """Private function to rotate, scale and translate bounding boxes and
         points.
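The documented defaults above translate directly into a pipeline entry; a minimal sketch with every argument spelled out for clarity:

# Global augmentation using exactly the defaults documented above.
global_aug = dict(
    type='GlobalRotScaleTrans',
    rot_range=[-0.78539816, 0.78539816],  # roughly [-pi/4, pi/4]
    scale_ratio_range=[0.95, 1.05],
    translation_std=[0, 0, 0],
    shift_height=False)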
         Args:
@@ -646,7 +647,7 @@ def __call__(self, input_dict):

         Returns:
             dict: Results after scaling, 'points', 'pcd_rotation',
-                'pcd_scale_factor', 'pcd_trans' and keys in \
+                'pcd_scale_factor', 'pcd_trans' and keys in
                 input_dict['bbox3d_fields'] are updated in the result dict.
         """
         if 'transformation_3d_flow' not in input_dict:
@@ -684,7 +685,7 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after filtering, 'points', 'pts_instance_mask' \
+            dict: Results after filtering, 'points', 'pts_instance_mask'
                 and 'pts_semantic_mask' keys are updated in the result dict.
         """
         idx = input_dict['points'].shuffle()
@@ -723,7 +724,7 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' \
+            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
                 keys are updated in the result dict.
         """
         # Check points instance type and initialise bev_range
@@ -775,7 +776,7 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after filtering, 'points', 'pts_instance_mask' \
+            dict: Results after filtering, 'points', 'pts_instance_mask'
                 and 'pts_semantic_mask' keys are updated in the result dict.
         """
         points = input_dict['points']
@@ -821,7 +822,7 @@ def __call__(self, input_dict):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d' \
+            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
                 keys are updated in the result dict.
         """
         gt_labels_3d = input_dict['gt_labels_3d']
@@ -913,7 +914,7 @@ def __call__(self, results):
         Args:
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after sampling, 'points', 'pts_instance_mask' \
+            dict: Results after sampling, 'points', 'pts_instance_mask'
                 and 'pts_semantic_mask' keys are updated in the result dict.
         """
         points = results['points']
@@ -994,10 +995,10 @@ class IndoorPatchPointSample(object):
            additional features. Defaults to False.
        num_try (int, optional): Number of times to try if the patch selected
            is invalid. Defaults to 10.
-        enlarge_size (float | None, optional): Enlarge the sampled patch to
+        enlarge_size (float, optional): Enlarge the sampled patch to
            [-block_size / 2 - enlarge_size, block_size / 2 + enlarge_size]
            as an augmentation. If None, set it as 0. Defaults to 0.2.
-        min_unique_num (int | None, optional): Minimum number of unique points
+        min_unique_num (int, optional): Minimum number of unique points
            the sampled patch should contain. If None, use PointNet++'s method
            to judge uniqueness. Defaults to None.
        eps (float, optional): A value added to patch boundary to guarantee
@@ -1038,7 +1039,7 @@ def _input_generation(self, coords, patch_center, coord_max, attributes,
                           attribute_dims, point_type):
         """Generating model input.

-        Generate input by subtracting patch center and adding additional \
+        Generate input by subtracting patch center and adding additional
         features. Currently support colors and normalized xyz as features.

         Args:
@@ -1182,7 +1183,7 @@ def __call__(self, results):
             input_dict (dict): Result dict from loading pipeline.

         Returns:
-            dict: Results after sampling, 'points', 'pts_instance_mask' \
+            dict: Results after sampling, 'points', 'pts_instance_mask'
                 and 'pts_semantic_mask' keys are updated in the result dict.
""" points = results['points'] @@ -1242,7 +1243,7 @@ def __call__(self, input_dict): input_dict (dict): Result dict from loading pipeline. Returns: - dict: Results after filtering, 'points', 'pts_instance_mask' \ + dict: Results after filtering, 'points', 'pts_instance_mask' and 'pts_semantic_mask' keys are updated in the result dict. """ points = input_dict['points'] @@ -1340,7 +1341,7 @@ def __call__(self, results): input_dict (dict): Result dict from loading pipeline. Returns: - dict: Results after sampling, 'points', 'pts_instance_mask' \ + dict: Results after sampling, 'points', 'pts_instance_mask' and 'pts_semantic_mask' keys are updated in the result dict. """ points = results['points'] diff --git a/mmdet3d/datasets/scannet_dataset.py b/mmdet3d/datasets/scannet_dataset.py index 7d15b0bacc..df03986f27 100644 --- a/mmdet3d/datasets/scannet_dataset.py +++ b/mmdet3d/datasets/scannet_dataset.py @@ -78,13 +78,13 @@ def get_data_info(self, index): index (int): Index of the sample data to get. Returns: - dict: Data information that will be passed to the data \ + dict: Data information that will be passed to the data preprocessing pipelines. It includes the following keys: - sample_idx (str): Sample index. - pts_filename (str): Filename of point clouds. - file_name (str): Filename of point clouds. - - img_prefix (str | None, optional): Prefix of image files. + - img_prefix (str, optional): Prefix of image files. - img_info (dict, optional): Image info. - ann_info (dict): Annotation info. """ @@ -129,12 +129,12 @@ def get_ann_info(self, index): Returns: dict: annotation information consists of the following keys: - - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \ + - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): 3D ground truth bboxes - gt_labels_3d (np.ndarray): Labels of ground truths. - pts_instance_mask_path (str): Path of instance masks. - pts_semantic_mask_path (str): Path of semantic masks. - - axis_align_matrix (np.ndarray): Transformation matrix for \ + - axis_align_matrix (np.ndarray): Transformation matrix for global scene alignment. """ # Use index to get the annos, thus the evalhook could also use this api @@ -172,7 +172,7 @@ def get_ann_info(self, index): def prepare_test_data(self, index): """Prepare data for testing. - We should take axis_align_matrix from self.data_infos since we need \ + We should take axis_align_matrix from self.data_infos since we need to align point clouds. Args: @@ -272,7 +272,7 @@ class ScanNetSegDataset(Custom3DSegDataset): as input. Defaults to None. test_mode (bool, optional): Whether the dataset is in test mode. Defaults to False. - ignore_index (int, optional): The label index to be ignored, e.g. \ + ignore_index (int, optional): The label index to be ignored, e.g. unannotated points. If None is given, set to len(self.CLASSES). Defaults to None. scene_idxs (np.ndarray | str, optional): Precomputed index to load @@ -424,7 +424,7 @@ def format_results(self, results, txtfile_prefix=None): Args: outputs (list[dict]): Testing results of the dataset. - txtfile_prefix (str | None): The prefix of saved files. It includes + txtfile_prefix (str): The prefix of saved files. It includes the file path and the prefix of filename, e.g., "a/b/prefix". If not specified, a temp file will be created. Default: None. 
diff --git a/mmdet3d/datasets/sunrgbd_dataset.py b/mmdet3d/datasets/sunrgbd_dataset.py
index 7a9a7d59c2..4b9a5330ac 100644
--- a/mmdet3d/datasets/sunrgbd_dataset.py
+++ b/mmdet3d/datasets/sunrgbd_dataset.py
@@ -74,13 +74,13 @@ def get_data_info(self, index):
             index (int): Index of the sample data to get.

         Returns:
-            dict: Data information that will be passed to the data \
+            dict: Data information that will be passed to the data
                 preprocessing pipelines. It includes the following keys:

                 - sample_idx (str): Sample index.
                 - pts_filename (str, optional): Filename of point clouds.
                 - file_name (str, optional): Filename of point clouds.
-                - img_prefix (str | None, optional): Prefix of image files.
+                - img_prefix (str, optional): Prefix of image files.
                 - img_info (dict, optional): Image info.
                 - calib (dict, optional): Camera calibration info.
                 - ann_info (dict): Annotation info.
         """
@@ -125,7 +125,7 @@ def get_ann_info(self, index):
         Returns:
             dict: annotation information consists of the following keys:

-                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`): \
+                - gt_bboxes_3d (:obj:`DepthInstance3DBoxes`):
                     3D ground truth bboxes
                 - gt_labels_3d (np.ndarray): Labels of ground truths.
                 - pts_instance_mask_path (str): Path of instance masks.
@@ -239,12 +239,15 @@ def evaluate(self,
         Args:
             results (list[dict]): List of results.
-            metric (str | list[str]): Metrics to be evaluated.
-            iou_thr (list[float]): AP IoU thresholds.
-            iou_thr_2d (list[float]): AP IoU thresholds for 2d evaluation.
-            show (bool): Whether to visualize.
+            metric (str | list[str], optional): Metrics to be evaluated.
+                Default: None.
+            iou_thr (list[float], optional): AP IoU thresholds for 3D
+                evaluation. Default: (0.25, 0.5).
+            iou_thr_2d (list[float], optional): AP IoU thresholds for 2D
+                evaluation. Default: (0.5, ).
+            show (bool, optional): Whether to visualize.
                 Default: False.
-            out_dir (str): Path to save the visualization results.
+            out_dir (str, optional): Path to save the visualization results.
                 Default: None.
             pipeline (list[dict], optional): raw data loading for showing.
                 Default: None.
diff --git a/mmdet3d/datasets/utils.py b/mmdet3d/datasets/utils.py
index b273fb1799..2f4f09d26f 100644
--- a/mmdet3d/datasets/utils.py
+++ b/mmdet3d/datasets/utils.py
@@ -25,7 +25,7 @@ def is_loading_function(transform):
         transform (dict | :obj:`Pipeline`): A transform config or a function.

     Returns:
-        bool | None: Whether it is a loading function. None means can't judge.
+        bool | None: Whether it is a loading function. None means it can't be judged.
             When transform is `MultiScaleFlipAug3D`, we return None.
     """
     # TODO: use more elegant way to distinguish loading modules
@@ -92,7 +92,7 @@ def get_loading_pipeline(pipeline):
         ...    dict(type='Collect3D',
         ...        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d'])
         ...    ]
-        >>> assert expected_pipelines ==\
+        >>> assert expected_pipelines == \
         ...        get_loading_pipeline(pipelines)
     """
     loading_pipeline = []
@@ -126,7 +126,7 @@ def extract_result_dict(results, key):
         key (str): Key of the desired data.

     Returns:
-        np.ndarray | torch.Tensor | None: Data term.
+        np.ndarray | torch.Tensor | None: Data term, or None if ``key`` is missing.
     """
     if key not in results.keys():
         return None
diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py
index 5017ecbcb4..e5eeef50f7 100644
--- a/mmdet3d/datasets/waymo_dataset.py
+++ b/mmdet3d/datasets/waymo_dataset.py
@@ -46,8 +46,9 @@ class WaymoDataset(KittiDataset):
        Defaults to True.
    test_mode (bool, optional): Whether the dataset is in test mode.
        Defaults to False.
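A tiny, hedged illustration of the None-on-missing-key contract of extract_result_dict visible in the hunk above. The dict contents are made up, and it is assumed that a plain array value passes through unchanged:

import numpy as np

from mmdet3d.datasets.utils import extract_result_dict

results = {'points': np.zeros((16, 4), dtype=np.float32)}
points = extract_result_dict(results, 'points')   # the stored data term
missing = extract_result_dict(results, 'labels')  # None: key is absent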
-    pcd_limit_range (list): The range of point cloud used to filter
-        invalid predicted boxes. Default: [-85, -85, -5, 85, 85, 5].
+    pcd_limit_range (list[float], optional): The range of point cloud used
+        to filter invalid predicted boxes.
+        Default: [-85, -85, -5, 85, 85, 5].
    """

    CLASSES = ('Car', 'Cyclist', 'Pedestrian')
@@ -100,7 +101,7 @@ def get_data_info(self, index):

                - sample_idx (str): sample index
                - pts_filename (str): filename of point clouds
-                - img_prefix (str | None): prefix of image files
+                - img_prefix (str): prefix of image files
                - img_info (dict): image info
                - lidar2img (list[np.ndarray], optional): transformations
                    from lidar to different cameras
@@ -140,15 +141,15 @@ def format_results(self,
         Args:
             outputs (list[dict]): Testing results of the dataset.
-            pklfile_prefix (str | None): The prefix of pkl files. It includes
+            pklfile_prefix (str): The prefix of pkl files. It includes
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            submission_prefix (str | None): The prefix of submitted files. It
+            submission_prefix (str): The prefix of submitted files. It
                 includes the file path and the prefix of filename, e.g.,
                 "a/b/prefix". If not specified, a temp file will be created.
                 Default: None.
-            data_format (str | None): Output data format. Default: 'waymo'.
-                Another supported choice is 'kitti'.
+            data_format (str, optional): Output data format.
+                Default: 'waymo'. Another supported choice is 'kitti'.

         Returns:
             tuple: (result_files, tmp_dir), result_files is a dict containing
@@ -226,18 +227,18 @@ def evaluate(self,
         Args:
             results (list[dict]): Testing results of the dataset.
-            metric (str | list[str]): Metrics to be evaluated.
+            metric (str | list[str], optional): Metrics to be evaluated.
                 Default: 'waymo'. Another supported metric is 'kitti'.
-            logger (logging.Logger | str | None): Logger used for printing
+            logger (logging.Logger | str, optional): Logger used for printing
                 related information during evaluation. Default: None.
-            pklfile_prefix (str | None): The prefix of pkl files. It includes
+            pklfile_prefix (str, optional): The prefix of pkl files, including
                 the file path and the prefix of filename, e.g., "a/b/prefix".
                 If not specified, a temp file will be created. Default: None.
-            submission_prefix (str | None): The prefix of submission datas.
+            submission_prefix (str, optional): The prefix of submission data.
                 If not specified, the submission data will not be generated.
-            show (bool): Whether to visualize.
+            show (bool, optional): Whether to visualize.
                 Default: False.
-            out_dir (str): Path to save the visualization results.
+            out_dir (str, optional): Path to save the visualization results.
                 Default: None.
             pipeline (list[dict], optional): raw data loading for showing.
                 Default: None.
@@ -364,8 +365,8 @@ def bbox2result_kitti(self,
             net_outputs (List[np.ndarray]): list of array storing the
                 bbox and score
             class_nanes (List[String]): A list of class names
-            pklfile_prefix (str | None): The prefix of pkl file.
-            submission_prefix (str | None): The prefix of submission file.
+            pklfile_prefix (str): The prefix of pkl file.
+            submission_prefix (str): The prefix of submission file.
Returns: List[dict]: A list of dicts in the KITTI 3D format diff --git a/mmdet3d/models/backbones/pointnet2_sa_msg.py b/mmdet3d/models/backbones/pointnet2_sa_msg.py index dcf3931aa9..e2b0eca4a0 100644 --- a/mmdet3d/models/backbones/pointnet2_sa_msg.py +++ b/mmdet3d/models/backbones/pointnet2_sa_msg.py @@ -134,7 +134,7 @@ def forward(self, points): - sa_xyz (torch.Tensor): The coordinates of sa features. - sa_features (torch.Tensor): The features from the last Set Aggregation Layers. - - sa_indices (torch.Tensor): Indices of the \ + - sa_indices (torch.Tensor): Indices of the input points. """ xyz, features = self._split_point_feats(points) diff --git a/mmdet3d/models/backbones/pointnet2_sa_ssg.py b/mmdet3d/models/backbones/pointnet2_sa_ssg.py index 2f3a0cc68a..eb5f4d6cad 100644 --- a/mmdet3d/models/backbones/pointnet2_sa_ssg.py +++ b/mmdet3d/models/backbones/pointnet2_sa_ssg.py @@ -97,11 +97,11 @@ def forward(self, points): Returns: dict[str, list[torch.Tensor]]: Outputs after SA and FP modules. - - fp_xyz (list[torch.Tensor]): The coordinates of \ + - fp_xyz (list[torch.Tensor]): The coordinates of each fp feature. - - fp_features (list[torch.Tensor]): The features \ + - fp_features (list[torch.Tensor]): The features from each Feature Propagation layer. - - fp_indices (list[torch.Tensor]): Indices of the \ + - fp_indices (list[torch.Tensor]): Indices of the input points. """ xyz, features = self._split_point_feats(points) diff --git a/mmdet3d/models/decode_heads/decode_head.py b/mmdet3d/models/decode_heads/decode_head.py index 672340b668..fe9367d492 100644 --- a/mmdet3d/models/decode_heads/decode_head.py +++ b/mmdet3d/models/decode_heads/decode_head.py @@ -13,17 +13,18 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta): Args: channels (int): Channels after modules, before conv_seg. num_classes (int): Number of classes. - dropout_ratio (float): Ratio of dropout layer. Default: 0.5. - conv_cfg (dict|None): Config of conv layers. + dropout_ratio (float, optional): Ratio of dropout layer. Default: 0.5. + conv_cfg (dict, optional): Config of conv layers. Default: dict(type='Conv1d'). - norm_cfg (dict|None): Config of norm layers. + norm_cfg (dict, optional): Config of norm layers. Default: dict(type='BN1d'). - act_cfg (dict): Config of activation layers. + act_cfg (dict, optional): Config of activation layers. Default: dict(type='ReLU'). - loss_decode (dict): Config of decode loss. + loss_decode (dict, optional): Config of decode loss. Default: dict(type='CrossEntropyLoss'). - ignore_index (int | None): The label index to be ignored. When using - masked BCE loss, ignore_index should be set to None. Default: 255. + ignore_index (int, optional): The label index to be ignored. + When using masked BCE loss, ignore_index should be set to None. + Default: 255. """ def __init__(self, @@ -110,9 +111,9 @@ def losses(self, seg_logit, seg_label): """Compute semantic segmentation loss. Args: - seg_logit (torch.Tensor): Predicted per-point segmentation logits \ + seg_logit (torch.Tensor): Predicted per-point segmentation logits of shape [B, num_classes, N]. - seg_label (torch.Tensor): Ground-truth segmentation label of \ + seg_label (torch.Tensor): Ground-truth segmentation label of shape [B, N].
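The shape contract in the `losses` docstring above ([B, num_classes, N] logits against [B, N] labels with `ignore_index=255`) is the standard per-point cross-entropy layout; a self-contained check:

```python
import torch
import torch.nn.functional as F

B, num_classes, N = 2, 20, 4096
seg_logit = torch.randn(B, num_classes, N)           # per-point logits
seg_label = torch.randint(0, num_classes, (B, N))    # per-point labels
seg_label[0, :10] = 255                              # points to ignore

# F.cross_entropy accepts (B, C, N) logits against (B, N) labels directly;
# entries equal to ignore_index contribute nothing to the mean.
loss = F.cross_entropy(seg_logit, seg_label, ignore_index=255)
print(loss.item())
```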
""" loss = dict() diff --git a/mmdet3d/models/decode_heads/paconv_head.py b/mmdet3d/models/decode_heads/paconv_head.py index 6ace064264..e662c976c2 100644 --- a/mmdet3d/models/decode_heads/paconv_head.py +++ b/mmdet3d/models/decode_heads/paconv_head.py @@ -14,7 +14,7 @@ class PAConvHead(PointNet2Head): Args: fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules. - fp_norm_cfg (dict|None): Config of norm layers used in FP modules. + fp_norm_cfg (dict): Config of norm layers used in FP modules. Default: dict(type='BN2d'). """ diff --git a/mmdet3d/models/decode_heads/pointnet2_head.py b/mmdet3d/models/decode_heads/pointnet2_head.py index c7fe0d553d..0585df6ab4 100644 --- a/mmdet3d/models/decode_heads/pointnet2_head.py +++ b/mmdet3d/models/decode_heads/pointnet2_head.py @@ -16,7 +16,7 @@ class PointNet2Head(Base3DDecodeHead): Args: fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules. - fp_norm_cfg (dict|None): Config of norm layers used in FP modules. + fp_norm_cfg (dict): Config of norm layers used in FP modules. Default: dict(type='BN2d'). """ diff --git a/mmdet3d/models/dense_heads/anchor3d_head.py b/mmdet3d/models/dense_heads/anchor3d_head.py index 72d98eedaa..422057dc82 100644 --- a/mmdet3d/models/dense_heads/anchor3d_head.py +++ b/mmdet3d/models/dense_heads/anchor3d_head.py @@ -145,7 +145,7 @@ def forward_single(self, x): x (torch.Tensor): Input features. Returns: - tuple[torch.Tensor]: Contain score of each class, bbox \ + tuple[torch.Tensor]: Contain score of each class, bbox regression and direction classification predictions. """ cls_score = self.conv_cls(x) @@ -163,7 +163,7 @@ def forward(self, feats): features produced by FPN. Returns: - tuple[list[torch.Tensor]]: Multi-level class score, bbox \ + tuple[list[torch.Tensor]]: Multi-level class score, bbox and direction predictions. """ return multi_apply(self.forward_single, feats) @@ -177,7 +177,7 @@ def get_anchors(self, featmap_sizes, input_metas, device='cuda'): device (str): device of current module. Returns: - list[list[torch.Tensor]]: Anchors of each image, valid flags \ + list[list[torch.Tensor]]: Anchors of each image, valid flags of each image. """ num_imgs = len(input_metas) @@ -207,7 +207,7 @@ def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels, num_total_samples (int): The number of valid samples. Returns: - tuple[torch.Tensor]: Losses of class, bbox \ + tuple[torch.Tensor]: Losses of class, bbox and direction, respectively. """ # classification loss @@ -285,7 +285,7 @@ def add_sin_difference(boxes1, boxes2): the 7th dimension is rotation dimension. Returns: - tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th \ + tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th dimensions are changed. """ rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos( @@ -318,16 +318,16 @@ class predictions. of each sample. gt_labels (list[torch.Tensor]): Gt labels of each sample. input_metas (list[dict]): Contain pcd and img's meta info. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify - which bounding. + gt_bboxes_ignore (list[torch.Tensor]): Specify + which bounding boxes to ignore. Returns: - dict[str, list[torch.Tensor]]: Classification, bbox, and \ + dict[str, list[torch.Tensor]]: Classification, bbox, and direction losses of each level. - loss_cls (list[torch.Tensor]): Classification losses. - loss_bbox (list[torch.Tensor]): Box regression losses. 
- - loss_dir (list[torch.Tensor]): Direction classification \ + - loss_dir (list[torch.Tensor]): Direction classification losses. """ featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] @@ -385,7 +385,7 @@ def get_bboxes(self, dir_cls_preds (list[torch.Tensor]): Multi-level direction class predictions. input_metas (list[dict]): Contain pcd and img's meta info. - cfg (None | :obj:`ConfigDict`): Training or testing config. + cfg (:obj:`ConfigDict`): Training or testing config. rescale (list[torch.Tensor]): Whether to rescale bboxes. Returns: @@ -439,7 +439,7 @@ def get_bboxes_single(self, mlvl_anchors (List[torch.Tensor]): Multi-level anchors in single batch. input_meta (list[dict]): Contain pcd and img's meta info. - cfg (None | :obj:`ConfigDict`): Training or testing config. + cfg (:obj:`ConfigDict`): Training or testing config. rescale (list[torch.Tensor]): whether to rescale bboxes. Returns: diff --git a/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py b/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py index a0957d29fe..a8131fc21e 100644 --- a/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py +++ b/mmdet3d/models/dense_heads/anchor_free_mono3d_head.py @@ -18,35 +18,45 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead): num_classes (int): Number of categories excluding the background category. in_channels (int): Number of channels in the input feature map. - feat_channels (int): Number of hidden channels. Used in child classes. - stacked_convs (int): Number of stacking convs of the head. - strides (tuple): Downsample factor of each feature map. - dcn_on_last_conv (bool): If true, use dcn in the last layer of - towers. Default: False. - conv_bias (bool | str): If specified as `auto`, it will be decided by - the norm_cfg. Bias of conv will be set as True if `norm_cfg` is - None, otherwise False. Default: "auto". + feat_channels (int, optional): Number of hidden channels. + Used in child classes. Defaults to 256. + stacked_convs (int, optional): Number of stacking convs of the head. + strides (tuple, optional): Downsample factor of each feature map. + dcn_on_last_conv (bool, optional): If true, use dcn in the last + layer of towers. Default: False. + conv_bias (bool | str, optional): If specified as `auto`, it will be + decided by the norm_cfg. Bias of conv will be set as True + if `norm_cfg` is None, otherwise False. Default: 'auto'.
+ background_label (int, optional): Label ID of background, + set as 0 for RPN and num_classes for other heads. + It will automatically set as `num_classes` if None is given. + use_direction_classifier (bool, optional): + Whether to add a direction classifier. + diff_rad_by_sin (bool, optional): Whether to change the difference + into sin difference for box regression loss. Defaults to True. + dir_offset (float, optional): Parameter used in direction + classification. Defaults to 0. + dir_limit_offset (float, optional): Parameter used in direction + classification. Defaults to 0. + loss_cls (dict, optional): Config of classification loss. + loss_bbox (dict, optional): Config of localization loss. + loss_dir (dict, optional): Config of direction classifier loss. + loss_attr (dict, optional): Config of attribute classifier loss, + which is only active when `pred_attrs=True`. + bbox_code_size (int, optional): Dimensions of predicted bounding boxes. + pred_attrs (bool, optional): Whether to predict attributes. + Defaults to False. + num_attrs (int, optional): The number of attributes to be predicted. + Default: 9. + pred_velo (bool, optional): Whether to predict velocity. + Defaults to False. + pred_bbox2d (bool, optional): Whether to predict 2D boxes. + Defaults to False. + group_reg_dims (tuple[int], optional): The dimension of each regression + target group. Default: (2, 1, 3, 1, 2). + cls_branch (tuple[int], optional): Channels for classification branch. Default: (128, 64). - reg_branch (tuple[tuple]): Channels for regression branch. + reg_branch (tuple[tuple], optional): Channels for regression branch. Default: ( (128, 64), # offset (128, 64), # depth @@ -54,14 +64,16 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead): (64, ), # rot () # velo ), - dir_branch (tuple[int]): Channels for direction classification branch. + dir_branch (tuple[int], optional): Channels for direction + classification branch. Default: (64, ). + attr_branch (tuple[int], optional): Channels for classification branch. Default: (64, ). - attr_branch (tuple[int]): Channels for classification branch. - Default: (64, ). - conv_cfg (dict): Config dict for convolution layer. Default: None. - norm_cfg (dict): Config dict for normalization layer. Default: None. - train_cfg (dict): Training config of anchor head. - test_cfg (dict): Testing config of anchor head. + conv_cfg (dict, optional): Config dict for convolution layer. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. + train_cfg (dict, optional): Training config of anchor head. + test_cfg (dict, optional): Testing config of anchor head. """ # noqa: W605 _version = 1 @@ -126,6 +138,7 @@ def __init__( self.use_direction_classifier = use_direction_classifier self.diff_rad_by_sin = diff_rad_by_sin self.dir_offset = dir_offset + self.dir_limit_offset = dir_limit_offset self.loss_cls = build_loss(loss_cls) self.loss_bbox = build_loss(loss_bbox) self.loss_dir = build_loss(loss_dir) @@ -290,7 +303,7 @@ def forward(self, feats): a 4D-tensor. Returns: - tuple: Usually contain classification scores, bbox predictions, \ + tuple: Usually contain classification scores, bbox predictions, and direction class predictions. cls_scores (list[Tensor]): Box scores for each scale level, each is a 4D-tensor, the channel number is @@ -402,7 +415,7 @@ def loss(self, corresponding to each box img_metas (list[dict]): Meta information of each image, e.g., image size, scaling factor, etc. 
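`dir_offset` and `dir_limit_offset`, newly documented and stored in `__init__` above, are the usual knobs of the period-limiting step used when binning yaw into direction classes. A sketch of that step, assuming the standard `limit_period` formulation (the yaw values below are illustrative):

```python
import numpy as np
import torch

def limit_period(val, offset=0.5, period=np.pi):
    """Map angles into [-offset * period, (1 - offset) * period)."""
    return val - torch.floor(val / period + offset) * period

dir_offset, dir_limit_offset = 0.0, 0.0   # docstring defaults
rot_gt = torch.tensor([0.3, 2.8, -1.9])   # hypothetical yaw targets

# Shift by dir_offset, wrap into a 2*pi window, then bin by half-turn.
offset_rot = limit_period(rot_gt - dir_offset, dir_limit_offset, 2 * np.pi)
dir_cls_targets = torch.floor(offset_rot / np.pi).long()  # 0 or 1
print(dir_cls_targets)
```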
- gt_bboxes_ignore (None | list[Tensor]): specify which bounding + gt_bboxes_ignore (list[Tensor]): specify which bounding boxes can be ignored when computing the loss. """ diff --git a/mmdet3d/models/dense_heads/centerpoint_head.py b/mmdet3d/models/dense_heads/centerpoint_head.py index aa6ff7cdda..5189002b5e 100644 --- a/mmdet3d/models/dense_heads/centerpoint_head.py +++ b/mmdet3d/models/dense_heads/centerpoint_head.py @@ -21,16 +21,16 @@ class SeparateHead(BaseModule): Args: in_channels (int): Input channels for conv_layer. heads (dict): Conv information. - head_conv (int): Output channels. + head_conv (int, optional): Output channels. Default: 64. - final_kernal (int): Kernal size for the last conv layer. + final_kernal (int, optional): Kernel size for the last conv layer. Default: 1. - init_bias (float): Initial bias. Default: -2.19. - conv_cfg (dict): Config of conv layer. + init_bias (float, optional): Initial bias. Default: -2.19. + conv_cfg (dict, optional): Config of conv layer. Default: dict(type='Conv2d') - norm_cfg (dict): Config of norm layer. + norm_cfg (dict, optional): Config of norm layer. Default: dict(type='BN2d'). - bias (str): Type of bias. Default: 'auto'. + bias (str, optional): Type of bias. Default: 'auto'. """ def __init__(self, @@ -100,17 +100,17 @@ def forward(self, x): Returns: dict[str: torch.Tensor]: contains the following keys: - -reg (torch.Tensor): 2D regression value with the \ + -reg (torch.Tensor): 2D regression value with the shape of [B, 2, H, W]. - -height (torch.Tensor): Height value with the \ + -height (torch.Tensor): Height value with the shape of [B, 1, H, W]. - -dim (torch.Tensor): Size value with the shape \ + -dim (torch.Tensor): Size value with the shape of [B, 3, H, W]. - -rot (torch.Tensor): Rotation value with the \ + -rot (torch.Tensor): Rotation value with the shape of [B, 2, H, W]. - -vel (torch.Tensor): Velocity value with the \ + -vel (torch.Tensor): Velocity value with the shape of [B, 2, H, W]. - -heatmap (torch.Tensor): Heatmap with the shape of \ + -heatmap (torch.Tensor): Heatmap with the shape of [B, N, H, W]. """ ret_dict = dict() @@ -131,18 +131,19 @@ class DCNSeparateHead(BaseModule): Args: in_channels (int): Input channels for conv_layer. + num_cls (int): Number of classes. heads (dict): Conv information. dcn_config (dict): Config of dcn layer. - num_cls (int): Output channels. + head_conv (int, optional): Output channels. Default: 64. - final_kernal (int): Kernal size for the last conv layer. - Deafult: 1. - init_bias (float): Initial bias. Default: -2.19. - conv_cfg (dict): Config of conv layer. + final_kernal (int, optional): Kernel size for the last conv + layer. Default: 1. + init_bias (float, optional): Initial bias. Default: -2.19. + conv_cfg (dict, optional): Config of conv layer. Default: dict(type='Conv2d') - norm_cfg (dict): Config of norm layer. + norm_cfg (dict, optional): Config of norm layer. Default: dict(type='BN2d'). - bias (str): Type of bias. Default: 'auto'. + bias (str, optional): Type of bias. Default: 'auto'. """ # noqa: W605 def __init__(self, @@ -215,17 +216,17 @@ def forward(self, x): Returns: dict[str: torch.Tensor]: contains the following keys: - -reg (torch.Tensor): 2D regression value with the \ + -reg (torch.Tensor): 2D regression value with the shape of [B, 2, H, W]. - -height (torch.Tensor): Height value with the \ + -height (torch.Tensor): Height value with the shape of [B, 1, H, W].
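The `heads` dict and the per-branch [B, ·, H, W] outputs listed in the `forward` docstrings above follow a simple convention: each key maps to an (output_channels, num_convs) pair. A minimal stand-in module illustrating those documented shapes (a sketch, not the mmdet3d implementation):

```python
import torch
from torch import nn

class TinySeparateHead(nn.Module):
    """One small conv branch per regression target, as documented above."""

    def __init__(self, in_channels=64, head_conv=64, heads=None,
                 final_kernel=1):
        super().__init__()
        # (out_channels, num_convs) per key, mirroring the documented layout.
        heads = heads or dict(reg=(2, 2), height=(1, 2), dim=(3, 2),
                              rot=(2, 2), vel=(2, 2), heatmap=(10, 2))
        self.branches = nn.ModuleDict()
        for name, (out_c, num_convs) in heads.items():
            layers, c = [], in_channels
            for _ in range(num_convs - 1):
                layers += [nn.Conv2d(c, head_conv, 3, padding=1), nn.ReLU()]
                c = head_conv
            layers.append(nn.Conv2d(c, out_c, final_kernel,
                                    padding=final_kernel // 2))
            self.branches[name] = nn.Sequential(*layers)

    def forward(self, x):
        return {name: branch(x) for name, branch in self.branches.items()}

out = TinySeparateHead()(torch.randn(2, 64, 128, 128))
print({k: tuple(v.shape) for k, v in out.items()})  # reg -> (2, 2, 128, 128), ...
```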
- -dim (torch.Tensor): Size value with the shape \ + -dim (torch.Tensor): Size value with the shape of [B, 3, H, W]. - -rot (torch.Tensor): Rotation value with the \ + -rot (torch.Tensor): Rotation value with the shape of [B, 2, H, W]. - -vel (torch.Tensor): Velocity value with the \ + -vel (torch.Tensor): Velocity value with the shape of [B, 2, H, W]. - -heatmap (torch.Tensor): Heatmap with the shape of \ + -heatmap (torch.Tensor): Heatmap with the shape of [B, N, H, W]. """ center_feat = self.feature_adapt_cls(x) @@ -243,31 +244,30 @@ class CenterHead(BaseModule): """CenterHead for CenterPoint. Args: - mode (str): Mode of the head. Default: '3d'. - in_channels (list[int] | int): Channels of the input feature map. - Default: [128]. - tasks (list[dict]): Task information including class number + in_channels (list[int] | int, optional): Channels of the input + feature map. Default: [128]. + tasks (list[dict], optional): Task information including class number and class names. Default: None. - dataset (str): Name of the dataset. Default: 'nuscenes'. - weight (float): Weight for location loss. Default: 0.25. - code_weights (list[int]): Code weights for location loss. Default: []. - common_heads (dict): Conv information for common heads. + train_cfg (dict, optional): Train-time configs. Default: None. + test_cfg (dict, optional): Test-time configs. Default: None. + bbox_coder (dict, optional): Bbox coder configs. Default: None. + common_heads (dict, optional): Conv information for common heads. Default: dict(). - loss_cls (dict): Config of classification loss function. + loss_cls (dict, optional): Config of classification loss function. Default: dict(type='GaussianFocalLoss', reduction='mean'). - loss_bbox (dict): Config of regression loss function. + loss_bbox (dict, optional): Config of regression loss function. Default: dict(type='L1Loss', reduction='none'). - separate_head (dict): Config of separate head. Default: dict( + separate_head (dict, optional): Config of separate head. Default: dict( type='SeparateHead', init_bias=-2.19, final_kernel=3) - share_conv_channel (int): Output channels for share_conv_layer. - Default: 64. - num_heatmap_convs (int): Number of conv layers for heatmap conv layer. - Default: 2. - conv_cfg (dict): Config of conv layer. + share_conv_channel (int, optional): Output channels for share_conv + layer. Default: 64. + num_heatmap_convs (int, optional): Number of conv layers for heatmap + conv layer. Default: 2. + conv_cfg (dict, optional): Config of conv layer. Default: dict(type='Conv2d') - norm_cfg (dict): Config of norm layer. + norm_cfg (dict, optional): Config of norm layer. Default: dict(type='BN2d'). - bias (str): Type of bias. Default: 'auto'. + bias (str, optional): Type of bias. Default: 'auto'. """ def __init__(self, @@ -366,8 +366,8 @@ def _gather_feat(self, feat, ind, mask=None): feat (torch.Tensor): Feature map with the shape of [B, H*W, 10]. ind (torch.Tensor): Index of the ground truth boxes with the shape of [B, max_obj]. - mask (torch.Tensor): Mask of the feature map with the shape - of [B, max_obj]. Default: None. + mask (torch.Tensor, optional): Mask of the feature map with the + shape of [B, max_obj]. Default: None. Returns: torch.Tensor: Feature map after gathering with the shape @@ -403,14 +403,14 @@ def get_targets(self, gt_bboxes_3d, gt_labels_3d): Returns: - tuple[list[torch.Tensor]]: Tuple of target including \ + tuple[list[torch.Tensor]]: Tuple of target including the following results in order.
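`_gather_feat` (documented above) is a batched index-select: for each of the `max_obj` ground-truth slots it picks one feature vector out of the flattened H*W map. A self-contained sketch of that gather:

```python
import torch

def gather_feat(feat, ind, mask=None):
    """feat: [B, H*W, C]; ind: [B, max_obj] -> [B, max_obj, C]."""
    dim = feat.size(2)
    ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
    feat = feat.gather(1, ind)
    if mask is not None:                       # keep only valid objects
        mask = mask.unsqueeze(2).expand_as(feat)
        feat = feat[mask].view(-1, dim)
    return feat

feat = torch.arange(2 * 6 * 3, dtype=torch.float).view(2, 6, 3)  # B=2, H*W=6
ind = torch.tensor([[0, 5], [2, 2]])
print(gather_feat(feat, ind))  # rows 0 and 5 of sample 0, row 2 twice for sample 1
```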
- list[torch.Tensor]: Heatmap scores. - list[torch.Tensor]: Ground truth boxes. - - list[torch.Tensor]: Indexes indicating the \ + - list[torch.Tensor]: Indexes indicating the position of the valid boxes. - - list[torch.Tensor]: Masks indicating which \ + - list[torch.Tensor]: Masks indicating which boxes are valid. """ heatmaps, anno_boxes, inds, masks = multi_apply( @@ -437,14 +437,14 @@ def get_targets_single(self, gt_bboxes_3d, gt_labels_3d): gt_labels_3d (torch.Tensor): Labels of boxes. Returns: - tuple[list[torch.Tensor]]: Tuple of target including \ + tuple[list[torch.Tensor]]: Tuple of target including the following results in order. - list[torch.Tensor]: Heatmap scores. - list[torch.Tensor]: Ground truth boxes. - - list[torch.Tensor]: Indexes indicating the position \ + - list[torch.Tensor]: Indexes indicating the position of the valid boxes. - - list[torch.Tensor]: Masks indicating which boxes \ + - list[torch.Tensor]: Masks indicating which boxes are valid. """ device = gt_labels_3d.device @@ -728,11 +728,11 @@ def get_task_detections(self, num_class_with_bg, batch_cls_preds, Returns: list[dict[str: torch.Tensor]]: contains the following keys: - -bboxes (torch.Tensor): Prediction bboxes after nms with the \ + -bboxes (torch.Tensor): Prediction bboxes after nms with the shape of [N, 9]. - -scores (torch.Tensor): Prediction scores after nms with the \ + -scores (torch.Tensor): Prediction scores after nms with the shape of [N]. - -labels (torch.Tensor): Prediction labels after nms with the \ + -labels (torch.Tensor): Prediction labels after nms with the shape of [N]. """ predictions_dicts = [] @@ -781,7 +781,7 @@ def get_task_detections(self, num_class_with_bg, batch_cls_preds, boxes_for_nms, top_scores, thresh=self.test_cfg['nms_thr'], - pre_maxsize=self.test_cfg['pre_max_size'], + pre_max_size=self.test_cfg['pre_max_size'], post_max_size=self.test_cfg['post_max_size']) else: selected = [] diff --git a/mmdet3d/models/dense_heads/fcos_mono3d_head.py b/mmdet3d/models/dense_heads/fcos_mono3d_head.py index 5960c68e9c..c0b15a4581 100644 --- a/mmdet3d/models/dense_heads/fcos_mono3d_head.py +++ b/mmdet3d/models/dense_heads/fcos_mono3d_head.py @@ -21,25 +21,25 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): num_classes (int): Number of categories excluding the background category. in_channels (int): Number of channels in the input feature map. - regress_ranges (tuple[tuple[int, int]]): Regress range of multiple + regress_ranges (tuple[tuple[int, int]], optional): Regress range of multiple level points. - center_sampling (bool): If true, use center sampling. Default: True. - center_sample_radius (float): Radius of center sampling. Default: 1.5. - norm_on_bbox (bool): If true, normalize the regression targets + center_sampling (bool, optional): If true, use center sampling. Default: True. + center_sample_radius (float, optional): Radius of center sampling. Default: 1.5. + norm_on_bbox (bool, optional): If true, normalize the regression targets with FPN strides. Default: True. - centerness_on_reg (bool): If true, position centerness on the + centerness_on_reg (bool, optional): If true, position centerness on the regress branch. Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042. Default: True. - centerness_alpha: Parameter used to adjust the intensity attenuation - from the center to the periphery. Default: 2.5. - loss_cls (dict): Config of classification loss. - loss_bbox (dict): Config of localization loss. 
- loss_dir (dict): Config of direction classification loss. - loss_attr (dict): Config of attribute classification loss. - loss_centerness (dict): Config of centerness loss. - norm_cfg (dict): dictionary to construct and config norm layer. + centerness_alpha (float, optional): Parameter used to adjust the intensity + attenuation from the center to the periphery. Default: 2.5. + loss_cls (dict, optional): Config of classification loss. + loss_bbox (dict, optional): Config of localization loss. + loss_dir (dict, optional): Config of direction classification loss. + loss_attr (dict, optional): Config of attribute classification loss. + loss_centerness (dict, optional): Config of centerness loss. + norm_cfg (dict, optional): dictionary to construct and config norm layer. Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True). - centerness_branch (tuple[int]): Channels for centerness branch. + centerness_branch (tuple[int], optional): Channels for centerness branch. Default: (64, ). """ # noqa: E501 @@ -153,7 +153,7 @@ def forward_single(self, x, scale, stride): is True. Returns: - tuple: scores for each class, bbox and direction class \ + tuple: scores for each class, bbox and direction class predictions, centerness predictions of input feature maps. """ cls_score, bbox_pred, dir_cls_pred, attr_pred, cls_feat, reg_feat = \ @@ -201,7 +201,7 @@ def add_sin_difference(boxes1, boxes2): the 7th dimension is rotation dimension. Returns: - tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th \ + tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th dimensions are changed. """ rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos( @@ -295,7 +295,7 @@ def loss(self, attr_labels (list[Tensor]): Attributes indices of each box. img_metas (list[dict]): Meta information of each image, e.g., image size, scaling factor, etc. - gt_bboxes_ignore (None | list[Tensor]): specify which bounding + gt_bboxes_ignore (list[Tensor]): specify which bounding boxes can be ignored when computing the loss. Returns: @@ -507,11 +507,11 @@ def get_bboxes(self, rescale (bool): If True, return boxes in original image space Returns: - list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple. \ - The first item is an (n, 5) tensor, where the first 4 columns \ - are bounding box positions (tl_x, tl_y, br_x, br_y) and the \ - 5-th column is a score between 0 and 1. The second item is a \ - (n,) tensor where each item is the predicted class label of \ + list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple. + The first item is an (n, 5) tensor, where the first 4 columns + are bounding box positions (tl_x, tl_y, br_x, br_y) and the + 5-th column is a score between 0 and 1. The second item is a + (n,) tensor where each item is the predicted class label of the corresponding box. """ assert len(cls_scores) == len(bbox_preds) == len(dir_cls_preds) == \ @@ -580,7 +580,7 @@ def _get_bboxes_single(self, bbox_preds (list[Tensor]): Box energies / deltas for a single scale level with shape (num_points * bbox_code_size, H, W).
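`loss_centerness` and `centerness_on_reg` above refer to the standard FCOS centerness target, the geometric mean of the left/right and top/bottom distance ratios; a sketch of that formula (this is the FCOS definition, not code from the diff):

```python
import torch

def centerness_target(bbox_targets):
    """bbox_targets: [N, 4] distances (left, top, right, bottom) -> [N]."""
    left_right = bbox_targets[:, [0, 2]]
    top_bottom = bbox_targets[:, [1, 3]]
    centerness = ((left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) *
                  (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]))
    return torch.sqrt(centerness)

t = torch.tensor([[10., 10., 10., 10.],   # point at the box center
                  [2., 10., 18., 10.]])   # point far off-center
print(centerness_target(t))               # -> [1.0000, 0.3333]
```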
dir_cls_preds (list[Tensor]): Box scores for direction class - predictions on a single scale level with shape \ + predictions on a single scale level with shape (num_points * 2, H, W) attr_preds (list[Tensor]): Attribute scores for each scale level Has shape (N, num_points * num_attrs, H, W) @@ -700,12 +700,12 @@ def _get_bboxes_single(self, def pts2Dto3D(points, view): """ Args: - points (torch.Tensor): points in 2D images, [N, 3], \ + points (torch.Tensor): points in 2D images, [N, 3], 3 corresponds with x, y in the image and depth. view (np.ndarray): camera intrinsic, [3, 3] Returns: - torch.Tensor: points in 3D space. [N, 3], \ + torch.Tensor: points in 3D space. [N, 3], 3 corresponds with x, y, z in 3D space. """ assert view.shape[0] <= 4 @@ -767,8 +767,8 @@ def get_targets(self, points, gt_bboxes_list, gt_labels_list, Returns: tuple: - concat_lvl_labels (list[Tensor]): Labels of each level. \ - concat_lvl_bbox_targets (list[Tensor]): BBox targets of each \ + concat_lvl_labels (list[Tensor]): Labels of each level. + concat_lvl_bbox_targets (list[Tensor]): BBox targets of each level. """ assert len(points) == len(self.regress_ranges) diff --git a/mmdet3d/models/dense_heads/groupfree3d_head.py b/mmdet3d/models/dense_heads/groupfree3d_head.py index bb02b401b1..9280e79cd3 100644 --- a/mmdet3d/models/dense_heads/groupfree3d_head.py +++ b/mmdet3d/models/dense_heads/groupfree3d_head.py @@ -25,13 +25,13 @@ class PointsObjClsModule(BaseModule): Args: in_channel (int): number of channels of seed point features. - num_convs (int): number of conv layers. + num_convs (int, optional): number of conv layers. Default: 3. - conv_cfg (dict): Config of convolution. + conv_cfg (dict, optional): Config of convolution. Default: dict(type='Conv1d'). - norm_cfg (dict): Config of normalization. + norm_cfg (dict, optional): Config of normalization. Default: dict(type='BN1d'). - act_cfg (dict): Config of activation. + act_cfg (dict, optional): Config of activation. Default: dict(type='ReLU'). """ @@ -405,15 +405,15 @@ def loss(self, Args: bbox_preds (dict): Predictions from forward of vote head. points (list[torch.Tensor]): Input points. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each sample. gt_labels_3d (list[torch.Tensor]): Labels of each sample. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic mask. - pts_instance_mask (None | list[torch.Tensor]): Point-wise + pts_instance_mask (list[torch.Tensor]): Point-wise instance mask. img_metas (list[dict]): Contain pcd and img's meta info. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify + gt_bboxes_ignore (list[torch.Tensor]): Specify which bounding boxes to ignore. ret_target (bool): Return targets or not. @@ -545,12 +545,12 @@ def get_targets(self, Args: points (list[torch.Tensor]): Points of each batch. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each batch. gt_labels_3d (list[torch.Tensor]): Labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic label of each batch. - pts_instance_mask (None | list[torch.Tensor]): Point-wise instance + pts_instance_mask (list[torch.Tensor]): Point-wise instance label of each batch. bbox_preds (torch.Tensor): Bounding box predictions of vote head.
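`pts2Dto3D` above unprojects image points with depth back to 3D via the pinhole relation x3d = depth · K⁻¹ · [u, v, 1]ᵀ. A numpy sketch of that relation (not the mmdet3d implementation, which also handles padded 4x4 view matrices; the intrinsics below are illustrative KITTI-like values):

```python
import numpy as np

def pts_2d_to_3d(points, view):
    """points: [N, 3] as (u, v, depth); view: [3, 3] intrinsics -> [N, 3]."""
    uv1 = np.concatenate([points[:, :2], np.ones((len(points), 1))], axis=1)
    return (np.linalg.inv(view) @ uv1.T).T * points[:, 2:3]

K = np.array([[721.5, 0., 609.6], [0., 721.5, 172.9], [0., 0., 1.]])
pts_2d = np.array([[609.6, 172.9, 10.0]])  # principal point at 10 m depth
print(pts_2d_to_3d(pts_2d, K))             # -> [[0., 0., 10.]]
```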
max_gt_num (int): Max number of GTs for single batch. @@ -657,12 +657,12 @@ def get_targets_single(self, Args: points (torch.Tensor): Points of each batch. - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \ + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes of each batch. gt_labels_3d (torch.Tensor): Labels of each batch. - pts_semantic_mask (None | torch.Tensor): Point-wise semantic + pts_semantic_mask (torch.Tensor): Point-wise semantic label of each batch. - pts_instance_mask (None | torch.Tensor): Point-wise instance + pts_instance_mask (torch.Tensor): Point-wise instance label of each batch. max_gt_nums (int): Max number of GTs for single batch. seed_points (torch.Tensor): Coordinates of seed points. @@ -710,7 +710,7 @@ def get_targets_single(self, if self.bbox_coder.with_rot: vote_targets = points.new_zeros([num_points, 4 * self.gt_per_seed]) vote_target_idx = points.new_zeros([num_points], dtype=torch.long) - box_indices_all = gt_bboxes_3d.points_in_boxes(points) + box_indices_all = gt_bboxes_3d.points_in_boxes_part(points) for i in range(gt_labels_3d.shape[0]): box_indices = box_indices_all[:, i] indices = torch.nonzero( @@ -951,7 +951,7 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, box_dim=bbox.shape[-1], with_yaw=self.bbox_coder.with_rot, origin=(0.5, 0.5, 0.5)) - box_indices = bbox.points_in_boxes_batch(points) + box_indices = bbox.points_in_boxes_all(points) corner3d = bbox.corners minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6))) diff --git a/mmdet3d/models/dense_heads/parta2_rpn_head.py b/mmdet3d/models/dense_heads/parta2_rpn_head.py index 89136e58c4..e6b5f9305c 100644 --- a/mmdet3d/models/dense_heads/parta2_rpn_head.py +++ b/mmdet3d/models/dense_heads/parta2_rpn_head.py @@ -100,20 +100,20 @@ def loss(self, bbox_preds (list[torch.Tensor]): Multi-level bbox predictions. dir_cls_preds (list[torch.Tensor]): Multi-level direction class predictions. - gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes \ + gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes of each sample. gt_labels (list[torch.Tensor]): Labels of each sample. input_metas (list[dict]): Point cloud and image's meta info. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify + gt_bboxes_ignore (list[torch.Tensor]): Specify which bounding boxes to ignore. Returns: - dict[str, list[torch.Tensor]]: Classification, bbox, and \ + dict[str, list[torch.Tensor]]: Classification, bbox, and direction losses of each level. - loss_rpn_cls (list[torch.Tensor]): Classification losses. - loss_rpn_bbox (list[torch.Tensor]): Box regression losses. - - loss_rpn_dir (list[torch.Tensor]): Direction classification \ + - loss_rpn_dir (list[torch.Tensor]): Direction classification losses. """ loss_dict = super().loss(cls_scores, bbox_preds, dir_cls_preds, @@ -143,7 +143,7 @@ def get_bboxes_single(self, mlvl_anchors (List[torch.Tensor]): Multi-level anchors in single batch. input_meta (list[dict]): Contain pcd and img's meta info. - cfg (None | :obj:`ConfigDict`): Training or testing config. + cfg (:obj:`ConfigDict`): Training or testing config. rescale (list[torch.Tensor]): whether to rescale bboxes. Returns: @@ -240,7 +240,7 @@ def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms, Multi-level bbox. score_thr (int): Score threshold. max_num (int): Max number of bboxes after nms. - cfg (None | :obj:`ConfigDict`): Training or testing config. + cfg (:obj:`ConfigDict`): Training or testing config. input_meta (dict): Contain pcd and img's meta info.
Returns: diff --git a/mmdet3d/models/dense_heads/shape_aware_head.py b/mmdet3d/models/dense_heads/shape_aware_head.py index 9b7e5eedb6..70b15f4fd6 100644 --- a/mmdet3d/models/dense_heads/shape_aware_head.py +++ b/mmdet3d/models/dense_heads/shape_aware_head.py @@ -30,15 +30,17 @@ class BaseShapeHead(BaseModule): num_base_anchors (int): Number of anchors per location. box_code_size (int): The dimension of boxes to be encoded. in_channels (int): Input channels for convolutional layers. - shared_conv_channels (tuple): Channels for shared convolutional \ - layers. Default: (64, 64). \ - shared_conv_strides (tuple): Strides for shared convolutional \ - layers. Default: (1, 1). - use_direction_classifier (bool, optional): Whether to use direction \ + shared_conv_channels (tuple, optional): Channels for shared + convolutional layers. Default: (64, 64). + shared_conv_strides (tuple, optional): Strides for shared + convolutional layers. Default: (1, 1). + use_direction_classifier (bool, optional): Whether to use direction classifier. Default: True. - conv_cfg (dict): Config of conv layer. Default: dict(type='Conv2d') - norm_cfg (dict): Config of norm layer. Default: dict(type='BN2d'). - bias (bool|str, optional): Type of bias. Default: False. + conv_cfg (dict, optional): Config of conv layer. + Default: dict(type='Conv2d') + norm_cfg (dict, optional): Config of norm layer. + Default: dict(type='BN2d'). + bias (bool | str, optional): Type of bias. Default: False. """ def __init__(self, @@ -127,11 +129,11 @@ def forward(self, x): [B, C, H, W]. Returns: - dict[torch.Tensor]: Contain score of each class, bbox \ - regression and direction classification predictions. \ - Note that all the returned tensors are reshaped as \ - [bs*num_base_anchors*H*W, num_cls/box_code_size/dir_bins]. \ - It is more convenient to concat anchors for different \ + dict[torch.Tensor]: Contains the score of each class, bbox + regression and direction classification predictions. + Note that all the returned tensors are reshaped as + [bs*num_base_anchors*H*W, num_cls/box_code_size/dir_bins]. + It is more convenient to concat anchors for different classes even though they have different feature map sizes. """ x = self.shared_conv(x) @@ -168,9 +170,9 @@ class ShapeAwareHead(Anchor3DHead): Args: tasks (dict): Shape-aware groups of multi-class objects. - assign_per_class (bool, optional): Whether to do assignment for each \ + assign_per_class (bool, optional): Whether to do assignment for each class. Default: True. - kwargs (dict): Other arguments are the same as those in \ + kwargs (dict): Other arguments are the same as those in :class:`Anchor3DHead`. """ @@ -217,7 +219,7 @@ def forward_single(self, x): Args: x (torch.Tensor): Input features. Returns: - tuple[torch.Tensor]: Contain score of each class, bbox \ + tuple[torch.Tensor]: Contains the score of each class, bbox regression and direction classification predictions. """ results = [] @@ -263,7 +265,7 @@ def loss_single(self, cls_score, bbox_pred, dir_cls_preds, labels, num_total_samples (int): The number of valid samples. Returns: - tuple[torch.Tensor]: Losses of class, bbox \ + tuple[torch.Tensor]: Losses of class, bbox and direction, respectively. """ # classification loss @@ -325,16 +327,16 @@ class predictions. of each sample. gt_labels (list[torch.Tensor]): Gt labels of each sample. input_metas (list[dict]): Contain pcd and img's meta info. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify + gt_bboxes_ignore (list[torch.Tensor]): Specify which bounding boxes to ignore.
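The `forward` docstring above notes that each shape-aware group's predictions are reshaped to [bs*num_base_anchors*H*W, C] so that groups with different feature-map sizes can be concatenated along dim 0. A sketch of that reshape for a class-score map (the A-major channel layout is an assumption for illustration):

```python
import torch

B, A, C, H, W = 2, 2, 3, 4, 4          # batch, anchors/loc, classes, feat size
cls_score = torch.randn(B, A * C, H, W)

# [B, A*C, H, W] -> [B, A, C, H, W] -> [B, A, H, W, C] -> [B*A*H*W, C]
flat = (cls_score.view(B, A, C, H, W)
        .permute(0, 1, 3, 4, 2)
        .reshape(-1, C))
print(flat.shape)  # torch.Size([64, 3]); groups of any H, W concat on dim 0
```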
Returns: - dict[str, list[torch.Tensor]]: Classification, bbox, and \ + dict[str, list[torch.Tensor]]: Classification, bbox, and direction losses of each level. - loss_cls (list[torch.Tensor]): Classification losses. - loss_bbox (list[torch.Tensor]): Box regression losses. - - loss_dir (list[torch.Tensor]): Direction classification \ + - loss_dir (list[torch.Tensor]): Direction classification losses. """ device = cls_scores[0].device @@ -388,7 +390,7 @@ def get_bboxes(self, dir_cls_preds (list[torch.Tensor]): Multi-level direction class predictions. input_metas (list[dict]): Contain pcd and img's meta info. - cfg (None | :obj:`ConfigDict`): Training or testing config. + cfg (:obj:`ConfigDict`, optional): Training or testing config. Default: None. rescale (list[torch.Tensor], optional): Whether to rescale bbox. Default: False. @@ -443,8 +445,8 @@ def get_bboxes_single(self, mlvl_anchors (List[torch.Tensor]): Multi-level anchors in single batch. input_meta (list[dict]): Contain pcd and img's meta info. - cfg (None | :obj:`ConfigDict`): Training or testing config. - rescale (list[torch.Tensor], optional): whether to rescale bbox. \ + cfg (:obj:`ConfigDict`): Training or testing config. + rescale (list[torch.Tensor], optional): whether to rescale bbox. Default: False. Returns: diff --git a/mmdet3d/models/dense_heads/ssd_3d_head.py b/mmdet3d/models/dense_heads/ssd_3d_head.py index 26030a149c..2baa9fb9bb 100644 --- a/mmdet3d/models/dense_heads/ssd_3d_head.py +++ b/mmdet3d/models/dense_heads/ssd_3d_head.py @@ -128,15 +128,15 @@ def loss(self, Args: bbox_preds (dict): Predictions from forward of SSD3DHead. points (list[torch.Tensor]): Input points. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each sample. gt_labels_3d (list[torch.Tensor]): Labels of each sample. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic mask. - pts_instance_mask (None | list[torch.Tensor]): Point-wise + pts_instance_mask (list[torch.Tensor]): Point-wise instance mask. img_metas (list[dict]): Contain pcd and img's meta info. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify + gt_bboxes_ignore (list[torch.Tensor]): Specify which bounding boxes to ignore. Returns: @@ -231,12 +231,12 @@ def get_targets(self, Args: points (list[torch.Tensor]): Points of each batch. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each batch. gt_labels_3d (list[torch.Tensor]): Labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic label of each batch. - pts_instance_mask (None | list[torch.Tensor]): Point-wise instance + pts_instance_mask (list[torch.Tensor]): Point-wise instance label of each batch. bbox_preds (torch.Tensor): Bounding box predictions of ssd3d head. @@ -320,12 +320,12 @@ def get_targets_single(self, Args: points (torch.Tensor): Points of each batch. - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \ + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes of each batch. gt_labels_3d (torch.Tensor): Labels of each batch. - pts_semantic_mask (None | torch.Tensor): Point-wise semantic + pts_semantic_mask (torch.Tensor): Point-wise semantic label of each batch.
- pts_instance_mask (None | torch.Tensor): Point-wise instance + pts_instance_mask (torch.Tensor): Point-wise instance label of each batch. aggregated_points (torch.Tensor): Aggregated points from candidate points layer. @@ -494,7 +494,7 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, origin=(0.5, 0.5, 0.5)) if isinstance(bbox, (LiDARInstance3DBoxes, DepthInstance3DBoxes)): - box_indices = bbox.points_in_boxes_batch(points) + box_indices = bbox.points_in_boxes_all(points) nonempty_box_mask = box_indices.T.sum(1) >= 0 else: raise NotImplementedError('Unsupported bbox type!') @@ -550,7 +550,7 @@ def _assign_targets_by_points_inside(self, bboxes_3d, points): inside bbox and the index of box where each point are in. """ if isinstance(bboxes_3d, (LiDARInstance3DBoxes, DepthInstance3DBoxes)): - points_mask = bboxes_3d.points_in_boxes_batch(points) + points_mask = bboxes_3d.points_in_boxes_all(points) assignment = points_mask.argmax(dim=-1) else: raise NotImplementedError('Unsupported bbox type!') diff --git a/mmdet3d/models/dense_heads/train_mixins.py b/mmdet3d/models/dense_heads/train_mixins.py index 84b16e8967..c3bcf834de 100644 --- a/mmdet3d/models/dense_heads/train_mixins.py +++ b/mmdet3d/models/dense_heads/train_mixins.py @@ -25,7 +25,7 @@ def anchor_target_3d(self, gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each image. input_metas (list[dict]): Meta info of each image. - gt_bboxes_ignore_list (None | list): Ignore list of gt bboxes. + gt_bboxes_ignore_list (list): Ignore list of gt bboxes. gt_labels_list (list[torch.Tensor]): Gt labels of batches. label_channels (int): The channel of labels. num_classes (int): The number of classes. diff --git a/mmdet3d/models/dense_heads/vote_head.py b/mmdet3d/models/dense_heads/vote_head.py index f8ddd0ef73..cc31460d56 100644 --- a/mmdet3d/models/dense_heads/vote_head.py +++ b/mmdet3d/models/dense_heads/vote_head.py @@ -234,15 +234,15 @@ def loss(self, Args: bbox_preds (dict): Predictions from forward of vote head. points (list[torch.Tensor]): Input points. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each sample. gt_labels_3d (list[torch.Tensor]): Labels of each sample. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic mask. - pts_instance_mask (None | list[torch.Tensor]): Point-wise + pts_instance_mask (list[torch.Tensor]): Point-wise instance mask. img_metas (list[dict]): Contain pcd and img's meta info. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify + gt_bboxes_ignore (list[torch.Tensor]): Specify which bounding boxes to ignore. ret_target (bool): Return targets or not. @@ -358,12 +358,12 @@ def get_targets(self, Args: points (list[torch.Tensor]): Points of each batch. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each batch. gt_labels_3d (list[torch.Tensor]): Labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic label of each batch. - pts_instance_mask (None | list[torch.Tensor]): Point-wise instance + pts_instance_mask (list[torch.Tensor]): Point-wise instance label of each batch. bbox_preds (torch.Tensor): Bounding box predictions of vote head. @@ -447,12 +447,12 @@ def get_targets_single(self, Args: points (torch.Tensor): Points of each batch.
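`_assign_targets_by_points_inside` above takes the [N, num_boxes] mask from `points_in_boxes_all` (the new name of `points_in_boxes_batch`) and argmaxes it into a per-point box assignment. A simplified axis-aligned sketch of that pattern; real boxes carry yaw and use a bottom-center origin, both of which this ignores:

```python
import torch

def points_in_aabbs_all(points, boxes):
    """points: [N, 3]; boxes: [M, 6] as (x, y, z, dx, dy, dz), gravity center.

    Returns an [N, M] 0/1 mask, mimicking points_in_boxes_all for yaw = 0.
    """
    centers, dims = boxes[:, :3], boxes[:, 3:6]
    rel = (points[:, None, :] - centers[None]).abs()   # [N, M, 3]
    return (rel <= dims[None] / 2).all(dim=-1).float()

points = torch.tensor([[0., 0., 0.], [5., 0., 0.]])
boxes = torch.tensor([[0., 0., 0., 2., 2., 2.], [5., 0., 0., 2., 2., 2.]])
mask = points_in_aabbs_all(points, boxes)  # [[1, 0], [0, 1]]
assignment = mask.argmax(dim=-1)           # per-point box index, as in the diff
print(mask, assignment)
```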
- gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \ + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes of each batch. gt_labels_3d (torch.Tensor): Labels of each batch. - pts_semantic_mask (None | torch.Tensor): Point-wise semantic + pts_semantic_mask (torch.Tensor): Point-wise semantic label of each batch. - pts_instance_mask (None | torch.Tensor): Point-wise instance + pts_instance_mask (torch.Tensor): Point-wise instance label of each batch. aggregated_points (torch.Tensor): Aggregated points from vote aggregation layer. @@ -471,7 +471,7 @@ def get_targets_single(self, vote_target_masks = points.new_zeros([num_points], dtype=torch.long) vote_target_idx = points.new_zeros([num_points], dtype=torch.long) - box_indices_all = gt_bboxes_3d.points_in_boxes_batch(points) + box_indices_all = gt_bboxes_3d.points_in_boxes_all(points) for i in range(gt_labels_3d.shape[0]): box_indices = box_indices_all[:, i] indices = torch.nonzero( @@ -621,7 +621,7 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, box_dim=bbox.shape[-1], with_yaw=self.bbox_coder.with_rot, origin=(0.5, 0.5, 0.5)) - box_indices = bbox.points_in_boxes_batch(points) + box_indices = bbox.points_in_boxes_all(points) corner3d = bbox.corners minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6))) diff --git a/mmdet3d/models/detectors/centerpoint.py b/mmdet3d/models/detectors/centerpoint.py index 640bb8b338..d6e971d2b9 100644 --- a/mmdet3d/models/detectors/centerpoint.py +++ b/mmdet3d/models/detectors/centerpoint.py @@ -97,7 +97,8 @@ def aug_test_pts(self, feats, img_metas, rescale=False): Args: feats (list[torch.Tensor]): Feature of point cloud. img_metas (list[dict]): Meta information of samples. - rescale (bool): Whether to rescale bboxes. Default: False. + rescale (bool, optional): Whether to rescale bboxes. + Default: False. Returns: dict: Returned bboxes consists of the following keys: diff --git a/mmdet3d/models/detectors/groupfree3dnet.py b/mmdet3d/models/detectors/groupfree3dnet.py index 1260e868b0..52b3fe8184 100644 --- a/mmdet3d/models/detectors/groupfree3dnet.py +++ b/mmdet3d/models/detectors/groupfree3dnet.py @@ -38,11 +38,11 @@ def forward_train(self, img_metas (list): Image metas. gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch. gt_labels_3d (list[torch.Tensor]): gt class labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic + pts_semantic_mask (list[torch.Tensor]): point-wise semantic label of each batch. - pts_instance_mask (None | list[torch.Tensor]): point-wise instance + pts_instance_mask (list[torch.Tensor]): point-wise instance label of each batch. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify + gt_bboxes_ignore (list[torch.Tensor]): Specify which bounding boxes to ignore. Returns: diff --git a/mmdet3d/models/detectors/h3dnet.py b/mmdet3d/models/detectors/h3dnet.py index d7bf8e29a7..bbae09a8df 100644 --- a/mmdet3d/models/detectors/h3dnet.py +++ b/mmdet3d/models/detectors/h3dnet.py @@ -47,11 +47,11 @@ def forward_train(self, img_metas (list): Image metas. gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch. gt_labels_3d (list[torch.Tensor]): gt class labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic + pts_semantic_mask (list[torch.Tensor]): point-wise semantic label of each batch. - pts_instance_mask (None | list[torch.Tensor]): point-wise instance + pts_instance_mask (list[torch.Tensor]): point-wise instance label of each batch.
- gt_bboxes_ignore (None | list[torch.Tensor]): Specify + gt_bboxes_ignore (list[torch.Tensor]): Specify which bounding boxes to ignore. Returns: diff --git a/mmdet3d/models/detectors/imvotenet.py b/mmdet3d/models/detectors/imvotenet.py index 02ab2cd1c3..fdec49e225 100644 --- a/mmdet3d/models/detectors/imvotenet.py +++ b/mmdet3d/models/detectors/imvotenet.py @@ -149,21 +149,21 @@ def __init__(self, if self.with_img_backbone: if img_pretrained is not None: - warnings.warn('DeprecationWarning: pretrained is a deprecated \ - key, please consider using init_cfg') + warnings.warn('DeprecationWarning: pretrained is a deprecated ' + 'key, please consider using init_cfg.') self.img_backbone.init_cfg = dict( type='Pretrained', checkpoint=img_pretrained) if self.with_img_roi_head: if img_pretrained is not None: - warnings.warn('DeprecationWarning: pretrained is a deprecated \ - key, please consider using init_cfg') + warnings.warn('DeprecationWarning: pretrained is a deprecated ' + 'key, please consider using init_cfg.') self.img_roi_head.init_cfg = dict( type='Pretrained', checkpoint=img_pretrained) if self.with_pts_backbone: if img_pretrained is not None: - warnings.warn('DeprecationWarning: pretrained is a deprecated \ - key, please consider using init_cfg') + warnings.warn('DeprecationWarning: pretrained is a deprecated ' + 'key, please consider using init_cfg.') self.pts_backbone.init_cfg = dict( type='Pretrained', checkpoint=pts_pretrained) @@ -393,9 +393,9 @@ def forward_train(self, with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format. gt_labels (list[torch.Tensor]): class indices for each 2d bounding box. - gt_bboxes_ignore (None | list[torch.Tensor]): specify which + gt_bboxes_ignore (list[torch.Tensor]): specify which 2d bounding boxes can be ignored when computing the loss. - gt_masks (None | torch.Tensor): true segmentation masks for each + gt_masks (torch.Tensor): true segmentation masks for each 2d bbox, used if the architecture supports a segmentation task. proposals: override rpn proposals (2d) with custom proposals. Use when `with_rpn` is False. @@ -403,9 +403,9 @@ def forward_train(self, not supported yet. gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): 3d gt bboxes. gt_labels_3d (list[torch.Tensor]): gt class labels for 3d bboxes. - pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic + pts_semantic_mask (list[torch.Tensor]): point-wise semantic label of each batch. - pts_instance_mask (None | list[torch.Tensor]): point-wise instance + pts_instance_mask (list[torch.Tensor]): point-wise instance label of each batch.
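The warning fix above concerns the deprecation of the `pretrained` key; the replacement pattern, visible in the hunks, moves the checkpoint into an `init_cfg`. A before/after config sketch (model fields and paths are illustrative placeholders):

```python
# Deprecated: passing a checkpoint via `pretrained`.
model = dict(
    type='ImVoteNet',
    pretrained='checkpoints/resnet50.pth',  # triggers the DeprecationWarning
)

# Preferred: let the backbone initialize itself from init_cfg, mirroring
# what the patched code does internally via
#   dict(type='Pretrained', checkpoint=img_pretrained)
model = dict(
    type='ImVoteNet',
    img_backbone=dict(
        type='ResNet',
        depth=50,
        init_cfg=dict(type='Pretrained',
                      checkpoint='checkpoints/resnet50.pth'),
    ),
)
```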
Returns: diff --git a/mmdet3d/models/detectors/mvx_two_stage.py b/mmdet3d/models/detectors/mvx_two_stage.py index 2534604437..0ca8b9686b 100644 --- a/mmdet3d/models/detectors/mvx_two_stage.py +++ b/mmdet3d/models/detectors/mvx_two_stage.py @@ -84,21 +84,21 @@ def __init__(self, if self.with_img_backbone: if img_pretrained is not None: - warnings.warn('DeprecationWarning: pretrained is a deprecated \ - key, please consider using init_cfg') + warnings.warn('DeprecationWarning: pretrained is a deprecated ' + 'key, please consider using init_cfg.') self.img_backbone.init_cfg = dict( type='Pretrained', checkpoint=img_pretrained) if self.with_img_roi_head: if img_pretrained is not None: - warnings.warn('DeprecationWarning: pretrained is a deprecated \ - key, please consider using init_cfg') + warnings.warn('DeprecationWarning: pretrained is a deprecated ' + 'key, please consider using init_cfg.') self.img_roi_head.init_cfg = dict( type='Pretrained', checkpoint=img_pretrained) if self.with_pts_backbone: - if pts_pretrained is not None: - warnings.warn('DeprecationWarning: pretrained is a deprecated \ - key, please consider using init_cfg') + if img_pretrained is not None: + warnings.warn('DeprecationWarning: pretrained is a deprecated ' + 'key, please consider using init_cfg.') self.pts_backbone.init_cfg = dict( type='Pretrained', checkpoint=pts_pretrained) @@ -260,7 +260,7 @@ def forward_train(self, of 2D boxes in images. Defaults to None. gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in images. Defaults to None. - img (torch.Tensor optional): Images of each sample with shape + img (torch.Tensor, optional): Images of each sample with shape (N, C, H, W). Defaults to None. proposals ([list[torch.Tensor], optional): Predicted proposals used for training Fast RCNN. Defaults to None. diff --git a/mmdet3d/models/detectors/single_stage_mono3d.py b/mmdet3d/models/detectors/single_stage_mono3d.py index 5fef243813..205091146e 100644 --- a/mmdet3d/models/detectors/single_stage_mono3d.py +++ b/mmdet3d/models/detectors/single_stage_mono3d.py @@ -48,14 +48,15 @@ def forward_train(self, image in [tl_x, tl_y, br_x, br_y] format. gt_labels (list[Tensor]): Class indices corresponding to each box gt_bboxes_3d (list[Tensor]): Each item are the 3D truth boxes for - each image in [x, y, z, w, l, h, theta, vx, vy] format. + each image in [x, y, z, x_size, y_size, z_size, yaw, vx, vy] + format. gt_labels_3d (list[Tensor]): 3D class indices corresponding to each box. centers2d (list[Tensor]): Projected 3D centers onto 2D images. depths (list[Tensor]): Depth of projected centers on 2D images. attr_labels (list[Tensor], optional): Attribute indices corresponding to each box - gt_bboxes_ignore (None | list[Tensor]): Specify which bounding + gt_bboxes_ignore (list[Tensor]): Specify which bounding boxes can be ignored when computing the loss. Returns: diff --git a/mmdet3d/models/detectors/votenet.py b/mmdet3d/models/detectors/votenet.py index c92ff9c942..4ba0caa88f 100644 --- a/mmdet3d/models/detectors/votenet.py +++ b/mmdet3d/models/detectors/votenet.py @@ -40,11 +40,11 @@ def forward_train(self, img_metas (list): Image metas. gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch. gt_labels_3d (list[torch.Tensor]): gt class labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic + pts_semantic_mask (list[torch.Tensor]): point-wise semantic label of each batch. 
- pts_instance_mask (None | list[torch.Tensor]): point-wise instance + pts_instance_mask (list[torch.Tensor]): point-wise instance label of each batch. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify + gt_bboxes_ignore (list[torch.Tensor]): Specify which bounding boxes to ignore. Returns: diff --git a/mmdet3d/models/fusion_layers/point_fusion.py b/mmdet3d/models/fusion_layers/point_fusion.py index 5b587a7d17..a6cca495f6 100644 --- a/mmdet3d/models/fusion_layers/point_fusion.py +++ b/mmdet3d/models/fusion_layers/point_fusion.py @@ -34,7 +34,7 @@ def point_sample(img_meta, coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'. img_scale_factor (torch.Tensor): Scale factor with shape of (w_scale, h_scale). - img_crop_offset (torch.Tensor): Crop offset used to crop \ + img_crop_offset (torch.Tensor): Crop offset used to crop image during data augmentation with shape of (w_offset, h_offset). img_flip (bool): Whether the image is flipped. img_pad_shape (tuple[int]): int tuple indicates the h & w after diff --git a/mmdet3d/models/losses/axis_aligned_iou_loss.py b/mmdet3d/models/losses/axis_aligned_iou_loss.py index d0953321dd..5ccef45012 100644 --- a/mmdet3d/models/losses/axis_aligned_iou_loss.py +++ b/mmdet3d/models/losses/axis_aligned_iou_loss.py @@ -54,7 +54,7 @@ def forward(self, Args: pred (torch.Tensor): Bbox predictions with shape [..., 3]. target (torch.Tensor): Bbox targets (gt) with shape [..., 3]. - weight (torch.Tensor|float, optional): Weight of loss. \ + weight (torch.Tensor | float, optional): Weight of loss. Defaults to None. avg_factor (int, optional): Average factor that is used to average the loss. Defaults to None. diff --git a/mmdet3d/models/losses/chamfer_distance.py b/mmdet3d/models/losses/chamfer_distance.py index a9d4b80247..a0caebad78 100644 --- a/mmdet3d/models/losses/chamfer_distance.py +++ b/mmdet3d/models/losses/chamfer_distance.py @@ -29,13 +29,13 @@ def chamfer_distance(src, Returns: tuple: Source and Destination loss with the corresponding indices. - - loss_src (torch.Tensor): The min distance \ + - loss_src (torch.Tensor): The min distance from source to destination. - - loss_dst (torch.Tensor): The min distance \ + - loss_dst (torch.Tensor): The min distance from destination to source. - - indices1 (torch.Tensor): Index the min distance point \ + - indices1 (torch.Tensor): Index the min distance point for each point in source to destination. - - indices2 (torch.Tensor): Index the min distance point \ + - indices2 (torch.Tensor): Index the min distance point for each point in destination to source. """ @@ -125,10 +125,10 @@ def forward(self, Defaults to False. Returns: - tuple[torch.Tensor]: If ``return_indices=True``, return losses of \ - source and target with their corresponding indices in the \ - order of ``(loss_source, loss_target, indices1, indices2)``. \ - If ``return_indices=False``, return \ + tuple[torch.Tensor]: If ``return_indices=True``, return losses of + source and target with their corresponding indices in the + order of ``(loss_source, loss_target, indices1, indices2)``. + If ``return_indices=False``, return ``(loss_source, loss_target)``.
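A compact reference for the four return values documented in `chamfer_distance` above, implemented with `torch.cdist` (a sketch; the mmdet3d version also supports configurable distance modes and reductions):

```python
import torch

def chamfer_distance(src, dst):
    """src: [B, N, 3]; dst: [B, M, 3] -> (loss_src, loss_dst, idx1, idx2)."""
    dist = torch.cdist(src, dst)            # [B, N, M] pairwise distances
    loss_src, indices1 = dist.min(dim=2)    # each src point -> nearest dst
    loss_dst, indices2 = dist.min(dim=1)    # each dst point -> nearest src
    return loss_src, loss_dst, indices1, indices2

src, dst = torch.rand(1, 128, 3), torch.rand(1, 256, 3)
loss_src, loss_dst, idx1, idx2 = chamfer_distance(src, dst)
print(loss_src.mean().item(), loss_dst.mean().item())
```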
""" assert reduction_override in (None, 'none', 'mean', 'sum') diff --git a/mmdet3d/models/middle_encoders/sparse_encoder.py b/mmdet3d/models/middle_encoders/sparse_encoder.py index 1a03e9b5c9..8b296c1a47 100644 --- a/mmdet3d/models/middle_encoders/sparse_encoder.py +++ b/mmdet3d/models/middle_encoders/sparse_encoder.py @@ -14,19 +14,21 @@ class SparseEncoder(nn.Module): Args: in_channels (int): The number of input channels. sparse_shape (list[int]): The sparse shape of input tensor. - order (list[str]): Order of conv module. Defaults to ('conv', - 'norm', 'act'). - norm_cfg (dict): Config of normalization layer. Defaults to + order (list[str], optional): Order of conv module. + Defaults to ('conv', 'norm', 'act'). + norm_cfg (dict, optional): Config of normalization layer. Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). - base_channels (int): Out channels for conv_input layer. + base_channels (int, optional): Out channels for conv_input layer. Defaults to 16. - output_channels (int): Out channels for conv_out layer. + output_channels (int, optional): Out channels for conv_out layer. Defaults to 128. - encoder_channels (tuple[tuple[int]]): + encoder_channels (tuple[tuple[int]], optional): Convolutional channels of each encode block. - encoder_paddings (tuple[tuple[int]]): Paddings of each encode block. + encoder_paddings (tuple[tuple[int]], optional): + Paddings of each encode block. Defaults to ((16, ), (32, 32, 32), (64, 64, 64), (64, 64, 64)). - block_type (str): Type of the block to use. Defaults to 'conv_module'. + block_type (str, optional): Type of the block to use. + Defaults to 'conv_module'. """ def __init__(self, @@ -99,7 +101,7 @@ def forward(self, voxel_features, coors, batch_size): Args: voxel_features (torch.float32): Voxel features in shape (N, C). - coors (torch.int32): Coordinates in shape (N, 4), \ + coors (torch.int32): Coordinates in shape (N, 4), the columns in the order of (batch_idx, z_idx, y_idx, x_idx). batch_size (int): Batch size. @@ -139,9 +141,9 @@ def make_encoder_layers(self, make_block (method): A bounded function to build blocks. norm_cfg (dict[str]): Config of normalization layer. in_channels (int): The number of encoder input channels. - block_type (str): Type of the block to use. Defaults to - 'conv_module'. - conv_cfg (dict): Config of conv layer. Defaults to + block_type (str, optional): Type of the block to use. + Defaults to 'conv_module'. + conv_cfg (dict, optional): Config of conv layer. Defaults to dict(type='SubMConv3d'). Returns: diff --git a/mmdet3d/models/model_utils/transformer.py b/mmdet3d/models/model_utils/transformer.py index 4a1af93dd8..3d4878dafb 100644 --- a/mmdet3d/models/model_utils/transformer.py +++ b/mmdet3d/models/model_utils/transformer.py @@ -15,15 +15,16 @@ class GroupFree3DMHA(MultiheadAttention): embed_dims (int): The embedding dimension. num_heads (int): Parallel attention heads. Same as `nn.MultiheadAttention`. - attn_drop (float): A Dropout layer on attn_output_weights. Default 0.0. - proj_drop (float): A Dropout layer. Default 0.0. - dropout_layer (obj:`ConfigDict`): The dropout_layer used + attn_drop (float, optional): A Dropout layer on attn_output_weights. + Defaults to 0.0. + proj_drop (float, optional): A Dropout layer. Defaults to 0.0. + dropout_layer (obj:`ConfigDict`, optional): The dropout_layer used when adding the shortcut. - init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. - Default: None. 
- batch_first (bool): Key, Query and Value are shape of + init_cfg (:obj:`mmcv.ConfigDict`, optional): The Config for + initialization. Default: None. + batch_first (bool, optional): Key, Query and Value are shape of (batch, n, embed_dim) - or (n, batch, embed_dim). Default to False. + or (n, batch, embed_dim). Defaults to False. """ def __init__(self, @@ -58,26 +59,26 @@ def forward(self, embed_dims]. Same in `nn.MultiheadAttention.forward`. key (Tensor): The key tensor with shape [num_keys, bs, embed_dims]. Same in `nn.MultiheadAttention.forward`. - If None, the ``query`` will be used. Defaults to None. + If None, the ``query`` will be used. value (Tensor): The value tensor with same shape as `key`. - Same in `nn.MultiheadAttention.forward`. Defaults to None. + Same in `nn.MultiheadAttention.forward`. If None, the `key` will be used. identity (Tensor): This tensor, with the same shape as x, - will be used for the identity link. - If None, `x` will be used. Defaults to None. - query_pos (Tensor): The positional encoding for query, with - the same shape as `x`. If not None, it will - be added to `x` before forward function. Defaults to None. - key_pos (Tensor): The positional encoding for `key`, with the - same shape as `key`. Defaults to None. If not None, it will - be added to `key` before forward function. If None, and - `query_pos` has the same shape as `key`, then `query_pos` + will be used for the identity link. If None, `x` will be used. + query_pos (Tensor, optional): The positional encoding for query, + with the same shape as `x`. Defaults to None. + If not None, it will be added to `x` before forward function. + key_pos (Tensor, optional): The positional encoding for `key`, + with the same shape as `key`. Defaults to None. If not None, + it will be added to `key` before forward function. If None, + and `query_pos` has the same shape as `key`, then `query_pos` - will be used for `key_pos`. Defaults to None. + will be used for `key_pos`. - attn_mask (Tensor): ByteTensor mask with shape [num_queries, - num_keys]. Same in `nn.MultiheadAttention.forward`. - Defaults to None. - key_padding_mask (Tensor): ByteTensor with shape [bs, num_keys]. + attn_mask (Tensor, optional): ByteTensor mask with shape + [num_queries, num_keys]. Same in `nn.MultiheadAttention.forward`. Defaults to None. + key_padding_mask (Tensor, optional): ByteTensor with shape + [bs, num_keys]. Same in `nn.MultiheadAttention.forward`. + Defaults to None. Returns: Tensor: forwarded results with shape [num_queries, bs, embed_dims]. @@ -113,7 +114,7 @@ class ConvBNPositionalEncoding(nn.Module): Args: input_channel (int): input features dim. - num_pos_feats (int): output position features dim. + num_pos_feats (int, optional): output position features dim. Defaults to 288 to be consistent with seed features dim. """ diff --git a/mmdet3d/models/model_utils/vote_module.py b/mmdet3d/models/model_utils/vote_module.py index 75ca537ccb..5cc52ad9d4 100644 --- a/mmdet3d/models/model_utils/vote_module.py +++ b/mmdet3d/models/model_utils/vote_module.py @@ -14,22 +14,25 @@ class VoteModule(nn.Module): Args: in_channels (int): Number of channels of seed point features. - vote_per_seed (int): Number of votes generated from each seed point. - gt_per_seed (int): Number of ground truth votes generated - from each seed point. - num_points (int): Number of points to be used for voting. - conv_channels (tuple[int]): Out channels of vote - generating convolution. - conv_cfg (dict): Config of convolution.
+ vote_per_seed (int, optional): Number of votes generated from + each seed point. Default: 1. + gt_per_seed (int, optional): Number of ground truth votes generated + from each seed point. Default: 3. + num_points (int, optional): Number of points to be used for voting. + Default: 1. + conv_channels (tuple[int], optional): Out channels of vote + generating convolution. Default: (16, 16). + conv_cfg (dict, optional): Config of convolution. Default: dict(type='Conv1d'). - norm_cfg (dict): Config of normalization. + norm_cfg (dict, optional): Config of normalization. Default: dict(type='BN1d'). - norm_feats (bool): Whether to normalize features. + norm_feats (bool, optional): Whether to normalize features. Default: True. - with_res_feat (bool): Whether to predict residual features. + with_res_feat (bool, optional): Whether to predict residual features. Default: True. - vote_xyz_range (list[float], None): The range of points translation. - vote_loss (dict): Config of vote loss. + vote_xyz_range (list[float], optional): + The range of points translation. Default: None. + vote_loss (dict, optional): Config of vote loss. Default: None. """ def __init__(self, @@ -95,10 +98,10 @@ def forward(self, seed_points, seed_feats): Returns: tuple[torch.Tensor]: - - vote_points: Voted xyz based on the seed points \ + - vote_points: Voted xyz based on the seed points with shape (B, M, 3), ``M=num_seed*vote_per_seed``. - - vote_features: Voted features based on the seed points with \ - shape (B, C, M) where ``M=num_seed*vote_per_seed``, \ + - vote_features: Voted features based on the seed points with + shape (B, C, M) where ``M=num_seed*vote_per_seed``, ``C=vote_feature_dim``. """ if self.num_points != -1: diff --git a/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py index e80e817d94..890d5b3d40 100644 --- a/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py +++ b/mmdet3d/models/roi_heads/bbox_heads/h3d_bbox_head.py @@ -324,16 +324,16 @@ def loss(self, Args: bbox_preds (dict): Predictions from forward of h3d bbox head. points (list[torch.Tensor]): Input points. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each sample. gt_labels_3d (list[torch.Tensor]): Labels of each sample. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic mask. - pts_instance_mask (None | list[torch.Tensor]): Point-wise + pts_instance_mask (list[torch.Tensor]): Point-wise instance mask. img_metas (list[dict]): Contain pcd and img's meta info. rpn_targets (Tuple): Targets generated by rpn head. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify - which bounding. + gt_bboxes_ignore (list[torch.Tensor]): Specify + which bounding boxes to ignore. Returns: @@ -502,7 +502,7 @@ def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points, box_dim=bbox.shape[-1], with_yaw=self.bbox_coder.with_rot, origin=(0.5, 0.5, 0.5)) - box_indices = bbox.points_in_boxes_batch(points) + box_indices = bbox.points_in_boxes_all(points) corner3d = bbox.corners minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6))) @@ -560,25 +560,25 @@ def get_proposal_stage_loss(self, Args: bbox_preds (dict): Predictions from forward of vote head. - size_class_targets (torch.Tensor): Ground truth \ + size_class_targets (torch.Tensor): Ground truth size class of each prediction bounding box.
- size_res_targets (torch.Tensor): Ground truth \ + size_res_targets (torch.Tensor): Ground truth size residual of each prediction bounding box. - dir_class_targets (torch.Tensor): Ground truth \ + dir_class_targets (torch.Tensor): Ground truth direction class of each prediction bounding box. - dir_res_targets (torch.Tensor): Ground truth \ + dir_res_targets (torch.Tensor): Ground truth direction residual of each prediction bounding box. - center_targets (torch.Tensor): Ground truth center \ + center_targets (torch.Tensor): Ground truth center of each prediction bounding box. - mask_targets (torch.Tensor): Validation of each \ + mask_targets (torch.Tensor): Validation of each prediction bounding box. - objectness_targets (torch.Tensor): Ground truth \ + objectness_targets (torch.Tensor): Ground truth objectness label of each prediction bounding box. - objectness_weights (torch.Tensor): Weights of objectness \ + objectness_weights (torch.Tensor): Weights of objectness loss for each prediction bounding box. - box_loss_weights (torch.Tensor): Weights of regression \ + box_loss_weights (torch.Tensor): Weights of regression loss for each prediction bounding box. - valid_gt_weights (torch.Tensor): Validation of each \ + valid_gt_weights (torch.Tensor): Validation of each ground truth bounding box. Returns: @@ -663,12 +663,12 @@ def get_targets(self, Args: points (list[torch.Tensor]): Points of each batch. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each batch. gt_labels_3d (list[torch.Tensor]): Labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic label of each batch. - pts_instance_mask (None | list[torch.Tensor]): Point-wise instance + pts_instance_mask (list[torch.Tensor]): Point-wise instance label of each batch. bbox_preds (torch.Tensor): Bounding box predictions of vote head. @@ -769,22 +769,22 @@ def get_targets_single(self, Args: points (torch.Tensor): Points of each batch. - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \ + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes of each batch. gt_labels_3d (torch.Tensor): Labels of each batch. - pts_semantic_mask (None | torch.Tensor): Point-wise semantic + pts_semantic_mask (torch.Tensor): Point-wise semantic label of each batch. - pts_instance_mask (None | torch.Tensor): Point-wise instance + pts_instance_mask (torch.Tensor): Point-wise instance label of each batch. aggregated_points (torch.Tensor): Aggregated points from vote aggregation layer. pred_surface_center (torch.Tensor): Prediction of surface center. pred_line_center (torch.Tensor): Prediction of line center. - pred_obj_surface_center (torch.Tensor): Objectness prediction \ + pred_obj_surface_center (torch.Tensor): Objectness prediction of surface center. - pred_obj_line_center (torch.Tensor): Objectness prediction of \ + pred_obj_line_center (torch.Tensor): Objectness prediction of line center. - pred_surface_sem (torch.Tensor): Semantic prediction of \ + pred_surface_sem (torch.Tensor): Semantic prediction of surface center. pred_line_sem (torch.Tensor): Semantic prediction of line center. 
Returns: diff --git a/mmdet3d/models/roi_heads/h3d_roi_head.py b/mmdet3d/models/roi_heads/h3d_roi_head.py index ba5ef1e7b0..4bf8cf36b6 100644 --- a/mmdet3d/models/roi_heads/h3d_roi_head.py +++ b/mmdet3d/models/roi_heads/h3d_roi_head.py @@ -65,15 +65,15 @@ def forward_train(self, feats_dict (dict): Contains features from the first stage. img_metas (list[dict]): Contain pcd and img's meta info. points (list[torch.Tensor]): Input points. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each sample. gt_labels_3d (list[torch.Tensor]): Labels of each sample. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic mask. - pts_instance_mask (None | list[torch.Tensor]): Point-wise + pts_instance_mask (list[torch.Tensor]): Point-wise instance mask. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify - which bounding. + gt_bboxes_ignore (list[torch.Tensor]): Specify + which bounding boxes to ignore. Returns: dict: losses from each head. diff --git a/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py b/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py index e3b4e453e0..fbdd2f0da3 100644 --- a/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py +++ b/mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py @@ -83,15 +83,15 @@ def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d): sample. Args: - voxel_centers (torch.Tensor): The center of voxels in shape \ + voxel_centers (torch.Tensor): The center of voxels in shape (voxel_num, 3). - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \ + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in shape (box_num, 7). - gt_labels_3d (torch.Tensor): Class labels of ground truths in \ + gt_labels_3d (torch.Tensor): Class labels of ground truths in shape (box_num). Returns: - tuple[torch.Tensor]: Segmentation targets with shape [voxel_num] \ + tuple[torch.Tensor]: Segmentation targets with shape [voxel_num], part prediction targets with shape [voxel_num, 3] """ gt_bboxes_3d = gt_bboxes_3d.to(voxel_centers.device) @@ -99,8 +99,8 @@ def get_targets_single(self, voxel_centers, gt_bboxes_3d, gt_labels_3d): part_targets = voxel_centers.new_zeros((voxel_centers.shape[0], 3), dtype=torch.float32) - box_idx = gt_bboxes_3d.points_in_boxes(voxel_centers) - enlarge_box_idx = enlarged_gt_boxes.points_in_boxes( + box_idx = gt_bboxes_3d.points_in_boxes_part(voxel_centers) + enlarge_box_idx = enlarged_gt_boxes.points_in_boxes_part( voxel_centers).long() gt_labels_pad = F.pad( @@ -131,19 +131,19 @@ def get_targets(self, voxels_dict, gt_bboxes_3d, gt_labels_3d): """generate segmentation and part prediction targets. Args: - voxel_centers (torch.Tensor): The center of voxels in shape \ - (voxel_num, 3). + voxels_dict (dict): Contains information of voxels. - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in \ + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes in shape (box_num, 7). - gt_labels_3d (torch.Tensor): Class labels of ground truths in \ + gt_labels_3d (torch.Tensor): Class labels of ground truths in shape (box_num). Returns: dict: Prediction targets - - seg_targets (torch.Tensor): Segmentation targets \ + - seg_targets (torch.Tensor): Segmentation targets with shape [voxel_num].
- - part_targets (torch.Tensor): Part prediction targets \ + - part_targets (torch.Tensor): Part prediction targets with shape [voxel_num, 3]. """ batch_size = len(gt_labels_3d) diff --git a/mmdet3d/models/roi_heads/mask_heads/primitive_head.py b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py index 0d8bfc39a2..c930ee9a98 100644 --- a/mmdet3d/models/roi_heads/mask_heads/primitive_head.py +++ b/mmdet3d/models/roi_heads/mask_heads/primitive_head.py @@ -198,15 +198,15 @@ def loss(self, Args: bbox_preds (dict): Predictions from forward of primitive head. points (list[torch.Tensor]): Input points. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each sample. gt_labels_3d (list[torch.Tensor]): Labels of each sample. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic mask. - pts_instance_mask (None | list[torch.Tensor]): Point-wise + pts_instance_mask (list[torch.Tensor]): Point-wise instance mask. img_metas (list[dict]): Contain pcd and img's meta info. - gt_bboxes_ignore (None | list[torch.Tensor]): Specify - which bounding. + gt_bboxes_ignore (list[torch.Tensor]): Specify + which bounding boxes to ignore. Returns: @@ -266,12 +266,12 @@ def get_targets(self, Args: points (list[torch.Tensor]): Points of each batch. - gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \ + gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes of each batch. gt_labels_3d (list[torch.Tensor]): Labels of each batch. - pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic + pts_semantic_mask (list[torch.Tensor]): Point-wise semantic label of each batch. - pts_instance_mask (None | list[torch.Tensor]): Point-wise instance + pts_instance_mask (list[torch.Tensor]): Point-wise instance label of each batch. bbox_preds (dict): Predictions from forward of primitive head. @@ -333,12 +333,12 @@ def get_targets_single(self, Args: points (torch.Tensor): Points of each batch. - gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \ + gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth boxes of each batch. gt_labels_3d (torch.Tensor): Labels of each batch. - pts_semantic_mask (None | torch.Tensor): Point-wise semantic + pts_semantic_mask (torch.Tensor): Point-wise semantic label of each batch. - pts_instance_mask (None | torch.Tensor): Point-wise instance + pts_instance_mask (torch.Tensor): Point-wise instance label of each batch. Returns: @@ -355,7 +355,7 @@ def get_targets_single(self, # Generate pts_semantic_mask and pts_instance_mask when they are None if pts_semantic_mask is None or pts_instance_mask is None: - points2box_mask = gt_bboxes_3d.points_in_boxes_batch(points) + points2box_mask = gt_bboxes_3d.points_in_boxes_all(points) assignment = points2box_mask.argmax(1) background_mask = points2box_mask.max(1)[0] == 0 diff --git a/mmdet3d/models/segmentors/base.py b/mmdet3d/models/segmentors/base.py index 50b56bbc6b..6b233c2555 100644 --- a/mmdet3d/models/segmentors/base.py +++ b/mmdet3d/models/segmentors/base.py @@ -78,7 +78,7 @@ def show_results(self, Args: data (list[dict]): Input points and the information of the sample. result (list[dict]): Prediction results. - palette (list[list[int]]] | np.ndarray | None): The palette of + palette (list[list[int]] | np.ndarray, optional): The palette of segmentation map. If None is given, random palette will be generated. Default: None out_dir (str): Output directory of visualization result.
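The random-palette fallback mentioned in `show_results` amounts to drawing one RGB color per class; a minimal sketch (the exact sampling the library uses may differ):

import numpy as np

def random_palette(num_classes, seed=None):
    # One 8-bit RGB color per segmentation class, shape (num_classes, 3).
    rng = np.random.default_rng(seed)
    return rng.integers(0, 256, size=(num_classes, 3), dtype=np.uint8)

palette = random_palette(20)  # e.g. 20 segmentation classes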
diff --git a/mmdet3d/models/segmentors/encoder_decoder.py b/mmdet3d/models/segmentors/encoder_decoder.py index e64dfc3930..196904ad1e 100644 --- a/mmdet3d/models/segmentors/encoder_decoder.py +++ b/mmdet3d/models/segmentors/encoder_decoder.py @@ -187,7 +187,7 @@ def _input_generation(coords, use_normalized_coord=False): """Generating model input. - Generate input by subtracting patch center and adding additional \ + Generate input by subtracting patch center and adding additional features. Currently support colors and normalized xyz as features. Args: @@ -195,7 +195,7 @@ def _input_generation(coords, patch_center (torch.Tensor): Center coordinate of the patch. coord_max (torch.Tensor): Max coordinate of all 3D points. feats (torch.Tensor): Features of sampled points of shape [S, C]. - use_normalized_coord (bool, optional): Whether to use normalized \ + use_normalized_coord (bool, optional): Whether to use normalized xyz as additional features. Defaults to False. Returns: @@ -233,17 +233,17 @@ def _sliding_patch_generation(self, block_size (float, optional): Size of a patch to sample. sample_rate (float, optional): Stride used in sliding patch. Defaults to 0.5. - use_normalized_coord (bool, optional): Whether to use normalized \ + use_normalized_coord (bool, optional): Whether to use normalized xyz as additional features. Defaults to False. eps (float, optional): A value added to patch boundary to guarantee - points coverage. Default 1e-3. + points coverage. Defaults to 1e-3. Returns: np.ndarray | np.ndarray: - - patch_points (torch.Tensor): Points of different patches of \ + - patch_points (torch.Tensor): Points of different patches of shape [K, N, 3+C]. - - patch_idxs (torch.Tensor): Index of each point in \ + - patch_idxs (torch.Tensor): Index of each point in `patch_points`, of shape [K, N]. """ device = points.device diff --git a/mmdet3d/models/utils/clip_sigmoid.py b/mmdet3d/models/utils/clip_sigmoid.py index 3936d7de29..3afd4edbef 100644 --- a/mmdet3d/models/utils/clip_sigmoid.py +++ b/mmdet3d/models/utils/clip_sigmoid.py @@ -7,8 +7,8 @@ def clip_sigmoid(x, eps=1e-4): Args: x (torch.Tensor): Input feature map with the shape of [B, N, H, W]. - eps (float): Lower bound of the range to be clamped to. Defaults - to 1e-4. + eps (float, optional): Lower bound of the range to be clamped to. + Defaults to 1e-4. Returns: torch.Tensor: Feature map after sigmoid. diff --git a/mmdet3d/models/utils/mlp.py b/mmdet3d/models/utils/mlp.py index 55ea3885e4..0b499bb46f 100644 --- a/mmdet3d/models/utils/mlp.py +++ b/mmdet3d/models/utils/mlp.py @@ -10,15 +10,15 @@ class MLP(BaseModule): Pass features (B, C, N) through an MLP. Args: - in_channels (int): Number of channels of input features. + in_channels (int, optional): Number of channels of input features. Default: 18. - conv_channels (tuple[int]): Out channels of the convolution. + conv_channels (tuple[int], optional): Out channels of the convolution. Default: (256, 256). - conv_cfg (dict): Config of convolution. + conv_cfg (dict, optional): Config of convolution. Default: dict(type='Conv1d'). - norm_cfg (dict): Config of normalization. + norm_cfg (dict, optional): Config of normalization. Default: dict(type='BN1d'). - act_cfg (dict): Config of activation. + act_cfg (dict, optional): Config of activation. Default: dict(type='ReLU'). 
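The behavior documented above for `clip_sigmoid` reduces to a single clamp; a sketch consistent with that docstring (not the library's source):

import torch

def clip_sigmoid_sketch(x, eps=1e-4):
    # Sigmoid squashed into [eps, 1 - eps] so focal-loss style terms
    # like log(p) and log(1 - p) never see exact 0 or 1.
    return torch.clamp(x.sigmoid(), min=eps, max=1 - eps)

y = clip_sigmoid_sketch(torch.randn(2, 10, 128, 128))  # [B, N, H, W]
assert 1e-4 <= y.min() and y.max() <= 1 - 1e-4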
""" diff --git a/mmdet3d/models/voxel_encoders/pillar_encoder.py b/mmdet3d/models/voxel_encoders/pillar_encoder.py index 109a208757..ddba9efbf7 100644 --- a/mmdet3d/models/voxel_encoders/pillar_encoder.py +++ b/mmdet3d/models/voxel_encoders/pillar_encoder.py @@ -33,7 +33,7 @@ class PillarFeatureNet(nn.Module): Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). mode (str, optional): The mode to gather point features. Options are 'max' or 'avg'. Defaults to 'max'. - legacy (bool): Whether to use the new behavior or + legacy (bool, optional): Whether to use the new behavior or the original behavior. Defaults to True. """ diff --git a/mmdet3d/models/voxel_encoders/utils.py b/mmdet3d/models/voxel_encoders/utils.py index 5055b06196..8c54fc2d16 100644 --- a/mmdet3d/models/voxel_encoders/utils.py +++ b/mmdet3d/models/voxel_encoders/utils.py @@ -113,11 +113,12 @@ class PFNLayer(nn.Module): Args: in_channels (int): Number of input channels. out_channels (int): Number of output channels. - norm_cfg (dict): Config dict of normalization layers - last_layer (bool): If last_layer, there is no concatenation of - features. - mode (str): Pooling model to gather features inside voxels. - Default to 'max'. + norm_cfg (dict, optional): Config dict of normalization layers. + Defaults to dict(type='BN1d', eps=1e-3, momentum=0.01). + last_layer (bool, optional): If last_layer, there is no + concatenation of features. Defaults to False. + mode (str, optional): Pooling model to gather features inside voxels. + Defaults to 'max'. """ def __init__(self, diff --git a/mmdet3d/models/voxel_encoders/voxel_encoder.py b/mmdet3d/models/voxel_encoders/voxel_encoder.py index fcc1283e09..45be8b6c1c 100644 --- a/mmdet3d/models/voxel_encoders/voxel_encoder.py +++ b/mmdet3d/models/voxel_encoders/voxel_encoder.py @@ -17,7 +17,7 @@ class HardSimpleVFE(nn.Module): It simply averages the values of points in a voxel. Args: - num_features (int): Number of features to use. Default: 4. + num_features (int, optional): Number of features to use. Default: 4. """ def __init__(self, num_features=4): @@ -93,25 +93,27 @@ class DynamicVFE(nn.Module): The number of points inside the voxel varies. Args: - in_channels (int): Input channels of VFE. Defaults to 4. - feat_channels (list(int)): Channels of features in VFE. - with_distance (bool): Whether to use the L2 distance of points to the - origin point. Default False. - with_cluster_center (bool): Whether to use the distance to cluster - center of points inside a voxel. Default to False. - with_voxel_center (bool): Whether to use the distance to center of - voxel for each points inside a voxel. Default to False. - voxel_size (tuple[float]): Size of a single voxel. Default to - (0.2, 0.2, 4). - point_cloud_range (tuple[float]): The range of points or voxels. - Default to (0, -40, -3, 70.4, 40, 1). - norm_cfg (dict): Config dict of normalization layers. - mode (str): The mode when pooling features of points inside a voxel. - Available options include 'max' and 'avg'. Default to 'max'. - fusion_layer (dict | None): The config dict of fusion layer used in - multi-modal detectors. Default to None. - return_point_feats (bool): Whether to return the features of each - points. Default to False. + in_channels (int, optional): Input channels of VFE. Defaults to 4. + feat_channels (list(int), optional): Channels of features in VFE. + with_distance (bool, optional): Whether to use the L2 distance of + points to the origin point. Defaults to False. 
+ with_cluster_center (bool, optional): Whether to use the distance + to cluster center of points inside a voxel. Defaults to False. + with_voxel_center (bool, optional): Whether to use the distance + to center of voxel for each point inside a voxel. + Defaults to False. + voxel_size (tuple[float], optional): Size of a single voxel. + Defaults to (0.2, 0.2, 4). + point_cloud_range (tuple[float], optional): The range of points + or voxels. Defaults to (0, -40, -3, 70.4, 40, 1). + norm_cfg (dict, optional): Config dict of normalization layers. + mode (str, optional): The mode when pooling features of points + inside a voxel. Available options include 'max' and 'avg'. + Defaults to 'max'. + fusion_layer (dict, optional): The config dict of fusion + layer used in multi-modal detectors. Defaults to None. + return_point_feats (bool, optional): Whether to return the features + of each point. Defaults to False. """ def __init__(self, @@ -292,25 +294,26 @@ class HardVFE(nn.Module): image feature into voxel features in a point-wise manner. Args: - in_channels (int): Input channels of VFE. Defaults to 4. - feat_channels (list(int)): Channels of features in VFE. - with_distance (bool): Whether to use the L2 distance of points to the - origin point. Default False. - with_cluster_center (bool): Whether to use the distance to cluster - center of points inside a voxel. Default to False. - with_voxel_center (bool): Whether to use the distance to center of - voxel for each points inside a voxel. Default to False. - voxel_size (tuple[float]): Size of a single voxel. Default to - (0.2, 0.2, 4). - point_cloud_range (tuple[float]): The range of points or voxels. - Default to (0, -40, -3, 70.4, 40, 1). - norm_cfg (dict): Config dict of normalization layers. - mode (str): The mode when pooling features of points inside a voxel. - Available options include 'max' and 'avg'. Default to 'max'. - fusion_layer (dict | None): The config dict of fusion layer used in - multi-modal detectors. Default to None. - return_point_feats (bool): Whether to return the features of each - points. Default to False. + in_channels (int, optional): Input channels of VFE. Defaults to 4. + feat_channels (list(int), optional): Channels of features in VFE. + with_distance (bool, optional): Whether to use the L2 distance + of points to the origin point. Defaults to False. + with_cluster_center (bool, optional): Whether to use the distance + to cluster center of points inside a voxel. Defaults to False. + with_voxel_center (bool, optional): Whether to use the distance to + center of voxel for each point inside a voxel. Defaults to False. + voxel_size (tuple[float], optional): Size of a single voxel. + Defaults to (0.2, 0.2, 4). + point_cloud_range (tuple[float], optional): The range of points + or voxels. Defaults to (0, -40, -3, 70.4, 40, 1). + norm_cfg (dict, optional): Config dict of normalization layers. + mode (str, optional): The mode when pooling features of points inside a + voxel. Available options include 'max' and 'avg'. + Defaults to 'max'. + fusion_layer (dict, optional): The config dict of fusion layer + used in multi-modal detectors. Defaults to None. + return_point_feats (bool, optional): Whether to return the + features of each point. Defaults to False.
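The `with_cluster_center` / `with_voxel_center` options documented for `DynamicVFE` and `HardVFE` decorate each point with offset features before the per-point MLP. A minimal sketch for a single hard voxel (the helper name and concatenation order are illustrative, not the library's):

import torch

def decorate_voxel(points, voxel_center):
    # points: (K, 4) [x, y, z, reflectance] falling in one voxel,
    # voxel_center: (3,) geometric center of that voxel.
    xyz = points[:, :3]
    f_cluster = xyz - xyz.mean(dim=0, keepdim=True)  # with_cluster_center
    f_center = xyz - voxel_center                    # with_voxel_center
    # (K, 4 + 3 + 3): raw features plus both offset features.
    return torch.cat([points, f_cluster, f_center], dim=1)

out = decorate_voxel(torch.rand(32, 4), torch.tensor([0.1, 0.1, 2.0]))
print(out.shape)  # torch.Size([32, 10])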
""" def __init__(self, diff --git a/mmdet3d/ops/__init__.py b/mmdet3d/ops/__init__.py index bf9988abe8..38e2ea7367 100644 --- a/mmdet3d/ops/__init__.py +++ b/mmdet3d/ops/__init__.py @@ -17,8 +17,8 @@ PAConvSAModule, PAConvSAModuleMSG, PointFPModule, PointSAModule, PointSAModuleMSG, build_sa_module) -from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch, - points_in_boxes_cpu, points_in_boxes_gpu) +from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_all, + points_in_boxes_cpu, points_in_boxes_part) from .sparse_block import (SparseBasicBlock, SparseBottleneck, make_sparse_convmodule) from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization @@ -29,12 +29,12 @@ 'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization', 'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss', 'SigmoidFocalLoss', 'SparseBasicBlock', 'SparseBottleneck', - 'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu', + 'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu', 'make_sparse_convmodule', 'ball_query', 'knn', 'furthest_point_sample', 'furthest_point_sample_with_dist', 'three_interpolate', 'three_nn', 'gather_points', 'grouping_operation', 'group_points', 'GroupAll', 'QueryAndGroup', 'PointSAModule', 'PointSAModuleMSG', 'PointFPModule', - 'points_in_boxes_batch', 'get_compiler_version', 'assign_score_withk', + 'points_in_boxes_all', 'get_compiler_version', 'assign_score_withk', 'get_compiling_cuda_version', 'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA', 'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule', 'PAConvCUDASAModuleMSG' diff --git a/mmdet3d/ops/furthest_point_sample/points_sampler.py b/mmdet3d/ops/furthest_point_sample/points_sampler.py index 9a3bd2ae42..410d513780 100644 --- a/mmdet3d/ops/furthest_point_sample/points_sampler.py +++ b/mmdet3d/ops/furthest_point_sample/points_sampler.py @@ -36,13 +36,13 @@ class Points_Sampler(nn.Module): Args: num_point (list[int]): Number of sample points. - fps_mod_list (list[str]: Type of FPS method, valid mod + fps_mod_list (list[str], optional): Type of FPS method, valid mod ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. F-FPS: using feature distances for FPS. D-FPS: using Euclidean distances of points for FPS. FS: using F-FPS and D-FPS simultaneously. - fps_sample_range_list (list[int]): Range of points to apply FPS. - Default: [-1]. + fps_sample_range_list (list[int], optional): + Range of points to apply FPS. Default: [-1]. """ def __init__(self, diff --git a/mmdet3d/ops/furthest_point_sample/utils.py b/mmdet3d/ops/furthest_point_sample/utils.py index 4ca235e13b..8727b9d4e8 100644 --- a/mmdet3d/ops/furthest_point_sample/utils.py +++ b/mmdet3d/ops/furthest_point_sample/utils.py @@ -7,7 +7,7 @@ def calc_square_dist(point_feat_a, point_feat_b, norm=True): Args: point_feat_a (Tensor): (B, N, C) Feature vector of each point. point_feat_b (Tensor): (B, M, C) Feature vector of each point. - norm (Bool): Whether to normalize the distance. + norm (Bool, optional): Whether to normalize the distance. Default: True. Returns: diff --git a/mmdet3d/ops/group_points/group_points.py b/mmdet3d/ops/group_points/group_points.py index 88122a88d5..07fd143901 100644 --- a/mmdet3d/ops/group_points/group_points.py +++ b/mmdet3d/ops/group_points/group_points.py @@ -14,22 +14,22 @@ class QueryAndGroup(nn.Module): Groups with a ball query of radius Args: - max_radius (float | None): The maximum radius of the balls. + max_radius (float): The maximum radius of the balls. 
If None is given, we will use kNN sampling instead of ball query. sample_num (int): Maximum number of features to gather in the ball. - min_radius (float): The minimum radius of the balls. - use_xyz (bool): Whether to use xyz. + min_radius (float, optional): The minimum radius of the balls. + Default: 0. + use_xyz (bool, optional): Whether to use xyz. Default: True. - return_grouped_xyz (bool): Whether to return grouped xyz. + return_grouped_xyz (bool, optional): Whether to return grouped xyz. Default: False. - normalize_xyz (bool): Whether to normalize xyz. + normalize_xyz (bool, optional): Whether to normalize xyz. Default: False. - uniform_sample (bool): Whether to sample uniformly. + uniform_sample (bool, optional): Whether to sample uniformly. Default: False - return_unique_cnt (bool): Whether to return the count of - unique samples. - Default: False. - return_grouped_idx (bool): Whether to return grouped idx. + return_unique_cnt (bool, optional): Whether to return the count of + unique samples. Default: False. + return_grouped_idx (bool, optional): Whether to return grouped idx. Default: False. """ diff --git a/mmdet3d/ops/iou3d/iou3d_utils.py b/mmdet3d/ops/iou3d/iou3d_utils.py index 6f36019e72..09bb33cdd6 100644 --- a/mmdet3d/ops/iou3d/iou3d_utils.py +++ b/mmdet3d/ops/iou3d/iou3d_utils.py @@ -4,7 +4,7 @@ def boxes_iou_bev(boxes_a, boxes_b): - """Calculate boxes IoU in the bird view. + """Calculate boxes IoU in the Bird's Eye View. Args: boxes_a (torch.Tensor): Input boxes a with shape (M, 5). @@ -22,24 +22,29 @@ return ans_iou -def nms_gpu(boxes, scores, thresh, pre_maxsize=None, post_max_size=None): - """Nms function with gpu implementation. +def nms_gpu(boxes, scores, thresh, pre_max_size=None, post_max_size=None): + """NMS function GPU implementation (for BEV boxes). The overlap of two + boxes for IoU calculation is defined as the exact overlapping area of the + two boxes. In this function, one can also set `pre_max_size` and + `post_max_size`. Args: boxes (torch.Tensor): Input boxes with the shape of [N, 5] ([x1, y1, x2, y2, ry]). scores (torch.Tensor): Scores of boxes with the shape of [N]. thresh (int): Threshold. - pre_maxsize (int): Max size of boxes before nms. Default: None. - post_maxsize (int): Max size of boxes after nms. Default: None. + pre_max_size (int, optional): Max size of boxes before NMS. + Default: None. + post_max_size (int, optional): Max size of boxes after NMS. + Default: None. Returns: - torch.Tensor: Indexes after nms. + torch.Tensor: Indexes after NMS. """ order = scores.sort(0, descending=True)[1] - if pre_maxsize is not None: - order = order[:pre_maxsize] + if pre_max_size is not None: + order = order[:pre_max_size] boxes = boxes[order].contiguous() keep = torch.zeros(boxes.size(0), dtype=torch.long) @@ -51,12 +56,14 @@ def nms_normal_gpu(boxes, scores, thresh): - """Normal non maximum suppression on GPU. + """Normal NMS function GPU implementation (for BEV boxes). The overlap of + two boxes for IoU calculation is defined as the exact overlapping area of + the two boxes WITH their yaw angle set to 0. Args: boxes (torch.Tensor): Input boxes with shape (N, 5). scores (torch.Tensor): Scores of predicted boxes with shape (N). - thresh (torch.Tensor): Threshold of non maximum suppression. + thresh (float): Threshold of NMS. Returns: torch.Tensor: Remaining indices with scores in descending order.
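As a reference for the semantics documented above, the greedy procedure behind `nms_gpu` can be sketched in pure PyTorch; the sketch below uses axis-aligned BEV IoU for brevity, whereas the CUDA op computes the exact overlap of rotated boxes:

import torch

def nms_bev_sketch(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
    # boxes: (N, 4) axis-aligned [x1, y1, x2, y2] in BEV, scores: (N,).
    order = scores.sort(0, descending=True)[1]
    if pre_max_size is not None:
        order = order[:pre_max_size]
    boxes = boxes[order]
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    suppressed = torch.zeros(boxes.size(0), dtype=torch.bool)
    keep = []
    for i in range(boxes.size(0)):
        if suppressed[i]:
            continue
        keep.append(order[i])
        # IoU of box i against all lower-scored boxes
        lt = torch.max(boxes[i, :2], boxes[i + 1:, :2])
        rb = torch.min(boxes[i, 2:], boxes[i + 1:, 2:])
        inter = (rb - lt).clamp(min=0).prod(dim=1)
        iou = inter / (areas[i] + areas[i + 1:] - inter)
        suppressed[i + 1:] |= iou > thresh
    keep = torch.stack(keep) if keep else order.new_empty(0)
    if post_max_size is not None:
        keep = keep[:post_max_size]
    return keep  # indices into the original boxes, scores descending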
diff --git a/mmdet3d/ops/pointnet_modules/paconv_sa_module.py b/mmdet3d/ops/pointnet_modules/paconv_sa_module.py index e2deb7ef65..6a4308cfdd 100644 --- a/mmdet3d/ops/pointnet_modules/paconv_sa_module.py +++ b/mmdet3d/ops/pointnet_modules/paconv_sa_module.py @@ -239,11 +239,12 @@ def forward( Args: points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. - features (Tensor): (B, C, N) features of each point. + features (Tensor, optional): (B, C, N) features of each point. Default: None. - indices (Tensor): (B, num_point) Index of the features. + indices (Tensor, optional): (B, num_point) Index of the features. + Default: None. + target_xyz (Tensor, optional): (B, M, 3) new coords of the outputs. Default: None. - target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs. Returns: Tensor: (B, M, 3) where M is the number of points. diff --git a/mmdet3d/ops/pointnet_modules/point_fp_module.py b/mmdet3d/ops/pointnet_modules/point_fp_module.py index 6f4ba8eac0..d97d5ffe31 100644 --- a/mmdet3d/ops/pointnet_modules/point_fp_module.py +++ b/mmdet3d/ops/pointnet_modules/point_fp_module.py @@ -15,7 +15,7 @@ class PointFPModule(BaseModule): Args: mlp_channels (list[int]): List of mlp channels. - norm_cfg (dict): Type of normalization method. + norm_cfg (dict, optional): Type of normalization method. Default: dict(type='BN2d'). """ diff --git a/mmdet3d/ops/pointnet_modules/point_sa_module.py b/mmdet3d/ops/pointnet_modules/point_sa_module.py index 687af4daa2..293d6242d7 100644 --- a/mmdet3d/ops/pointnet_modules/point_sa_module.py +++ b/mmdet3d/ops/pointnet_modules/point_sa_module.py @@ -18,25 +18,25 @@ class BasePointSAModule(nn.Module): sample_nums (list[int]): Number of samples in each ball query. mlp_channels (list[list[int]]): Specify of the pointnet before the global pooling for each scale. - fps_mod (list[str]: Type of FPS method, valid mod + fps_mod (list[str], optional): Type of FPS method, valid modes ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. F-FPS: using feature distances for FPS. D-FPS: using Euclidean distances of points for FPS. FS: using F-FPS and D-FPS simultaneously. - fps_sample_range_list (list[int]): Range of points to apply FPS. - Default: [-1]. - dilated_group (bool): Whether to use dilated ball query. + fps_sample_range_list (list[int], optional): + Range of points to apply FPS. Default: [-1]. + dilated_group (bool, optional): Whether to use dilated ball query. Default: False. - use_xyz (bool): Whether to use xyz. + use_xyz (bool, optional): Whether to use xyz. Default: True. - pool_mod (str): Type of pooling method. + pool_mod (str, optional): Type of pooling method. Default: 'max_pool'. - normalize_xyz (bool): Whether to normalize local XYZ with radius. - Default: False. - grouper_return_grouped_xyz (bool): Whether to return grouped xyz in - `QueryAndGroup`. Defaults to False. - grouper_return_grouped_idx (bool): Whether to return grouped idx in - `QueryAndGroup`. Defaults to False. + normalize_xyz (bool, optional): Whether to normalize local XYZ + with radius. Default: False. + grouper_return_grouped_xyz (bool, optional): Whether to return + grouped xyz in `QueryAndGroup`. Defaults to False. + grouper_return_grouped_idx (bool, optional): Whether to return + grouped idx in `QueryAndGroup`. Defaults to False. """ def __init__(self, @@ -111,9 +111,7 @@ def _sample_points(self, points_xyz, features, indices, target_xyz): Args: points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. features (Tensor): (B, C, N) features of each point. - Default: None.
indices (Tensor): (B, num_point) Index of the features. - Default: None. target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs. Returns: @@ -169,11 +167,12 @@ def forward( Args: points_xyz (Tensor): (B, N, 3) xyz coordinates of the features. - features (Tensor): (B, C, N) features of each point. + features (Tensor, optional): (B, C, N) features of each point. Default: None. - indices (Tensor): (B, num_point) Index of the features. + indices (Tensor, optional): (B, num_point) Index of the features. + Default: None. + target_xyz (Tensor, optional): (B, M, 3) new coords of the outputs. Default: None. - target_xyz (Tensor): (B, M, 3) new_xyz coordinates of the outputs. Returns: Tensor: (B, M, 3) where M is the number of points. @@ -223,26 +222,26 @@ class PointSAModuleMSG(BasePointSAModule): sample_nums (list[int]): Number of samples in each ball query. mlp_channels (list[list[int]]): Specify of the pointnet before the global pooling for each scale. - fps_mod (list[str]: Type of FPS method, valid mod + fps_mod (list[str], optional): Type of FPS method, valid modes ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS']. F-FPS: using feature distances for FPS. D-FPS: using Euclidean distances of points for FPS. FS: using F-FPS and D-FPS simultaneously. - fps_sample_range_list (list[int]): Range of points to apply FPS. - Default: [-1]. - dilated_group (bool): Whether to use dilated ball query. + fps_sample_range_list (list[int], optional): Range of points to + apply FPS. Default: [-1]. + dilated_group (bool, optional): Whether to use dilated ball query. Default: False. - norm_cfg (dict): Type of normalization method. + norm_cfg (dict, optional): Type of normalization method. Default: dict(type='BN2d'). - use_xyz (bool): Whether to use xyz. + use_xyz (bool, optional): Whether to use xyz. Default: True. - pool_mod (str): Type of pooling method. + pool_mod (str, optional): Type of pooling method. Default: 'max_pool'. - normalize_xyz (bool): Whether to normalize local XYZ with radius. - Default: False. - bias (bool | str): If specified as `auto`, it will be decided by the - norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise - False. Default: "auto". + normalize_xyz (bool, optional): Whether to normalize local XYZ + with radius. Default: False. + bias (bool | str, optional): If specified as `auto`, it will be + decided by `norm_cfg`. `bias` will be set as True if + `norm_cfg` is None, otherwise False. Default: 'auto'. """ def __init__(self, @@ -298,24 +297,24 @@ class PointSAModule(PointSAModuleMSG): Args: mlp_channels (list[int]): Specify of the pointnet before the global pooling for each scale. - num_point (int): Number of points. + num_point (int, optional): Number of points. Default: None. - radius (float): Radius to group with. + radius (float, optional): Radius to group with. Default: None. - num_sample (int): Number of samples in each ball query. + num_sample (int, optional): Number of samples in each ball query. Default: None. - norm_cfg (dict): Type of normalization method. + norm_cfg (dict, optional): Type of normalization method. Default: dict(type='BN2d'). - use_xyz (bool): Whether to use xyz. + use_xyz (bool, optional): Whether to use xyz. Default: True. - pool_mod (str): Type of pooling method. + pool_mod (str, optional): Type of pooling method. Default: 'max_pool'. - fps_mod (list[str]: Type of FPS method, valid mod + fps_mod (list[str], optional): Type of FPS method, valid modes ['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
- fps_sample_range_list (list[int]): Range of points to apply FPS. - Default: [-1]. - normalize_xyz (bool): Whether to normalize local XYZ with radius. - Default: False. + fps_sample_range_list (list[int], optional): Range of points + to apply FPS. Default: [-1]. + normalize_xyz (bool, optional): Whether to normalize local XYZ + with radius. Default: False. """ def __init__(self, diff --git a/mmdet3d/ops/roiaware_pool3d/__init__.py b/mmdet3d/ops/roiaware_pool3d/__init__.py index aba9e18d37..aaa29eb3ea 100644 --- a/mmdet3d/ops/roiaware_pool3d/__init__.py +++ b/mmdet3d/ops/roiaware_pool3d/__init__.py @@ -1,8 +1,8 @@ -from .points_in_boxes import (points_in_boxes_batch, points_in_boxes_cpu, - points_in_boxes_gpu) +from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, + points_in_boxes_part) from .roiaware_pool3d import RoIAwarePool3d __all__ = [ - 'RoIAwarePool3d', 'points_in_boxes_gpu', 'points_in_boxes_cpu', - 'points_in_boxes_batch' + 'RoIAwarePool3d', 'points_in_boxes_part', 'points_in_boxes_cpu', + 'points_in_boxes_all' ] diff --git a/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py b/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py index 14e16b9926..1240c20d30 100644 --- a/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py +++ b/mmdet3d/ops/roiaware_pool3d/points_in_boxes.py @@ -3,13 +3,13 @@ from . import roiaware_pool3d_ext -def points_in_boxes_gpu(points, boxes): - """Find points that are in boxes (CUDA) +def points_in_boxes_part(points, boxes): + """Find the box in which each point is (CUDA). Args: points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate boxes (torch.Tensor): [B, T, 7], - num_valid_boxes <= T, [x, y, z, dx, dy, dz, rz] in + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in LiDAR/DEPTH coordinate, (x, y, z) is the bottom center Returns: @@ -43,25 +43,26 @@ def points_in_boxes_gpu(points, boxes): if torch.cuda.current_device() != points_device: torch.cuda.set_device(points_device) - roiaware_pool3d_ext.points_in_boxes_gpu(boxes.contiguous(), - points.contiguous(), - box_idxs_of_pts) + roiaware_pool3d_ext.points_in_boxes_part(boxes.contiguous(), + points.contiguous(), + box_idxs_of_pts) return box_idxs_of_pts def points_in_boxes_cpu(points, boxes): - """Find points that are in boxes (CPU) + """Find all boxes in which each point is (CPU). The CPU version of + :meth:`points_in_boxes_all`. Args: points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate boxes (torch.Tensor): [B, T, 7], - num_valid_boxes <= T, [x, y, z, dx, dy, dz, rz], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], (x, y, z) is the bottom center. Returns: - box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0 + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. """ assert points.shape[0] == boxes.shape[0], \ f'Points and boxes should have the same batch size, ' \ @@ -86,17 +87,17 @@ def points_in_boxes_cpu(points, boxes): return point_indices -def points_in_boxes_batch(points, boxes): - """Find points that are in boxes (CUDA) +def points_in_boxes_all(points, boxes): + """Find all boxes in which each point is (CUDA). Args: points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate boxes (torch.Tensor): [B, T, 7], - num_valid_boxes <= T, [x, y, z, dx, dy, dz, rz], + num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz], (x, y, z) is the bottom center. Returns: - box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0 + box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0. 
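To make the renamed semantics concrete: `points_in_boxes_part` returns, per point, the index of one containing box (-1 for background), while `points_in_boxes_all` returns a (B, M, T) indicator over all boxes. A pure-PyTorch reference for the degenerate axis-aligned case (rz == 0; the real ops also handle yaw):

import torch

def points_in_boxes_all_ref(points, boxes):
    # points: (B, M, 3); boxes: (B, T, 7) [x, y, z, x_size, y_size, z_size, rz],
    # with (x, y, z) the bottom center and rz assumed 0 in this sketch.
    centers = boxes[..., :3].clone()
    sizes = boxes[..., 3:6]
    centers[..., 2] += sizes[..., 2] / 2  # bottom center -> gravity center
    diff = (points.unsqueeze(2) - centers.unsqueeze(1)).abs()  # (B, M, T, 3)
    return (diff <= sizes.unsqueeze(1) / 2).all(dim=-1)

mask = points_in_boxes_all_ref(torch.rand(2, 64, 3), torch.rand(2, 8, 7))
# For non-overlapping boxes, points_in_boxes_part is roughly the argmax
# over the box axis, with -1 where no box contains the point:
part = torch.where(mask.any(-1), mask.long().argmax(-1),
                   torch.full_like(mask[..., 0], -1, dtype=torch.long))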
""" assert boxes.shape[0] == points.shape[0], \ f'Points and boxes should have the same batch size, ' \ @@ -120,8 +121,8 @@ def points_in_boxes_batch(points, boxes): if torch.cuda.current_device() != points_device: torch.cuda.set_device(points_device) - roiaware_pool3d_ext.points_in_boxes_batch(boxes.contiguous(), - points.contiguous(), - box_idxs_of_pts) + roiaware_pool3d_ext.points_in_boxes_all(boxes.contiguous(), + points.contiguous(), + box_idxs_of_pts) return box_idxs_of_pts diff --git a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp index 7e5956b67e..f8c5494d2e 100644 --- a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp +++ b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp @@ -23,23 +23,23 @@ inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz, inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d, float &local_x, float &local_y) { // param pt: (x, y, z) - // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the + // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate, cz in the // bottom center float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; - float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; - cz += dz / 2.0; // shift to the center since cz in box3d is the bottom center + float x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6]; + cz += z_size / 2.0; // shift to the center since cz in box3d is the bottom center - if (fabsf(z - cz) > dz / 2.0) return 0; + if (fabsf(z - cz) > z_size / 2.0) return 0; lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y); - float in_flag = (local_x > -dx / 2.0) & (local_x < dx / 2.0) & - (local_y > -dy / 2.0) & (local_y < dy / 2.0); + float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) & + (local_y > -y_size / 2.0) & (local_y < y_size / 2.0); return in_flag; } int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor pts_indices_tensor) { - // params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the + // params boxes: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate, z is the // bottom center, each box DO NOT overlaps params pts: (npoints, 3) [x, y, z] // in LiDAR coordinate params pts_indices: (N, npoints) diff --git a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu index 4fed2002f1..4b90897e3a 100644 --- a/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu +++ b/mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu @@ -32,25 +32,25 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y) { // param pt: (x, y, z) - // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the + // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate, cz in the // bottom center float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; - float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; - cz += dz / 2.0; // shift to the center since cz in box3d is the bottom center + float x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6]; + cz += z_size / 2.0; // shift to the center since cz in box3d is the bottom center - if (fabsf(z - cz) > dz / 2.0) return 0; + if 
(fabsf(z - cz) > z_size / 2.0) return 0; lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y); - float in_flag = (local_x > -dx / 2.0) & (local_x < dx / 2.0) & - (local_y > -dy / 2.0) & (local_y < dy / 2.0); + float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) & + (local_y > -y_size / 2.0) & (local_y < y_size / 2.0); return in_flag; } -__global__ void points_in_boxes_kernel(int batch_size, int boxes_num, - int pts_num, const float *boxes, - const float *pts, - int *box_idx_of_points) { - // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is +__global__ void points_in_boxes_part_kernel(int batch_size, int boxes_num, + int pts_num, const float *boxes, + const float *pts, + int *box_idx_of_points) { + // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 @@ -74,11 +74,11 @@ __global__ void points_in_boxes_kernel(int batch_size, int boxes_num, } } -__global__ void points_in_boxes_batch_kernel(int batch_size, int boxes_num, - int pts_num, const float *boxes, - const float *pts, - int *box_idx_of_points) { - // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is +__global__ void points_in_boxes_all_kernel(int batch_size, int boxes_num, + int pts_num, const float *boxes, + const float *pts, + int *box_idx_of_points) { + // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 @@ -102,10 +102,10 @@ __global__ void points_in_boxes_batch_kernel(int batch_size, int boxes_num, } } -void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, - const float *boxes, const float *pts, - int *box_idx_of_points) { - // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is +void points_in_boxes_part_launcher(int batch_size, int boxes_num, int pts_num, + const float *boxes, const float *pts, + int *box_idx_of_points) { + // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 @@ -113,8 +113,8 @@ void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size); dim3 threads(THREADS_PER_BLOCK); - points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num, - boxes, pts, box_idx_of_points); + points_in_boxes_part_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num, + boxes, pts, box_idx_of_points); err = cudaGetLastError(); if (cudaSuccess != err) { @@ -127,17 +127,17 @@ void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num, #endif } -void points_in_boxes_batch_launcher(int batch_size, int boxes_num, int pts_num, - const float *boxes, const float *pts, - int *box_idx_of_points) { - // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is +void points_in_boxes_all_launcher(int batch_size, int boxes_num, int pts_num, + const float *boxes, const float *pts, + int *box_idx_of_points) { + // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate, z is // the bottom center, each box params pts: (B,
npoints, 3) [x, y, z] in // LiDAR coordinate params boxes_idx_of_points: (B, npoints), default -1 cudaError_t err; dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size); dim3 threads(THREADS_PER_BLOCK); - points_in_boxes_batch_kernel<<<blocks, threads>>>( + points_in_boxes_all_kernel<<<blocks, threads>>>( batch_size, boxes_num, pts_num, boxes, pts, box_idx_of_points); err = cudaGetLastError(); @@ -151,9 +151,9 @@ void points_in_boxes_batch_launcher(int batch_size, int boxes_num, int pts_num, #endif } -int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, - at::Tensor box_idx_of_points_tensor) { - // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is +int points_in_boxes_part(at::Tensor boxes_tensor, at::Tensor pts_tensor, + at::Tensor box_idx_of_points_tensor) { + // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate, z is // the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x, // y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default // -1 @@ -170,15 +170,15 @@ int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, const float *pts = pts_tensor.data_ptr<float>(); int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>(); - points_in_boxes_launcher(batch_size, boxes_num, pts_num, boxes, pts, - box_idx_of_points); + points_in_boxes_part_launcher(batch_size, boxes_num, pts_num, boxes, pts, + box_idx_of_points); return 1; } -int points_in_boxes_batch(at::Tensor boxes_tensor, at::Tensor pts_tensor, - at::Tensor box_idx_of_points_tensor) { - // params boxes: (B, N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate, z is +int points_in_boxes_all(at::Tensor boxes_tensor, at::Tensor pts_tensor, + at::Tensor box_idx_of_points_tensor) { + // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate, z is // the bottom center.
params pts: (B, npoints, 3) [x, y, z] in LiDAR // coordinate params boxes_idx_of_points: (B, npoints), default -1 @@ -194,8 +194,8 @@ int points_in_boxes_batch(at::Tensor boxes_tensor, at::Tensor pts_tensor, const float *pts = pts_tensor.data_ptr(); int *box_idx_of_points = box_idx_of_points_tensor.data_ptr(); - points_in_boxes_batch_launcher(batch_size, boxes_num, pts_num, boxes, pts, - box_idx_of_points); + points_in_boxes_all_launcher(batch_size, boxes_num, pts_num, boxes, pts, + box_idx_of_points); return 1; } diff --git a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp index cd743b18bb..607d783eb5 100644 --- a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp +++ b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp @@ -40,16 +40,16 @@ int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels, int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, at::Tensor pts_indices_tensor); -int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor, - at::Tensor box_idx_of_points_tensor); +int points_in_boxes_part(at::Tensor boxes_tensor, at::Tensor pts_tensor, + at::Tensor box_idx_of_points_tensor); -int points_in_boxes_batch(at::Tensor boxes_tensor, at::Tensor pts_tensor, - at::Tensor box_idx_of_points_tensor); +int points_in_boxes_all(at::Tensor boxes_tensor, at::Tensor pts_tensor, + at::Tensor box_idx_of_points_tensor); int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts, at::Tensor pts_feature, at::Tensor argmax, at::Tensor pts_idx_of_voxels, at::Tensor pooled_features, int pool_method) { - // params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate + // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, ry] in LiDAR coordinate // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate // params pts_feature: (npoints, C) // params argmax: (N, out_x, out_y, out_z, C) @@ -127,10 +127,10 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("forward", &roiaware_pool3d_gpu, "roiaware pool3d forward (CUDA)"); m.def("backward", &roiaware_pool3d_gpu_backward, "roiaware pool3d backward (CUDA)"); - m.def("points_in_boxes_gpu", &points_in_boxes_gpu, - "points_in_boxes_gpu forward (CUDA)"); - m.def("points_in_boxes_batch", &points_in_boxes_batch, - "points_in_boxes_batch forward (CUDA)"); + m.def("points_in_boxes_part", &points_in_boxes_part, + "points_in_boxes_part forward (CUDA)"); + m.def("points_in_boxes_all", &points_in_boxes_all, + "points_in_boxes_all forward (CUDA)"); m.def("points_in_boxes_cpu", &points_in_boxes_cpu, "points_in_boxes_cpu forward (CPU)"); } diff --git a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu index c1c948e96a..8f62e891de 100644 --- a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu +++ b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu @@ -25,17 +25,17 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y, __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d, float &local_x, float &local_y) { // param pt: (x, y, z) - // param box3d: (cx, cy, cz, dx, dy, dz, rz) in LiDAR coordinate, cz in the + // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate, cz in the // bottom center float x = pt[0], y = pt[1], z = pt[2]; float cx = box3d[0], cy = box3d[1], cz = box3d[2]; - float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6]; - cz += dz / 2.0; // shift to the center since cz in box3d is the bottom center + 
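The rename makes the return semantics explicit: the `_part` variant assumes boxes do not overlap and reports one box index per point, while the `_all` variant reports membership against every box. A minimal usage sketch of the renamed Python wrappers, with shapes taken from the tests further down in this diff (assumes a CUDA build of mmdet3d):

```python
import torch

from mmdet3d.ops.roiaware_pool3d import (points_in_boxes_all,
                                         points_in_boxes_part)

# boxes: (B, N, 7) as [x, y, z, x_size, y_size, z_size, rz], z is the
# bottom center; points: (B, M, 3); both in LiDAR coordinates.
boxes = torch.tensor([[[1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 0.3]]]).cuda()
points = torch.rand(1, 8, 3).cuda()

# (B, M) int32: index of the single box containing each point, -1 if none.
# Boxes are assumed not to overlap.
part_idx = points_in_boxes_part(points=points, boxes=boxes)

# (B, M, N) int32: per-box membership mask; safe for overlapping boxes.
all_idx = points_in_boxes_all(points=points, boxes=boxes)
```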
diff --git a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
index c1c948e96a..8f62e891de 100644
--- a/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
+++ b/mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
@@ -25,17 +25,17 @@ __device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
 __device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
                                         float &local_x, float &local_y) {
   // param pt: (x, y, z)
-  // param box3d: (cx, cy, cz, dx, dy, dz, rz) in LiDAR coordinate, cz in the
+  // param box3d: (cx, cy, cz, x_size, y_size, z_size, rz) in LiDAR coordinate, cz in the
   // bottom center
   float x = pt[0], y = pt[1], z = pt[2];
   float cx = box3d[0], cy = box3d[1], cz = box3d[2];
-  float dx = box3d[3], dy = box3d[4], dz = box3d[5], rz = box3d[6];
-  cz += dz / 2.0;  // shift to the center since cz in box3d is the bottom center
+  float x_size = box3d[3], y_size = box3d[4], z_size = box3d[5], rz = box3d[6];
+  cz += z_size / 2.0;  // shift to the center since cz in box3d is the bottom center
 
-  if (fabsf(z - cz) > dz / 2.0) return 0;
+  if (fabsf(z - cz) > z_size / 2.0) return 0;
   lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
-  float in_flag = (local_x > -dx / 2.0) & (local_x < dx / 2.0) &
-                  (local_y > -dy / 2.0) & (local_y < dy / 2.0);
+  float in_flag = (local_x > -x_size / 2.0) & (local_x < x_size / 2.0) &
+                  (local_y > -y_size / 2.0) & (local_y < y_size / 2.0);
   return in_flag;
 }
 
@@ -43,7 +43,7 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
                                             int out_x, int out_y, int out_z,
                                             const float *rois,
                                             const float *pts, int *pts_mask) {
-  // params rois: (N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate
+  // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate
   // params pts: (npoints, 3) [x, y, z]
   // params pts_mask: (N, npoints): -1 means point does not in this box,
   // otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
@@ -61,14 +61,14 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num,
   pts_mask[0] = -1;
   if (cur_in_flag > 0) {
     float local_z = pts[2] - rois[2];
-    float dx = rois[3], dy = rois[4], dz = rois[5];
+    float x_size = rois[3], y_size = rois[4], z_size = rois[5];
 
-    float x_res = dx / out_x;
-    float y_res = dy / out_y;
-    float z_res = dz / out_z;
+    float x_res = x_size / out_x;
+    float y_res = y_size / out_y;
+    float z_res = z_size / out_z;
 
-    unsigned int x_idx = int((local_x + dx / 2) / x_res);
-    unsigned int y_idx = int((local_y + dy / 2) / y_res);
+    unsigned int x_idx = int((local_x + x_size / 2) / x_res);
+    unsigned int y_idx = int((local_y + y_size / 2) / y_res);
     unsigned int z_idx = int(local_z / z_res);
 
     x_idx = min(max(x_idx, 0), out_x - 1);
@@ -229,7 +229,7 @@ void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
                               const float *pts_feature, int *argmax,
                               int *pts_idx_of_voxels, float *pooled_features,
                               int pool_method) {
-  // params rois: (N, 7) [x, y, z, dx, dy, dz, rz] in LiDAR coordinate
+  // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR coordinate
   // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
   // params pts_feature: (npoints, C)
   // params argmax: (N, out_x, out_y, out_z, C)
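For reference, the membership test the kernel performs is: shift `cz` from the bottom face to the box center, reject by height, rotate the xy offset into the box frame, then compare against the half-sizes. A NumPy sketch of the same logic (the rotate-by-`-rz` convention is an assumption based on `lidar_to_local_coords`, not a verbatim copy of the kernel):

```python
import numpy as np

def point_in_box3d(pt, box3d):
    """pt: (x, y, z); box3d: (cx, cy, cz, x_size, y_size, z_size, rz),
    with cz at the bottom face, both in LiDAR coordinates."""
    x, y, z = pt
    cx, cy, cz, x_size, y_size, z_size, rz = box3d
    cz += z_size / 2.0  # shift to the geometric center
    if abs(z - cz) > z_size / 2.0:
        return False
    # rotate the xy offset into the box's local frame
    cosa, sina = np.cos(-rz), np.sin(-rz)
    local_x = (x - cx) * cosa - (y - cy) * sina
    local_y = (x - cx) * sina + (y - cy) * cosa
    return abs(local_x) < x_size / 2.0 and abs(local_y) < y_size / 2.0
```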
diff --git a/mmdet3d/ops/sparse_block.py b/mmdet3d/ops/sparse_block.py
index 52e343a18e..0539736342 100644
--- a/mmdet3d/ops/sparse_block.py
+++ b/mmdet3d/ops/sparse_block.py
@@ -14,12 +14,12 @@ class SparseBottleneck(Bottleneck, spconv.SparseModule):
     Args:
         inplanes (int): inplanes of block.
         planes (int): planes of block.
-        stride (int): stride of the first block. Default: 1
-        downsample (None | Module): down sample module for block.
-        conv_cfg (dict): dictionary to construct and config conv layer.
-            Default: None
-        norm_cfg (dict): dictionary to construct and config norm layer.
-            Default: dict(type='BN')
+        stride (int, optional): stride of the first block. Default: 1.
+        downsample (Module, optional): down sample module for block.
+        conv_cfg (dict, optional): dictionary to construct and config conv
+            layer. Default: None.
+        norm_cfg (dict, optional): dictionary to construct and config norm
+            layer. Default: dict(type='BN').
     """
 
     expansion = 4
@@ -73,12 +73,12 @@ class SparseBasicBlock(BasicBlock, spconv.SparseModule):
     Args:
         inplanes (int): inplanes of block.
         planes (int): planes of block.
-        stride (int): stride of the first block. Default: 1
-        downsample (None | Module): down sample module for block.
-        conv_cfg (dict): dictionary to construct and config conv layer.
-            Default: None
-        norm_cfg (dict): dictionary to construct and config norm layer.
-            Default: dict(type='BN')
+        stride (int, optional): stride of the first block. Default: 1.
+        downsample (Module, optional): down sample module for block.
+        conv_cfg (dict, optional): dictionary to construct and config conv
+            layer. Default: None.
+        norm_cfg (dict, optional): dictionary to construct and config norm
+            layer. Default: dict(type='BN').
     """
 
     expansion = 1
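These are docstring-only changes, but for orientation, a hedged construction sketch of `SparseBasicBlock` with the documented optional arguments. It requires spconv; the `SubMConv3d` conv type, `indice_key`, and `BN1d` settings here are assumptions taken from typical mmdet3d sparse-encoder configs, not from this diff:

```python
from mmdet3d.ops import SparseBasicBlock

# conv_cfg/norm_cfg values below are illustrative, not mandated defaults.
block = SparseBasicBlock(
    inplanes=16,
    planes=16,
    stride=1,
    downsample=None,
    conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),
    norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))
```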
diff --git a/tests/test_models/test_common_modules/test_roiaware_pool3d.py b/tests/test_models/test_common_modules/test_roiaware_pool3d.py
index db7e3de367..90559dc976 100644
--- a/tests/test_models/test_common_modules/test_roiaware_pool3d.py
+++ b/tests/test_models/test_common_modules/test_roiaware_pool3d.py
@@ -3,9 +3,9 @@
 import pytest
 import torch
 
-from mmdet3d.ops.roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,
+from mmdet3d.ops.roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_all,
                                          points_in_boxes_cpu,
-                                         points_in_boxes_gpu)
+                                         points_in_boxes_part)
 
 
 def test_RoIAwarePool3d():
@@ -42,7 +42,7 @@ def test_RoIAwarePool3d():
                           torch.tensor(49.750).cuda(), 1e-3)
 
 
-def test_points_in_boxes_gpu():
+def test_points_in_boxes_part():
     if not torch.cuda.is_available():
         pytest.skip('test requires GPU and torch+cuda')
     boxes = torch.tensor(
@@ -58,7 +58,7 @@
                          [0, 0, 0], [6, 7, 8], [-2, -3, -4], [6, 4, 9]]],
        dtype=torch.float32).cuda()  # points (b, m, 3) in lidar coordinate
 
-    point_indices = points_in_boxes_gpu(points=pts, boxes=boxes)
+    point_indices = points_in_boxes_part(points=pts, boxes=boxes)
     expected_point_indices = torch.tensor(
         [[0, 0, 0, 0, 0, -1, -1, -1], [-1, -1, -1, -1, -1, -1, -1, -1]],
         dtype=torch.int32).cuda()
@@ -71,7 +71,7 @@
         [[[4, 6.928, 0], [6.928, 4, 0], [4, -6.928, 0], [6.928, -4, 0],
           [-4, 6.928, 0], [-6.928, 4, 0], [-4, -6.928, 0], [-6.928, -4, 0]]],
         dtype=torch.float32).cuda()
-    point_indices = points_in_boxes_gpu(points=pts, boxes=boxes)
+    point_indices = points_in_boxes_part(points=pts, boxes=boxes)
     expected_point_indices = torch.tensor([[-1, -1, 0, -1, 0, -1, -1, -1]],
                                           dtype=torch.int32).cuda()
     assert (point_indices == expected_point_indices).all()
@@ -80,7 +80,7 @@
         pts = pts.to('cuda:1')
         boxes = boxes.to('cuda:1')
         expected_point_indices = expected_point_indices.to('cuda:1')
-        point_indices = points_in_boxes_gpu(points=pts, boxes=boxes)
+        point_indices = points_in_boxes_part(points=pts, boxes=boxes)
         assert point_indices.shape == torch.Size([2, 8])
         assert (point_indices == expected_point_indices).all()
 
@@ -119,7 +119,7 @@ def test_points_in_boxes_cpu():
     assert (point_indices == expected_point_indices).all()
 
 
-def test_points_in_boxes_batch():
+def test_points_in_boxes_all():
     if not torch.cuda.is_available():
         pytest.skip('test requires GPU and torch+cuda')
 
@@ -136,7 +136,7 @@
         ], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]]],
        dtype=torch.float32).cuda()  # points (n, 3) in lidar coordinate
 
-    point_indices = points_in_boxes_batch(points=pts, boxes=boxes)
+    point_indices = points_in_boxes_all(points=pts, boxes=boxes)
     expected_point_indices = torch.tensor(
         [[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0],
           [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
@@ -148,6 +148,6 @@
         pts = pts.to('cuda:1')
         boxes = boxes.to('cuda:1')
         expected_point_indices = expected_point_indices.to('cuda:1')
-        point_indices = points_in_boxes_batch(points=pts, boxes=boxes)
+        point_indices = points_in_boxes_all(points=pts, boxes=boxes)
         assert point_indices.shape == torch.Size([1, 15, 2])
         assert (point_indices == expected_point_indices).all()
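The surrounding `RoIAwarePool3d` op is what these helpers ultimately serve: it pools per-point features into a voxel grid inside each RoI. A hedged usage sketch; the constructor arguments mirror the op's documented signature and the (N, 7) / (npoints, 3) / (npoints, C) shapes stated in the C++ comments above, while the concrete values are illustrative:

```python
import torch

from mmdet3d.ops.roiaware_pool3d import RoIAwarePool3d

pool = RoIAwarePool3d(out_size=4, max_pts_per_voxel=128, mode='max')
rois = torch.tensor([[1.0, 2.0, 3.0, 4.0, 4.0, 6.0, 0.3]]).cuda()  # (N, 7)
pts = torch.rand(100, 3).cuda()            # (npoints, 3), LiDAR coordinates
pts_feature = torch.rand(100, 16).cuda()   # (npoints, C)
pooled = pool(rois, pts, pts_feature)      # (N, 4, 4, 4, 16)
```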
diff --git a/tests/test_models/test_forward.py b/tests/test_models/test_forward.py
index eabacc0928..107f199241 100644
--- a/tests/test_models/test_forward.py
+++ b/tests/test_models/test_forward.py
@@ -148,7 +148,7 @@ def _demo_mm_inputs(input_shape=(1, 3, 300, 300),
         input_shape (tuple):
             input batch dimensions
 
-        num_items (None | List[int]):
+        num_items (List[int]):
             specifies the number of boxes in each batch item
 
         num_classes (int):
diff --git a/tests/test_models/test_heads/test_heads.py b/tests/test_models/test_heads/test_heads.py
index 01f1c09509..63cd1f8346 100644
--- a/tests/test_models/test_heads/test_heads.py
+++ b/tests/test_models/test_heads/test_heads.py
@@ -1144,7 +1144,7 @@ def test_groupfree3d_head():
     assert ret_dict['s5.sem_scores'].shape == torch.Size([2, 256, 18])
 
     # test losses
-    points = [torch.rand([50000, 4], device='cuda') for i in range(2)]
+    points = [torch.rand([5000, 4], device='cuda') for i in range(2)]
     gt_bbox1 = torch.rand([10, 7], dtype=torch.float32).cuda()
     gt_bbox2 = torch.rand([10, 7], dtype=torch.float32).cuda()
 
@@ -1152,12 +1152,12 @@
     gt_bbox2 = DepthInstance3DBoxes(gt_bbox2)
     gt_bboxes = [gt_bbox1, gt_bbox2]
 
-    pts_instance_mask_1 = torch.randint(0, 10, [50000], device='cuda')
-    pts_instance_mask_2 = torch.randint(0, 10, [50000], device='cuda')
+    pts_instance_mask_1 = torch.randint(0, 10, [5000], device='cuda')
+    pts_instance_mask_2 = torch.randint(0, 10, [5000], device='cuda')
     pts_instance_mask = [pts_instance_mask_1, pts_instance_mask_2]
 
-    pts_semantic_mask_1 = torch.randint(0, 19, [50000], device='cuda')
-    pts_semantic_mask_2 = torch.randint(0, 19, [50000], device='cuda')
+    pts_semantic_mask_1 = torch.randint(0, 19, [5000], device='cuda')
+    pts_semantic_mask_2 = torch.randint(0, 19, [5000], device='cuda')
     pts_semantic_mask = [pts_semantic_mask_1, pts_semantic_mask_2]
 
     labels_1 = torch.randint(0, 18, [10], device='cuda')
@@ -1178,7 +1178,7 @@
     # test multiclass_nms_single
    obj_scores = torch.rand([256], device='cuda')
     sem_scores = torch.rand([256, 18], device='cuda')
-    points = torch.rand([50000, 3], device='cuda')
+    points = torch.rand([5000, 3], device='cuda')
     bbox = torch.rand([256, 7], device='cuda')
     input_meta = dict(box_type_3d=DepthInstance3DBoxes)
     bbox_selected, score_selected, labels = \
@@ -1193,9 +1193,9 @@
     assert labels.shape[0] >= 0
 
     # test get_boxes
-    points = torch.rand([1, 50000, 3], device='cuda')
+    points = torch.rand([1, 5000, 3], device='cuda')
     seed_points = torch.rand([1, 1024, 3], device='cuda')
-    seed_indices = torch.randint(0, 50000, [1, 1024], device='cuda')
+    seed_indices = torch.randint(0, 5000, [1, 1024], device='cuda')
     obj_scores = torch.rand([1, 256, 1], device='cuda')
     center = torch.rand([1, 256, 3], device='cuda')
     dir_class = torch.rand([1, 256, 1], device='cuda')
diff --git a/tests/test_utils/test_box3d.py b/tests/test_utils/test_box3d.py
index 2915e70f0f..974f22caec 100644
--- a/tests/test_utils/test_box3d.py
+++ b/tests/test_utils/test_box3d.py
@@ -5,9 +5,9 @@
 import unittest
 
 from mmdet3d.core.bbox import (BaseInstance3DBoxes, Box3DMode,
-                               CameraInstance3DBoxes, DepthInstance3DBoxes,
-                               LiDARInstance3DBoxes, bbox3d2roi,
-                               bbox3d_mapping_back)
+                               CameraInstance3DBoxes, Coord3DMode,
+                               DepthInstance3DBoxes, LiDARInstance3DBoxes,
+                               bbox3d2roi, bbox3d_mapping_back)
 from mmdet3d.core.bbox.structures.utils import (get_box_type, limit_period,
                                                 points_cam2img,
                                                 rotation_3d_in_axis,
@@ -409,6 +409,13 @@ def test_lidar_boxes3d():
     assert torch.allclose(boxes.tensor, expected_tensor)
 
     # test bbox in_range_bev
+    expected_tensor = torch.tensor(
+        [[1.1282, -3.0508, 1.7598, 3.4090, -1.2079],
+         [8.0981, -4.9332, 1.5486, 4.0325, -1.3479],
+         [27.6424, -7.2409, 1.4782, 2.2425, 1.8421],
+         [20.0183, -28.4773, 1.5687, 3.4995, 1.9621],
+         [28.2147, -16.5020, 1.7497, 3.7911, -2.5179]])
+    assert torch.allclose(boxes.bev, expected_tensor, atol=1e-3)
     expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)
     mask = boxes.in_range_bev([0., -40., 70.4, 40.])
     assert (mask == expected_tensor).all()
@@ -1000,6 +1007,14 @@ def test_camera_boxes3d():
     mask = boxes.in_range_3d([-2, -5, 0, 20, 2, 22])
     assert (mask == expected_tensor).all()
 
+    expected_tensor = torch.tensor(
+        [[3.0508, 1.1282, 1.7598, 3.4090, -5.9203],
+         [4.9332, 8.0981, 1.5486, 4.0325, -6.0603],
+         [7.2409, 27.6424, 1.4782, 2.2425, -2.8703],
+         [28.4773, 20.0183, 1.5687, 3.4995, -2.7503],
+         [16.5020, 28.2147, 1.7497, 3.7911, -0.9471]])
+    assert torch.allclose(boxes.bev, expected_tensor, atol=1e-3)
+
     # test properties
     assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])
     expected_tensor = (
@@ -1389,6 +1404,11 @@ def test_depth_boxes3d():
     mask = boxes.nonempty()
     assert (mask == expected_tensor).all()
 
+    # test bbox in_range
+    expected_tensor = torch.tensor([0, 1], dtype=torch.bool)
+    mask = boxes.in_range_3d([1, 0, -2, 2, 1, 5])
+    assert (mask == expected_tensor).all()
+
     expected_tensor = torch.tensor([[[-0.1030, 0.6649, 0.1056],
                                      [-0.1030, 0.6649, 0.3852],
                                      [-0.1030, 0.9029, 0.3852],
@@ -1409,7 +1429,7 @@
 
     # test points in boxes
     if torch.cuda.is_available():
-        box_idxs_of_pts = boxes.points_in_boxes_batch(points.cuda())
+        box_idxs_of_pts = boxes.points_in_boxes_all(points.cuda())
         expected_idxs_of_pts = torch.tensor(
             [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
             device='cuda:0',
@@ -1467,23 +1487,25 @@ def test_depth_boxes3d():
 
 
 def test_rotation_3d_in_axis():
-    # # clockwise
-    # points = torch.tensor([[[-0.4599, -0.0471, 0.0000],
-    #                         [-0.4599, -0.0471, 1.8433],
-    #                         [-0.4599, 0.0471, 1.8433]],
-    #                        [[-0.2555, -0.2683, 0.0000],
-    #                         [-0.2555, -0.2683, 0.9072],
-    #                         [-0.2555, 0.2683, 0.9072]]])
-    # rotated = rotation_3d_in_axis(
-    #     points, torch.tensor([-np.pi / 10, np.pi / 10]),
-    #     axis=0, clockwise=True)
-    # expected_rotated = torch.tensor([[[0.0000, -0.4228, -0.1869],
-    #                                   [1.8433, -0.4228, -0.1869],
-    #                                   [1.8433, -0.4519, -0.0973]],
-    #                                  [[0.0000, -0.3259, -0.1762],
-    #                                   [0.9072, -0.3259, -0.1762],
-    #                                   [0.9072, -0.1601, 0.3341]]])
-    # assert torch.allclose(rotated, expected_rotated, 1e-3)
+    # clockwise
+    points = torch.tensor([[[-0.4599, -0.0471, 0.0000],
+                            [-0.4599, -0.0471, 1.8433],
+                            [-0.4599, 0.0471, 1.8433]],
+                           [[-0.2555, -0.2683, 0.0000],
+                            [-0.2555, -0.2683, 0.9072],
+                            [-0.2555, 0.2683, 0.9072]]])
+    rotated = rotation_3d_in_axis(
+        points,
+        torch.tensor([-np.pi / 10, np.pi / 10]),
+        axis=0,
+        clockwise=True)
+    expected_rotated = torch.tensor(
+        [[[-0.4599, -0.0448, -0.0146], [-0.4599, -0.6144, 1.7385],
+          [-0.4599, -0.5248, 1.7676]],
+         [[-0.2555, -0.2552, 0.0829], [-0.2555, 0.0252, 0.9457],
+          [-0.2555, 0.5355, 0.7799]]],
+        dtype=torch.float32)
+    assert torch.allclose(rotated, expected_rotated, atol=1e-3)
 
     # anti-clockwise with return rotation mat
     points = torch.tensor([[[-0.4599, -0.0471, 0.0000],
@@ -1622,3 +1644,128 @@ def test_points_cam2img():
     point_2d_res = points_cam2img(points, proj_mat)
     expected_point_2d_res = torch.from_numpy(expected_point_2d_res)
     assert torch.allclose(point_2d_res, expected_point_2d_res, 1e-3)
+
+    point_2d_res = points_cam2img(points, proj_mat, with_depth=True)
+    expected_point_2d_res = torch.tensor([[0.5832, 0.6496, 1.7577],
+                                          [0.6146, 0.7910, 1.5477],
+                                          [0.6994, 0.7782, 2.0091],
+                                          [0.5623, 0.6303, 1.8739],
+                                          [0.4359, 0.6532, 1.2056]])
+    assert torch.allclose(point_2d_res, expected_point_2d_res, 1e-3)
+
+
+def test_points_in_boxes():
+    if not torch.cuda.is_available():
+        pytest.skip('test requires GPU and torch+cuda')
+    lidar_pts = torch.tensor([[1.0, 4.3, 0.1], [1.0, 4.4, 0.1],
+                              [1.1, 4.3, 0.1], [0.9, 4.3, 0.1],
+                              [1.0, -0.3, 0.1], [1.0, -0.4, 0.1],
+                              [2.9, 0.1, 6.0], [-0.9, 3.9, 6.0]]).cuda()
+    lidar_boxes = torch.tensor([[1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 6],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 2],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 7 * np.pi / 6],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, -np.pi / 6]],
+                               dtype=torch.float32).cuda()
+    lidar_boxes = LiDARInstance3DBoxes(lidar_boxes)
+
+    point_indices = lidar_boxes.points_in_boxes_all(lidar_pts)
+    expected_point_indices = torch.tensor(
+        [[1, 0, 1, 1], [0, 0, 0, 0], [1, 0, 1, 0], [0, 0, 0, 1], [1, 0, 1, 1],
+         [0, 0, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0]],
+        dtype=torch.int32).cuda()
+    assert point_indices.shape == torch.Size([8, 4])
+    assert (point_indices == expected_point_indices).all()
+
+    lidar_pts = torch.tensor([[1.0, 4.3, 0.1], [1.0, 4.4, 0.1],
+                              [1.1, 4.3, 0.1], [0.9, 4.3, 0.1],
+                              [1.0, -0.3, 0.1], [1.0, -0.4, 0.1],
+                              [2.9, 0.1, 6.0], [-0.9, 3.9, 6.0]]).cuda()
+    lidar_boxes = torch.tensor([[1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 6],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 2],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 7 * np.pi / 6],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, -np.pi / 6]],
+                               dtype=torch.float32).cuda()
+    lidar_boxes = LiDARInstance3DBoxes(lidar_boxes)
+
+    point_indices = lidar_boxes.points_in_boxes_part(lidar_pts)
+    expected_point_indices = torch.tensor([0, -1, 0, 3, 0, -1, 1, 1],
+                                          dtype=torch.int32).cuda()
+    assert point_indices.shape == torch.Size([8])
+    assert (point_indices == expected_point_indices).all()
+
+    depth_boxes = torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
+                                [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]],
+                               dtype=torch.float32).cuda()
+    depth_boxes = DepthInstance3DBoxes(depth_boxes)
+    depth_pts = torch.tensor(
+        [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
+          [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
+          [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20],
+          [-16, -18, 9], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8],
+          [-2, -3, -4]]],
+        dtype=torch.float32).cuda()
+
+    point_indices = depth_boxes.points_in_boxes_all(depth_pts)
+    expected_point_indices = torch.tensor(
+        [[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0],
+         [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]],
+        dtype=torch.int32).cuda()
+    assert point_indices.shape == torch.Size([15, 2])
+    assert (point_indices == expected_point_indices).all()
+
+    point_indices = depth_boxes.points_in_boxes_part(depth_pts)
+    expected_point_indices = torch.tensor(
+        [0, 0, 0, 0, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
+        dtype=torch.int32).cuda()
+    assert point_indices.shape == torch.Size([15])
+    assert (point_indices == expected_point_indices).all()
+
+    depth_boxes = torch.tensor([[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
+                                [-10.0, 23.0, 16.0, 10, 20, 20, 0.5],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 6],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, np.pi / 2],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 7 * np.pi / 6],
+                                [1.0, 2.0, 0.0, 4.0, 4.0, 6.0, -np.pi / 6]],
+                               dtype=torch.float32).cuda()
+    cam_boxes = DepthInstance3DBoxes(depth_boxes).convert_to(Box3DMode.CAM)
+    depth_pts = torch.tensor(
+        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
+         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
+         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],
+         [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4], [1.0, 4.3, 0.1],
+         [1.0, 4.4, 0.1], [1.1, 4.3, 0.1], [0.9, 4.3, 0.1], [1.0, -0.3, 0.1],
+         [1.0, -0.4, 0.1], [2.9, 0.1, 6.0], [-0.9, 3.9, 6.0]],
+        dtype=torch.float32).cuda()
+
+    cam_pts = DepthPoints(depth_pts).convert_to(Coord3DMode.CAM).tensor
+
+    point_indices = cam_boxes.points_in_boxes_all(cam_pts)
+    expected_point_indices = torch.tensor(
+        [[1, 0, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1],
+         [1, 0, 1, 1, 1, 1], [1, 0, 1, 1, 1, 1], [0, 1, 0, 0, 0, 0],
+         [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
+         [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
+         [0, 0, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
+         [0, 0, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 1],
+         [0, 0, 1, 1, 1, 0], [0, 0, 1, 1, 1, 1], [0, 0, 0, 1, 0, 0],
+         [1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0]],
+        dtype=torch.int32).cuda()
+    assert point_indices.shape == torch.Size([23, 6])
+    assert (point_indices == expected_point_indices).all()
+
+    point_indices = cam_boxes.points_in_boxes_batch(cam_pts)
+    assert (point_indices == expected_point_indices).all()
+
+    point_indices = cam_boxes.points_in_boxes_part(cam_pts)
+    expected_point_indices = torch.tensor([
+        0, 0, 0, 0, 0, 1, -1, -1, -1, -1, -1, -1, 3, -1, -1, 2, 3, 3, 2, 2, 3,
+        0, 0
+    ],
+                                          dtype=torch.int32).cuda()
+    assert point_indices.shape == torch.Size([23])
+    assert (point_indices == expected_point_indices).all()
+
+    point_indices = cam_boxes.points_in_boxes(cam_pts)
+    assert (point_indices == expected_point_indices).all()
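The new assertions also pin down the `bev` property of the box structures: a (N, 5) view of each box in the bird's-eye plane (2D center, 2D extent, yaw), taken from x/y for LiDAR and Depth boxes and from x/z for Camera boxes. A small access sketch (the box values are illustrative; the exact column semantics are those encoded by the expected tensors above):

```python
import torch

from mmdet3d.core.bbox import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    torch.tensor([[1.0, 2.0, 0.0, 4.0, 3.0, 6.0, 0.3]]))
bev = boxes.bev  # (N, 5): 2D center, 2D extent and yaw in the BEV plane
assert bev.shape == (1, 5)
```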
diff --git a/tests/test_utils/test_box_np_ops.py b/tests/test_utils/test_box_np_ops.py
index 22757cb8d0..1c6275de52 100644
--- a/tests/test_utils/test_box_np_ops.py
+++ b/tests/test_utils/test_box_np_ops.py
@@ -63,3 +63,21 @@ def test_center_to_corner_box2d():
     expected_corner = np.array([[[-4.24264, -1.41421], [1.41421, 4.24264],
                                  [4.24264, 1.41421], [-1.41421, -4.24264]]])
     assert np.allclose(corner, expected_corner)
+
+
+def test_points_in_convex_polygon_jit():
+    from mmdet3d.core.bbox.box_np_ops import points_in_convex_polygon_jit
+    points = np.array([[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]])
+    polygons = np.array([[[1.0, 0.0], [0.0, 1.0], [0.0, 0.5], [0.0, 0.0]],
+                         [[1.0, 0.0], [1.0, 1.0], [0.5, 1.0], [0.0, 1.0]],
+                         [[1.0, 0.0], [0.0, 1.0], [-1.0, 0.0], [0.0, -1.0]]])
+    res = points_in_convex_polygon_jit(points, polygons)
+    expected_res = np.array([[1, 0, 1], [0, 0, 0], [0, 1, 0]]).astype(np.bool)
+    assert np.allclose(res, expected_res)
+
+    polygons = np.array([[[0.0, 0.0], [0.0, 1.0], [0.5, 0.5], [1.0, 0.0]],
+                         [[0.0, 1.0], [1.0, 1.0], [1.0, 0.5], [1.0, 0.0]],
+                         [[1.0, 0.0], [0.0, -1.0], [-1.0, 0.0], [0.0, 1.1]]])
+    res = points_in_convex_polygon_jit(points, polygons, clockwise=True)
+    expected_res = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 0]]).astype(np.bool)
+    assert np.allclose(res, expected_res)
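The semantics this test exercises are the classic convex-polygon membership check: a point is inside iff the cross products of every polygon edge with the edge-start-to-point vector all share one sign (which sign depends on vertex winding). A hedged NumPy equivalent of those semantics, not of the numba implementation itself:

```python
import numpy as np

def points_in_convex_polygon(points, polygons, clockwise=False):
    """points: (P, 2); polygons: (G, V, 2) -> bool array of shape (P, G)."""
    edges = np.roll(polygons, -1, axis=1) - polygons      # (G, V, 2)
    to_pt = points[:, None, None, :] - polygons[None]     # (P, G, V, 2)
    # z component of edge x to_pt; sign is uniform for interior points
    cross = (edges[None, ..., 0] * to_pt[..., 1] -
             edges[None, ..., 1] * to_pt[..., 0])         # (P, G, V)
    if clockwise:
        return (cross <= 0).all(axis=-1)
    return (cross >= 0).all(axis=-1)
```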
diff --git a/tests/test_utils/test_coord_3d_mode.py b/tests/test_utils/test_coord_3d_mode.py
index e4519829f6..24f0e192c8 100644
--- a/tests/test_utils/test_coord_3d_mode.py
+++ b/tests/test_utils/test_coord_3d_mode.py
@@ -263,11 +263,11 @@ def test_boxes_conversion():
     convert_depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                               Coord3DMode.DEPTH)
     expected_tensor = torch.tensor(
-        [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, -1.4800],
-         [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, -1.6200],
-         [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, 1.5700],
-         [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, 1.6900],
-         [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, -2.7900]])
+        [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, -1.4800],
+         [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, -1.6200],
+         [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, 1.5700],
+         [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, 1.6900],
+         [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, -2.7900]])
     assert torch.allclose(expected_tensor, convert_depth_boxes.tensor, 1e-3)
 
     # test LIDAR to CAM and DEPTH
@@ -327,11 +327,11 @@ def test_boxes_conversion():
     convert_cam_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
                                             Coord3DMode.CAM)
     expected_tensor = torch.tensor(
-        [[1.7802, -1.7501, -2.5162, 1.7500, 1.6500, 3.3900, -1.4800],
-         [8.9594, -1.6357, -2.4567, 1.5400, 1.5700, 4.0100, -1.6200],
-         [28.2967, -1.3033, 0.5558, 1.4700, 1.4800, 2.2300, 1.5700],
-         [26.6690, -1.7361, -21.8230, 1.5600, 1.4000, 3.4800, 1.6900],
-         [31.3198, -1.6218, -8.1621, 1.7400, 1.4800, 3.7700, -2.7900]])
+        [[1.7802, 1.7501, 2.5162, 1.7500, 1.6500, 3.3900, -1.4800],
+         [8.9594, 1.6357, 2.4567, 1.5400, 1.5700, 4.0100, -1.6200],
+         [28.2967, 1.3033, -0.5558, 1.4700, 1.4800, 2.2300, 1.5700],
+         [26.6690, 1.7361, 21.8230, 1.5600, 1.4000, 3.4800, 1.6900],
+         [31.3198, 1.6218, 8.1621, 1.7400, 1.4800, 3.7700, -2.7900]])
     assert torch.allclose(expected_tensor, convert_cam_boxes.tensor, 1e-3)
 
     convert_lidar_boxes = Coord3DMode.convert(depth_boxes, Coord3DMode.DEPTH,
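The corrected expectations swap the sign pattern between the two directions, consistent with mmdet3d's axis conventions: CAM is right/down/front while DEPTH is right/front/up, so the vertical and forward columns trade places with a sign change in each direction. A usage sketch of the conversion these tests cover (box values are illustrative):

```python
import torch

from mmdet3d.core.bbox import CameraInstance3DBoxes, Coord3DMode

cam_boxes = CameraInstance3DBoxes(
    torch.tensor([[1.78, 1.75, 2.52, 1.75, 1.65, 3.39, -1.48]]))
depth_boxes = Coord3DMode.convert(cam_boxes, Coord3DMode.CAM,
                                  Coord3DMode.DEPTH)
```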
diff --git a/tests/test_utils/test_points.py b/tests/test_utils/test_points.py
index 5376ffe8bd..20af27fc24 100644
--- a/tests/test_utils/test_points.py
+++ b/tests/test_utils/test_points.py
@@ -66,6 +66,7 @@ def test_base_points():
     ]])
 
     assert torch.allclose(expected_tensor, base_points.tensor)
+    assert torch.allclose(expected_tensor[:, :2], base_points.bev)
     assert torch.allclose(expected_tensor[:, :3], base_points.coord)
     assert torch.allclose(expected_tensor[:, 3:6], base_points.color)
     assert torch.allclose(expected_tensor[:, 6], base_points.height)
@@ -327,6 +328,7 @@ def test_cam_points():
     ]])
 
     assert torch.allclose(expected_tensor, cam_points.tensor)
+    assert torch.allclose(expected_tensor[:, [0, 2]], cam_points.bev)
     assert torch.allclose(expected_tensor[:, :3], cam_points.coord)
     assert torch.allclose(expected_tensor[:, 3:6], cam_points.color)
     assert torch.allclose(expected_tensor[:, 6], cam_points.height)
@@ -603,6 +605,7 @@ def test_lidar_points():
     ]])
 
     assert torch.allclose(expected_tensor, lidar_points.tensor)
+    assert torch.allclose(expected_tensor[:, :2], lidar_points.bev)
    assert torch.allclose(expected_tensor[:, :3], lidar_points.coord)
     assert torch.allclose(expected_tensor[:, 3:6], lidar_points.color)
     assert torch.allclose(expected_tensor[:, 6], lidar_points.height)
@@ -879,6 +882,7 @@ def test_depth_points():
     ]])
 
     assert torch.allclose(expected_tensor, depth_points.tensor)
+    assert torch.allclose(expected_tensor[:, :2], depth_points.bev)
     assert torch.allclose(expected_tensor[:, :3], depth_points.coord)
     assert torch.allclose(expected_tensor[:, 3:6], depth_points.color)
     assert torch.allclose(expected_tensor[:, 6], depth_points.height)
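These asserts introduce a matching `bev` property on the point containers: the two ground-plane coordinates of each point, columns [0, 1] (x, y) for Base/LiDAR/Depth points and columns [0, 2] (x, z) for Camera points, exactly as the slicing above encodes. A minimal access sketch:

```python
import torch

from mmdet3d.core.points import LiDARPoints

points = LiDARPoints(torch.rand(5, 4), points_dim=4)
assert points.bev.shape == (5, 2)  # (x, y) per point for LiDAR points
```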
""" assert os.path.exists(data_path) assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \ diff --git a/tools/data_converter/kitti_converter.py b/tools/data_converter/kitti_converter.py index 1b7eadb641..cc470b6996 100644 --- a/tools/data_converter/kitti_converter.py +++ b/tools/data_converter/kitti_converter.py @@ -159,7 +159,7 @@ def create_waymo_info_file(data_path, Args: data_path (str): Path of the data root. pkl_prefix (str): Prefix of the info file to be generated. - save_path (str | None): Path to save the info file. + save_path (str): Path to save the info file. relative_path (bool): Whether to use relative path. max_sweeps (int): Max sweeps before the detection frame to be used. """ @@ -238,11 +238,13 @@ def _create_reduced_point_cloud(data_path, Args: data_path (str): Path of original data. info_path (str): Path of data info. - save_path (str | None): Path to save reduced point cloud data. - Default: None. - back (bool): Whether to flip the points to back. - num_features (int): Number of point features. Default: 4. - front_camera_id (int): The referenced/front camera ID. Default: 2. + save_path (str, optional): Path to save reduced point cloud + data. Default: None. + back (bool, optional): Whether to flip the points to back. + Default: False. + num_features (int, optional): Number of point features. Default: 4. + front_camera_id (int, optional): The referenced/front camera ID. + Default: 2. """ kitti_infos = mmcv.load(info_path) @@ -298,14 +300,16 @@ def create_reduced_point_cloud(data_path, Args: data_path (str): Path of original data. pkl_prefix (str): Prefix of info files. - train_info_path (str | None): Path of training set info. + train_info_path (str, optional): Path of training set info. + Default: None. + val_info_path (str, optional): Path of validation set info. Default: None. - val_info_path (str | None): Path of validation set info. + test_info_path (str, optional): Path of test set info. Default: None. - test_info_path (str | None): Path of test set info. + save_path (str, optional): Path to save reduced point cloud data. Default: None. - save_path (str | None): Path to save reduced point cloud data. - with_back (bool): Whether to flip the points to back. + with_back (bool, optional): Whether to flip the points to back. + Default: False. """ if train_info_path is None: train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl' @@ -335,7 +339,8 @@ def export_2d_annotation(root_path, info_path, mono3d=True): Args: root_path (str): Root path of the raw data. info_path (str): Path of the info file. - mono3d (bool): Whether to export mono3d annotation. Default: True. + mono3d (bool, optional): Whether to export mono3d annotation. + Default: True. """ # get bbox annotations for camera kitti_infos = mmcv.load(info_path) @@ -381,8 +386,8 @@ def get_2d_boxes(info, occluded, mono3d=True): Args: info: Information of the given sample data. - occluded: Integer (0, 1, 2, 3) indicating occlusion state: \ - 0 = fully visible, 1 = partly occluded, 2 = largely occluded, \ + occluded: Integer (0, 1, 2, 3) indicating occlusion state: + 0 = fully visible, 1 = partly occluded, 2 = largely occluded, 3 = unknown, -1 = DontCare mono3d (bool): Whether to get boxes with mono3d annotation. 
diff --git a/tools/data_converter/kitti_converter.py b/tools/data_converter/kitti_converter.py
index 1b7eadb641..cc470b6996 100644
--- a/tools/data_converter/kitti_converter.py
+++ b/tools/data_converter/kitti_converter.py
@@ -159,7 +159,7 @@ def create_waymo_info_file(data_path,
     Args:
         data_path (str): Path of the data root.
         pkl_prefix (str): Prefix of the info file to be generated.
-        save_path (str | None): Path to save the info file.
+        save_path (str): Path to save the info file.
         relative_path (bool): Whether to use relative path.
         max_sweeps (int): Max sweeps before the detection frame to be used.
     """
@@ -238,11 +238,13 @@ def _create_reduced_point_cloud(data_path,
     Args:
         data_path (str): Path of original data.
         info_path (str): Path of data info.
-        save_path (str | None): Path to save reduced point cloud data.
-            Default: None.
-        back (bool): Whether to flip the points to back.
-        num_features (int): Number of point features. Default: 4.
-        front_camera_id (int): The referenced/front camera ID. Default: 2.
+        save_path (str, optional): Path to save reduced point cloud
+            data. Default: None.
+        back (bool, optional): Whether to flip the points to back.
+            Default: False.
+        num_features (int, optional): Number of point features. Default: 4.
+        front_camera_id (int, optional): The referenced/front camera ID.
+            Default: 2.
     """
     kitti_infos = mmcv.load(info_path)
 
@@ -298,14 +300,16 @@ def create_reduced_point_cloud(data_path,
     Args:
         data_path (str): Path of original data.
         pkl_prefix (str): Prefix of info files.
-        train_info_path (str | None): Path of training set info.
+        train_info_path (str, optional): Path of training set info.
+            Default: None.
+        val_info_path (str, optional): Path of validation set info.
             Default: None.
-        val_info_path (str | None): Path of validation set info.
+        test_info_path (str, optional): Path of test set info.
             Default: None.
-        test_info_path (str | None): Path of test set info.
+        save_path (str, optional): Path to save reduced point cloud data.
             Default: None.
-        save_path (str | None): Path to save reduced point cloud data.
-        with_back (bool): Whether to flip the points to back.
+        with_back (bool, optional): Whether to flip the points to back.
+            Default: False.
     """
     if train_info_path is None:
         train_info_path = Path(data_path) / f'{pkl_prefix}_infos_train.pkl'
@@ -335,7 +339,8 @@ def export_2d_annotation(root_path, info_path, mono3d=True):
     Args:
         root_path (str): Root path of the raw data.
         info_path (str): Path of the info file.
-        mono3d (bool): Whether to export mono3d annotation. Default: True.
+        mono3d (bool, optional): Whether to export mono3d annotation.
+            Default: True.
     """
     # get bbox annotations for camera
     kitti_infos = mmcv.load(info_path)
@@ -381,8 +386,8 @@ def get_2d_boxes(info, occluded, mono3d=True):
 
     Args:
         info: Information of the given sample data.
-        occluded: Integer (0, 1, 2, 3) indicating occlusion state: \
-            0 = fully visible, 1 = partly occluded, 2 = largely occluded, \
+        occluded: Integer (0, 1, 2, 3) indicating occlusion state:
+            0 = fully visible, 1 = partly occluded, 2 = largely occluded,
             3 = unknown, -1 = DontCare
         mono3d (bool): Whether to get boxes with mono3d annotation.
@@ -508,7 +513,7 @@ def generate_record(ann_rec, x1, y1, x2, y2, sample_data_token, filename):
         - area (float): 2d box area
         - category_name (str): category name
        - category_id (int): category id
-        - bbox (list[float]): left x, top y, dx, dy of 2d box
+        - bbox (list[float]): left x, top y, x_size, y_size of 2d box
         - iscrowd (int): whether the area is crowd
     """
     repro_rec = OrderedDict()
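The renamed `bbox` field keeps the usual COCO-style convention: corners in, `[left x, top y, x_size, y_size]` out. A one-line sketch of that conversion (of the convention only, not of `generate_record` itself):

```python
def corners_to_xywh(x1, y1, x2, y2):
    """Convert corner format to [left x, top y, x_size, y_size]."""
    return [x1, y1, x2 - x1, y2 - y1]
```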
diff --git a/tools/data_converter/lyft_converter.py b/tools/data_converter/lyft_converter.py
index f33d5eb6b5..ba35dea239 100644
--- a/tools/data_converter/lyft_converter.py
+++ b/tools/data_converter/lyft_converter.py
@@ -26,10 +26,10 @@ def create_lyft_infos(root_path,
     Args:
         root_path (str): Path of the data root.
         info_prefix (str): Prefix of the info file to be generated.
-        version (str): Version of the data.
-            Default: 'v1.01-train'
-        max_sweeps (int): Max number of sweeps.
-            Default: 10
+        version (str, optional): Version of the data.
+            Default: 'v1.01-train'.
+        max_sweeps (int, optional): Max number of sweeps.
+            Default: 10.
     """
     lyft = Lyft(
         data_path=osp.join(root_path, version),
@@ -101,9 +101,9 @@ def _fill_trainval_infos(lyft,
         lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset.
         train_scenes (list[str]): Basic information of training scenes.
         val_scenes (list[str]): Basic information of validation scenes.
-        test (bool): Whether use the test mode. In the test mode, no
+        test (bool, optional): Whether use the test mode. In the test mode, no
             annotations can be accessed. Default: False.
-        max_sweeps (int): Max number of sweeps. Default: 10.
+        max_sweeps (int, optional): Max number of sweeps. Default: 10.
 
     Returns:
         tuple[list[dict]]: Information of training set and
@@ -194,7 +194,7 @@ def _fill_trainval_infos(lyft,
 
         # we need to convert box size to
         # the format of our lidar coordinate system
-        # which is dx, dy, dz (corresponding to l, w, h)
+        # which is x_size, y_size, z_size (corresponding to l, w, h)
         gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
         assert len(gt_boxes) == len(
             annotations), f'{len(gt_boxes)}, {len(annotations)}'
diff --git a/tools/data_converter/nuscenes_converter.py b/tools/data_converter/nuscenes_converter.py
index 74c9dd04d5..627212a601 100644
--- a/tools/data_converter/nuscenes_converter.py
+++ b/tools/data_converter/nuscenes_converter.py
@@ -34,10 +34,10 @@ def create_nuscenes_infos(root_path,
     Args:
         root_path (str): Path of the data root.
         info_prefix (str): Prefix of the info file to be generated.
-        version (str): Version of the data.
-            Default: 'v1.0-trainval'
-        max_sweeps (int): Max number of sweeps.
-            Default: 10
+        version (str, optional): Version of the data.
+            Default: 'v1.0-trainval'.
+        max_sweeps (int, optional): Max number of sweeps.
+            Default: 10.
     """
     from nuscenes.nuscenes import NuScenes
     nusc = NuScenes(version=version, dataroot=root_path, verbose=True)
@@ -152,9 +152,9 @@ def _fill_trainval_infos(nusc,
         nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
         train_scenes (list[str]): Basic information of training scenes.
         val_scenes (list[str]): Basic information of validation scenes.
-        test (bool): Whether use the test mode. In the test mode, no
+        test (bool, optional): Whether use the test mode. In test mode, no
            annotations can be accessed. Default: False.
-        max_sweeps (int): Max number of sweeps. Default: 10.
+        max_sweeps (int, optional): Max number of sweeps. Default: 10.
 
     Returns:
         tuple[list[dict]]: Information of training set and validation set
@@ -251,7 +251,7 @@ def _fill_trainval_infos(nusc,
         names = np.array(names)
         # we need to convert box size to
         # the format of our lidar coordinate system
-        # which is dx, dy, dz (corresponding to l, w, h)
+        # which is x_size, y_size, z_size (corresponding to l, w, h)
         gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
         assert len(gt_boxes) == len(
             annotations), f'{len(gt_boxes)}, {len(annotations)}'
@@ -291,7 +291,7 @@ def obtain_sensor2top(nusc,
         e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
         e2g_r_mat (np.ndarray): Rotation matrix from ego to global
             in shape (3, 3).
-        sensor_type (str): Sensor to calibrate. Default: 'lidar'.
+        sensor_type (str, optional): Sensor to calibrate. Default: 'lidar'.
 
     Returns:
         sweep (dict): Sweep information after transformation.
@@ -340,7 +340,8 @@ def export_2d_annotation(root_path, info_path, version, mono3d=True):
         root_path (str): Root path of the raw data.
         info_path (str): Path of the info file.
         version (str): Dataset version.
-        mono3d (bool): Whether to export mono3d annotation. Default: True.
+        mono3d (bool, optional): Whether to export mono3d annotation.
+            Default: True.
     """
     # get bbox annotations for camera
     camera_types = [
@@ -404,7 +405,7 @@ def get_2d_boxes(nusc,
     """Get the 2D annotation records for a given `sample_data_token`.
 
     Args:
-        sample_data_token (str): Sample data token belonging to a camera \
+        sample_data_token (str): Sample data token belonging to a camera
             keyframe.
         visibilities (list[str]): Visibility filter.
         mono3d (bool): Whether to get boxes with mono3d annotation.
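The comment both converters update describes the same size reordering: the source annotations carry dimensions as (w, l, h), while the LiDAR box layout wants (x_size, y_size, z_size) = (l, w, h), hence the `[1, 0, 2]` column swap. A self-contained sketch of that step with illustrative values:

```python
import numpy as np

locs = np.zeros((2, 3))            # box centers
dims = np.array([[1.6, 3.9, 1.5],  # (w, l, h) per annotation
                 [1.8, 4.5, 1.7]])
rots = np.zeros((2, 1))            # yaw
gt_boxes = np.concatenate([locs, dims[:, [1, 0, 2]], rots], axis=1)
# gt_boxes[:, 3:6] is now (l, w, h) == (x_size, y_size, z_size)
```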
""" def __init__(self, @@ -209,7 +212,7 @@ def _convert_to_label(self, mask): return label def get_scene_idxs_and_label_weight(self): - """Compute scene_idxs for data sampling and label weight for loss \ + """Compute scene_idxs for data sampling and label weight for loss calculation. We sample more times for scenes with more points. Label_weight is diff --git a/tools/data_converter/scannet_data_utils.py b/tools/data_converter/scannet_data_utils.py index a437fe01ce..4f341edb36 100644 --- a/tools/data_converter/scannet_data_utils.py +++ b/tools/data_converter/scannet_data_utils.py @@ -13,7 +13,7 @@ class ScanNetData(object): Args: root_path (str): Root path of the raw data. - split (str): Set split type of the data. Default: 'train'. + split (str, optional): Set split type of the data. Default: 'train'. """ def __init__(self, root_path, split='train'): @@ -90,9 +90,11 @@ def get_infos(self, num_workers=4, has_label=True, sample_id_list=None): This method gets information from the raw data. Args: - num_workers (int): Number of threads to be used. Default: 4. - has_label (bool): Whether the data has label. Default: True. - sample_id_list (list[int]): Index list of the sample. + num_workers (int, optional): Number of threads to be used. + Default: 4. + has_label (bool, optional): Whether the data has label. + Default: True. + sample_id_list (list[int], optional): Index list of the sample. Default: None. Returns: @@ -201,10 +203,11 @@ class ScanNetSegData(object): Args: data_root (str): Root path of the raw data. ann_file (str): The generated scannet infos. - split (str): Set split type of the data. Default: 'train'. - num_points (int): Number of points in each data input. Default: 8192. - label_weight_func (function): Function to compute the label weight. - Default: None. + split (str, optional): Set split type of the data. Default: 'train'. + num_points (int, optional): Number of points in each data input. + Default: 8192. + label_weight_func (function, optional): Function to compute the + label weight. Default: None. """ def __init__(self, @@ -261,7 +264,7 @@ def _convert_to_label(self, mask): return label def get_scene_idxs_and_label_weight(self): - """Compute scene_idxs for data sampling and label weight for loss \ + """Compute scene_idxs for data sampling and label weight for loss calculation. We sample more times for scenes with more points. Label_weight is diff --git a/tools/data_converter/sunrgbd_data_utils.py b/tools/data_converter/sunrgbd_data_utils.py index b06c5c5319..931d2a3eaa 100644 --- a/tools/data_converter/sunrgbd_data_utils.py +++ b/tools/data_converter/sunrgbd_data_utils.py @@ -42,7 +42,7 @@ def __init__(self, line): self.ymax = data[2] + data[4] self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax]) self.centroid = np.array([data[5], data[6], data[7]]) - # data[9] is dx (l), data[8] is dy (w), data[10] is dz (h) + # data[9] is x_size (l), data[8] is y_size (w), data[10] is z_size (h) # in our depth coordinate system, # l corresponds to the size along the x axis self.size = np.array([data[9], data[8], data[10]]) * 2 @@ -62,8 +62,8 @@ class SUNRGBDData(object): Args: root_path (str): Root path of the raw data. - split (str): Set split type of the data. Default: 'train'. - use_v1 (bool): Whether to use v1. Default: False. + split (str, optional): Set split type of the data. Default: 'train'. + use_v1 (bool, optional): Whether to use v1. Default: False. 
""" def __init__(self, root_path, split='train', use_v1=False): @@ -128,9 +128,11 @@ def get_infos(self, num_workers=4, has_label=True, sample_id_list=None): This method gets information from the raw data. Args: - num_workers (int): Number of threads to be used. Default: 4. - has_label (bool): Whether the data has label. Default: True. - sample_id_list (list[int]): Index list of the sample. + num_workers (int, optional): Number of threads to be used. + Default: 4. + has_label (bool, optional): Whether the data has label. + Default: True. + sample_id_list (list[int], optional): Index list of the sample. Default: None. Returns: diff --git a/tools/data_converter/waymo_converter.py b/tools/data_converter/waymo_converter.py index fc2ae013b5..0642d0f714 100644 --- a/tools/data_converter/waymo_converter.py +++ b/tools/data_converter/waymo_converter.py @@ -31,8 +31,8 @@ class Waymo2KITTI(object): save_dir (str): Directory to save data in KITTI format. prefix (str): Prefix of filename. In general, 0 for training, 1 for validation and 2 for testing. - workers (str): Number of workers for the parallel process. - test_mode (bool): Whether in the test_mode. Default: False. + workers (int, optional): Number of workers for the parallel process. + test_mode (bool, optional): Whether in the test_mode. Default: False. """ def __init__(self, @@ -402,8 +402,8 @@ def convert_range_image_to_point_cloud(self, camera projections corresponding with two returns. range_image_top_pose (:obj:`Transform`): Range image pixel pose for top lidar. - ri_index (int): 0 for the first return, 1 for the second return. - Default: 0. + ri_index (int, optional): 0 for the first return, + 1 for the second return. Default: 0. Returns: tuple[list[np.ndarray]]: (List of points with shape [N, 3],