Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Fix] Fix the dimension and yaw hack of FCOS3D on nuScenes #744

Merged
merged 12 commits into from
Jul 21, 2021
Merged
3 changes: 0 additions & 3 deletions mmdet3d/datasets/kitti_mono_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,6 @@ def _parse_ann_info(self, img_info, ann_info):
gt_masks_ann.append(ann.get('segmentation', None))
# 3D annotations in camera coordinates
bbox_cam3d = np.array(ann['bbox_cam3d']).reshape(-1, )
# change orientation to local yaw
bbox_cam3d[6] = -np.arctan2(bbox_cam3d[0],
bbox_cam3d[2]) + bbox_cam3d[6]
gt_bboxes_cam3d.append(bbox_cam3d)
# 2.5D annotations in camera coordinates
center2d = ann['center2d'][:2]
Expand Down
12 changes: 9 additions & 3 deletions mmdet3d/datasets/nuscenes_mono_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,6 @@ def _parse_ann_info(self, img_info, ann_info):
gt_masks_ann.append(ann.get('segmentation', None))
# 3D annotations in camera coordinates
bbox_cam3d = np.array(ann['bbox_cam3d']).reshape(1, -1)
# change orientation to local yaw
bbox_cam3d[0, 6] = -np.arctan2(
bbox_cam3d[0, 0], bbox_cam3d[0, 2]) + bbox_cam3d[0, 6]
velo_cam3d = np.array(ann['velo_cam3d']).reshape(1, 2)
nan_mask = np.isnan(velo_cam3d[:, 0])
velo_cam3d[nan_mask] = [0.0, 0.0]
Expand Down Expand Up @@ -666,6 +663,10 @@ def output_to_nusc_box(detection):
box_dims = box3d.dims.numpy()
box_yaw = box3d.yaw.numpy()

# convert the dim/rot to nuscbox convention
box_dims[:, [0, 1, 2]] = box_dims[:, [2, 0, 1]]
Tai-Wang marked this conversation as resolved.
Show resolved Hide resolved
box_yaw = -box_yaw

box_list = []
for i in range(len(box3d)):
q1 = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
Expand Down Expand Up @@ -778,6 +779,11 @@ def nusc_box_to_cam_box3d(boxes):
rots = torch.Tensor([b.orientation.yaw_pitch_roll[0]
for b in boxes]).view(-1, 1)
velocity = torch.Tensor([b.velocity[:2] for b in boxes]).view(-1, 2)

# convert nuscbox to cambox convention
dims[:, [0, 1, 2]] = dims[:, [1, 2, 0]]
rots = -rots

boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).cuda()
cam_boxes3d = CameraInstance3DBoxes(
boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5))
Expand Down
4 changes: 3 additions & 1 deletion mmdet3d/datasets/pipelines/transforms_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,11 @@ def random_flip_data_3d(self, input_dict, direction='horizontal'):
if 'centers2d' in input_dict:
assert self.sync_2d is True and direction == 'horizontal', \
'Only support sync_2d=True and horizontal flip with images'
w = input_dict['img_shape'][1]
w = input_dict['ori_shape'][1]
input_dict['centers2d'][..., 0] = \
w - input_dict['centers2d'][..., 0]
input_dict['cam_intrinsic'][0][2] = \
w - input_dict['cam_intrinsic'][0][2]
Tai-Wang marked this conversation as resolved.
Show resolved Hide resolved

def __call__(self, input_dict):
"""Call function to flip points, values in the ``bbox3d_fields`` and \
Expand Down
4 changes: 4 additions & 0 deletions mmdet3d/models/dense_heads/fcos_mono3d_head.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,10 @@ def _get_target_single(self, gt_bboxes, gt_labels, gt_bboxes_3d,
attr_labels.new_full(
(num_points,), self.attr_background_label)

# change orientation to local yaw
gt_bboxes_3d[..., 6] = -torch.atan2(
gt_bboxes_3d[..., 0], gt_bboxes_3d[..., 2]) + gt_bboxes_3d[..., 6]

areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
gt_bboxes[:, 3] - gt_bboxes[:, 1])
areas = areas[None].repeat(num_points, 1)
Expand Down
9 changes: 7 additions & 2 deletions tools/data_converter/nuscenes_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,13 @@ def get_2d_boxes(nusc,
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
loc = box.center.tolist()
dim = box.wlh.tolist()
rot = [box.orientation.yaw_pitch_roll[0]]

dim = box.wlh
dim[[0, 1, 2]] = dim[[1, 2, 0]] # convert wlh to our lhw
dim = dim.tolist()

rot = box.orientation.yaw_pitch_roll[0]
rot = [-rot] # convert the rot to our cam coordinate

global_velo2d = nusc.box_velocity(box.token)[:2]
global_velo3d = np.array([*global_velo2d, 0.0])
Expand Down
4 changes: 0 additions & 4 deletions tools/misc/browse_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,6 @@ def show_proj_bbox_img(idx,
img_metas=img_metas,
show=show)
elif isinstance(gt_bboxes, CameraInstance3DBoxes):
# TODO: remove the hack of box from NuScenesMonoDataset
if is_nus_mono:
from mmdet3d.core.bbox import mono_cam_box2vis
gt_bboxes = mono_cam_box2vis(gt_bboxes)
show_multi_modality_result(
img,
gt_bboxes,
Expand Down