-
Notifications
You must be signed in to change notification settings - Fork 15
/
bounding_box.py
180 lines (152 loc) · 7.87 KB
/
bounding_box.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
from carla.transform import Transform
import numpy as np
from datadescriptor import KittiDescriptor
from camera_utils import *
from constants import WINDOW_HEIGHT, WINDOW_WIDTH, MAX_RENDER_DEPTH_IN_METERS, MIN_VISIBLE_VERTICES_FOR_RENDER, VISIBLE_VERTEX_COLOR, OCCLUDED_VERTEX_COLOR, MIN_BBOX_AREA_IN_PX
from utils import degrees_to_radians
import logging
def bbox_2d_from_agent(agent, intrinsic_mat, extrinsic_mat, ext, bbox_transform, agent_transform, rotRP): # rotRP expects point to be in Kitti lidar format
""" Creates bounding boxes for a given agent and camera/world calibration matrices.
Returns the modified image that contains the screen rendering with drawn on vertices from the agent """
bbox = vertices_from_extension(ext)
# transform the vertices respect to the bounding box transform
bbox = bbox_transform.transform_points(bbox)
# the bounding box transform is respect to the agents transform
# so let's transform the points relative to it's transform
bbox = agent_transform.transform_points(bbox)
# agents's transform is relative to the world, so now,
# bbox contains the 3D bounding box vertices relative to the world
# Additionally, you can logging.info these vertices to check that is working
# Store each vertex 2d points for drawing bounding boxes later
vertices_pos2d = vertices_to_2d_coords(bbox, intrinsic_mat, extrinsic_mat)
return vertices_pos2d
def calculate_occlusion_stats(image, vertices_pos2d, depth_map, draw_vertices=True):
""" Draws each vertex in vertices_pos2d if it is in front of the camera
The color is based on whether the object is occluded or not.
Returns the number of visible vertices and the number of vertices outside the camera.
"""
num_visible_vertices = 0
num_vertices_outside_camera = 0
for y_2d, x_2d, vertex_depth in vertices_pos2d:
# if the point is in front of the camera but not too far away
if MAX_RENDER_DEPTH_IN_METERS > vertex_depth > 0 and point_in_canvas((y_2d, x_2d)):
is_occluded = point_is_occluded(
(y_2d, x_2d), vertex_depth, depth_map)
if is_occluded:
vertex_color = OCCLUDED_VERTEX_COLOR
else:
num_visible_vertices += 1
vertex_color = VISIBLE_VERTEX_COLOR
if draw_vertices:
draw_rect(image, (y_2d, x_2d), 4, vertex_color)
else:
num_vertices_outside_camera += 1
return num_visible_vertices, num_vertices_outside_camera
def create_kitti_datapoint(agent, intrinsic_mat, extrinsic_mat, image, depth_image, player_measurements, rotRP, draw_3D_bbox=True):
""" Calculates the bounding box of the given agent, and returns a KittiDescriptor which describes the object to be labeled """
obj_type, agent_transform, bbox_transform, ext, location = transforms_from_agent(
agent)
if obj_type is None:
logging.warning(
"Could not get bounding box for agent. Object type is None")
return image, None
vertices_pos2d = bbox_2d_from_agent(
agent, intrinsic_mat, extrinsic_mat, ext, bbox_transform, agent_transform, rotRP)
depth_map = depth_image.data * 1000
num_visible_vertices, num_vertices_outside_camera = calculate_occlusion_stats(
image, vertices_pos2d, depth_map, draw_vertices=draw_3D_bbox)
midpoint = midpoint_from_agent_location(
image, location, extrinsic_mat, intrinsic_mat)
# At least N vertices has to be visible in order to draw bbox
if num_visible_vertices >= MIN_VISIBLE_VERTICES_FOR_RENDER and num_vertices_outside_camera < MIN_VISIBLE_VERTICES_FOR_RENDER:
midpoint[:3] = rotRP * midpoint[:3]
bbox_2d = calc_projected_2d_bbox(vertices_pos2d)
area = calc_bbox2d_area(bbox_2d)
if area < MIN_BBOX_AREA_IN_PX:
logging.info("Filtered out bbox with too low area {}".format(area))
return image, None
if draw_3D_bbox:
draw_3d_bounding_box(image, vertices_pos2d)
from math import pi
rotation_y = get_relative_rotation_y(agent, player_measurements) % pi
datapoint = KittiDescriptor()
datapoint.set_bbox(bbox_2d)
datapoint.set_3d_object_dimensions(ext)
datapoint.set_type(obj_type)
datapoint.set_3d_object_location(midpoint)
datapoint.set_rotation_y(rotation_y)
return image, datapoint
else:
return image, None
def get_relative_rotation_y(agent, player_measurements):
""" Returns the relative rotation of the agent to the camera in yaw
The relative rotation is the difference between the camera rotation (on car) and the agent rotation"""
# We only car about the rotation for the classes we do detection on
if agent.vehicle.transform:
rot_agent = agent.vehicle.transform.rotation.yaw
rot_car = player_measurements.transform.rotation.yaw
return degrees_to_radians(rot_agent - rot_car)
def vertices_to_2d_coords(bbox, intrinsic_mat, extrinsic_mat):
""" Accepts a bbox which is a list of 3d world coordinates and returns a list
of the 2d pixel coordinates of each vertex.
This is represented as a tuple (y, x, d) where y and x are the 2d pixel coordinates
while d is the depth. The depth can be used for filtering visible vertices.
"""
vertices_pos2d = []
for vertex in bbox:
pos_vector = vertex_to_world_vector(vertex)
# Camera coordinates
transformed_3d_pos = proj_to_camera(pos_vector, extrinsic_mat)
# 2d pixel coordinates
pos2d = proj_to_2d(transformed_3d_pos, intrinsic_mat)
# The actual rendered depth (may be wall or other object instead of vertex)
vertex_depth = pos2d[2]
x_2d, y_2d = WINDOW_WIDTH - pos2d[0], WINDOW_HEIGHT - pos2d[1]
vertices_pos2d.append((y_2d, x_2d, vertex_depth))
return vertices_pos2d
def vertex_to_world_vector(vertex):
""" Returns the coordinates of the vector in correct carla world format (X,Y,Z,1) """
return np.array([
[vertex[0, 0]], # [[X,
[vertex[0, 1]], # Y,
[vertex[0, 2]], # Z,
[1.0] # 1.0]]
])
def vertices_from_extension(ext):
""" Extraxts the 8 bounding box vertices relative to (0,0,0)
https://github.com/carla-simulator/carla/commits/master/Docs/img/vehicle_bounding_box.png
8 bounding box vertices relative to (0,0,0)
"""
return np.array([
[ext.x, ext.y, ext.z], # Top left front
[- ext.x, ext.y, ext.z], # Top left back
[ext.x, - ext.y, ext.z], # Top right front
[- ext.x, - ext.y, ext.z], # Top right back
[ext.x, ext.y, - ext.z], # Bottom left front
[- ext.x, ext.y, - ext.z], # Bottom left back
[ext.x, - ext.y, - ext.z], # Bottom right front
[- ext.x, - ext.y, - ext.z] # Bottom right back
])
def transforms_from_agent(agent):
""" Returns the KITTI object type and transforms, locations and extension of the given agent """
if agent.HasField('pedestrian'):
obj_type = 'Pedestrian'
agent_transform = Transform(agent.pedestrian.transform)
bbox_transform = Transform(agent.pedestrian.bounding_box.transform)
ext = agent.pedestrian.bounding_box.extent
location = agent.pedestrian.transform.location
elif agent.HasField('vehicle'):
obj_type = 'Car'
agent_transform = Transform(agent.vehicle.transform)
bbox_transform = Transform(agent.vehicle.bounding_box.transform)
ext = agent.vehicle.bounding_box.extent
location = agent.vehicle.transform.location
else:
return (None, None, None, None, None)
return obj_type, agent_transform, bbox_transform, ext, location
def calc_bbox2d_area(bbox_2d):
""" Calculate the area of the given 2d bbox
Input is assumed to be xmin, ymin, xmax, ymax tuple
"""
xmin, ymin, xmax, ymax = bbox_2d
return (ymax - ymin) * (xmax - xmin)