Skip to content

Commit

Permalink
Allow evaluating the forest at a lower resolution for a faster frame rate
Browse files Browse the repository at this point in the history
  • Loading branch information
carsonswope committed Sep 10, 2021
1 parent 74aa84e commit 3fd2bf9
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 49 deletions.
34 changes: 21 additions & 13 deletions src/3d_bz.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def __init__(self):

self.midi = Midi()

self.LABELS_REDUCE = 2

self.NO_DEBUG = args.no_debug

self.NUM_RANDOM_GUESSES = args.plane_num_iterations or 25000
Expand All @@ -67,7 +69,7 @@ def __init__(self):

print('loading forest')

self.layered_rdf = LayeredDecisionForest.load(args.cfg, (480, 848))
self.layered_rdf = LayeredDecisionForest.load(args.cfg, (480, 848), labels_reduce = self.LABELS_REDUCE)

self.points_ops = PointsOps()

Expand All @@ -94,11 +96,15 @@ def __init__(self):
self.coord_croups_cpu = np.zeros((self.depth_mm_dims[0] * self.depth_mm_dims[1], 3), dtype=np.int32)
self.coord_groups_gpu = GpuBuffer((self.depth_mm_dims[0] * self.depth_mm_dims[1], 3), dtype=np.int32)

self.labels_image = GpuBuffer((self.DIM_Y, self.DIM_X), dtype=np.uint16)
self.labels_image_2 = GpuBuffer((self.DIM_Y, self.DIM_X), dtype=np.uint16)
self.labels_image_rgba_cpu = np.zeros((self.DIM_Y, self.DIM_X, 4), dtype=np.uint8)
self.labels_image_rgba = GpuBuffer((self.DIM_Y, self.DIM_X, 4), dtype=np.uint8)
self.labels_image_rgba_tex = GpuTexture((self.DIM_X, self.DIM_Y), (GL_RGBA, GL_UNSIGNED_BYTE))
self.LABELS_DIM_X = self.DIM_X // self.LABELS_REDUCE
self.LABELS_DIM_Y = self.DIM_Y // self.LABELS_REDUCE
self.LABELS_DIM = (self.LABELS_DIM_Y, self.LABELS_DIM_X)

self.labels_image = GpuBuffer(self.LABELS_DIM, dtype=np.uint16)
self.labels_image_2 = GpuBuffer(self.LABELS_DIM, dtype=np.uint16)
self.labels_image_rgba_cpu = np.zeros(self.LABELS_DIM + (4,), dtype=np.uint8)
self.labels_image_rgba = GpuBuffer(self.LABELS_DIM + (4,), dtype=np.uint8)
self.labels_image_rgba_tex = GpuTexture((self.LABELS_DIM_X, self.LABELS_DIM_Y), (GL_RGBA, GL_UNSIGNED_BYTE))

mean_shift_variances = np.array(
[100., 50., 50., 50., 50., 50., 50.],
Expand Down Expand Up @@ -296,7 +302,7 @@ def tick(self, _):
window_pad = 24 * self.dpi_scale

imgui.push_style_var(imgui.STYLE_WINDOW_PADDING, (window_pad, window_pad))
imgui.set_next_window_position(0, 0)#self.DIM_Y * self.dpi_scale)
imgui.set_next_window_position(0, 0)
imgui.set_next_window_size(self.width * self.dpi_scale, 220 * self.dpi_scale)
imgui.set_next_window_bg_alpha(0.3)
imgui.begin('Hand state', flags= imgui.WINDOW_NO_MOVE | imgui.WINDOW_NO_RESIZE | imgui.WINDOW_NO_TITLE_BAR | imgui.WINDOW_NO_SCROLLBAR)
Expand Down Expand Up @@ -422,28 +428,28 @@ def run_per_hand_pipeline(self, g_id, flip_x):
if flip_x:
self.labels_image_2.cu().set(self.labels_image.cu())
self.points_ops.flip_x(
np.array([self.DIM_X, self.DIM_Y], dtype=np.int32),
np.array([self.LABELS_DIM_X, self.LABELS_DIM_Y], dtype=np.int32),
self.labels_image_2.cu(),
self.labels_image.cu(),
grid=make_grid((self.DIM_X, self.DIM_Y, 1), (32, 32, 1)),
grid=make_grid((self.LABELS_DIM_X, self.LABELS_DIM_Y, 1), (32, 32, 1)),
block=(32, 32, 1))

self.points_ops.make_rgba_from_labels(
np.uint32(self.DIM_X),
np.uint32(self.DIM_Y),
np.uint32(self.LABELS_DIM_X),
np.uint32(self.LABELS_DIM_Y),
np.uint32(self.layered_rdf.num_layered_classes),
self.labels_image.cu(),
self.layered_rdf.label_colors.cu(),
self.labels_image_rgba.cu(),
grid = ((self.DIM_X // 32) + 1, (self.DIM_Y // 32) + 1, 1),
grid = ((self.LABELS_DIM_X // 32) + 1, (self.LABELS_DIM_Y // 32) + 1, 1),
block = (32,32,1))

# self.cu_ctx.synchronize()
# self.t.record('--mean shift')

label_means = self.mean_shift.run(
self.mean_shift_rounds,
self.labels_image.cu().reshape((1, self.DIM_Y, self.DIM_X)),
self.labels_image.cu().reshape((1, self.LABELS_DIM_Y, self.LABELS_DIM_X)),
self.layered_rdf.num_layered_classes,
self.mean_shift_variances)

Expand Down Expand Up @@ -486,6 +492,8 @@ def run_per_hand_pipeline(self, g_id, flip_x):
for i, f_idx in zip(range(len(self.fingertip_idxes)), self.fingertip_idxes):

px, py = label_means[f_idx-1].astype(np.int32)
px *= self.LABELS_REDUCE
py *= self.LABELS_REDUCE
if px < 0 or py < 0 or px >= self.DIM_X or py >= self.DIM_Y:
hand_state.fingertips[i].reset_positions()
else:
Expand Down
32 changes: 18 additions & 14 deletions src/cuda/tree_eval.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,24 @@ extern "C" {__global__
void evaluate_image_using_forest(
int NUM_TREES,
int NUM_IMAGES,
int IMG_DIM_X,
int IMG_DIM_Y,
int depth_dim_x,
int depth_dim_y,
int NUM_CLASSES,
int MAX_TREE_DEPTH,
int BLOCK_DIM_X,
uint16* _img_in,
int filter_class,
uint16* _filter,
float* _forest,
uint16* _labels_out)
uint16* _labels_out,
int labels_reduce)
{

extern __shared__ float _thread_pdf[];
Array2d<float> thread_pdf(_thread_pdf, {BLOCK_DIM_X, NUM_CLASSES});

const int2 IMG_DIM{IMG_DIM_X, IMG_DIM_Y};
const int TOTAL_NUM_PIXELS = NUM_IMAGES * IMG_DIM.x * IMG_DIM.y;
const int2 labels_img_dim{depth_dim_x / labels_reduce, depth_dim_y / labels_reduce};
const int TOTAL_NUM_PIXELS = NUM_IMAGES * labels_img_dim.x * labels_img_dim.y;
const int TREE_NODE_ELS = 7 + NUM_CLASSES + NUM_CLASSES; // (ux,uy,vx,vy,thresh,l_next,r_next,{l_pdf},{r_pdf})

const int i = blockIdx.x * blockDim.x + threadIdx.x;
Expand All @@ -59,13 +60,16 @@ extern "C" {__global__

__syncthreads();

const int img_idx = i / (IMG_DIM.x * IMG_DIM.y);
const int i_rem = i % (IMG_DIM.x * IMG_DIM.y);
const int img_y = i_rem / IMG_DIM.x;
const int img_x = i_rem % IMG_DIM.x;
const int img_idx = i / (labels_img_dim.x * labels_img_dim.y);
const int i_rem = i % (labels_img_dim.x * labels_img_dim.y);
const int img_y = i_rem / labels_img_dim.x;
const int img_x = i_rem % labels_img_dim.x;

Array3d<uint16> img_in(_img_in, {NUM_IMAGES,IMG_DIM_Y,IMG_DIM_X}, MAX_UINT16);
Array3d<uint16> labels_out(_labels_out, {NUM_IMAGES,IMG_DIM_Y,IMG_DIM_X});
const int depth_img_y = img_y * labels_reduce;
const int depth_img_x = img_x * labels_reduce;

Array3d<uint16> img_in(_img_in, {NUM_IMAGES,depth_dim_y,depth_dim_x}, MAX_UINT16);
Array3d<uint16> labels_out(_labels_out, {NUM_IMAGES,labels_img_dim.y,labels_img_dim.x});

const int TOTAL_TREE_NODES = (1 << MAX_TREE_DEPTH) - 1;

Expand All @@ -74,13 +78,13 @@ extern "C" {__global__

// Don't try to evaluate if filtering by a filter image!
if (filter_class != -1) {
Array3d<uint16> filter(_filter, {NUM_IMAGES,IMG_DIM_Y,IMG_DIM_X}, MAX_UINT16);
Array3d<uint16> filter(_filter, {NUM_IMAGES,labels_img_dim.y,labels_img_dim.x}, MAX_UINT16);
const uint16 img_label = filter.get({img_idx, img_y, img_x});
if ((int)img_label != filter_class) { return; }
}

// Don't try to evaluate if img in has 0 value!
const uint16 img_d = img_in.get({img_idx, img_y, img_x});
const uint16 img_d = img_in.get({img_idx, depth_img_y, depth_img_x});
if (img_d == 0 || img_d == MAX_UINT16) { return; } // max uint16 is also considered 'pixel not present'

// current node ID
Expand All @@ -96,7 +100,7 @@ extern "C" {__global__
const int l_next = __float2int_rd(d_ptr[5]);
const int r_next = __float2int_rd(d_ptr[6]);

const float f = compute_feature(img_in, img_idx, int2{img_x, img_y}, u, v);
const float f = compute_feature(img_in, img_idx, int2{depth_img_x, depth_img_y}, u, v);
float* final_pdf = nullptr;

if (f < thresh) {
Expand Down
40 changes: 25 additions & 15 deletions src/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,16 +170,21 @@ def __init__(self, num_trees, max_depth, num_classes):
# comes with gpu memory
class LayeredDecisionForest():
@staticmethod
def load(config_filename, eval_dims):
def load(config_filename, depth_dims, labels_reduce=1):
cfg = json.loads(open(config_filename).read())
# models are loaded 1-by-1 from paths with parent directory as a root
cfg['root'] = os.path.join(*Path(config_filename).parts[0:-1])
return LayeredDecisionForest(cfg, eval_dims)
return LayeredDecisionForest(cfg, depth_dims, labels_reduce)

def __init__(self, cfg, eval_dims):
def __init__(self, cfg, depth_dims, labels_reduce):

self.eval = DecisionTreeEvaluator()
self.eval_dims = eval_dims # y,x !!

self.depth_dims = depth_dims # y,x !!

self.labels_reduce = labels_reduce
self.labels_dims = (depth_dims[0] // labels_reduce, depth_dims[1] // labels_reduce)

self.m = []
for l in cfg['layers']:
# model path is relative to config file itself
Expand All @@ -195,7 +200,7 @@ def __init__(self, cfg, eval_dims):

self.num_models = len(self.m)

self.label_images = [GpuBuffer(eval_dims, dtype=np.uint16) for _ in range(self.num_models)]
self.label_images = [GpuBuffer(self.labels_dims, dtype=np.uint16) for _ in range(self.num_models)]

self.labels_images_ptrs_cu = GpuBuffer((self.num_models,), dtype=np.int64)
label_images_ptrs = np.array([i.cu().__cuda_array_interface__['data'][0] for i in self.label_images], dtype=np.int64)
Expand Down Expand Up @@ -235,25 +240,27 @@ def run(self, depth_image, labels_image):
i.cu().fill(MAX_UINT16)

# first dim: image id. only one image!
dims = (1,) + self.eval_dims
depth_img_dims = (1,) + self.depth_dims
label_img_dims = (1,) + self.labels_dims

for i in range(self.num_models):
m, filter_model, filter_model_class = self.m[i]
single_labels_image = self.label_images[i]

self.eval.get_labels_forest(
m,
depth_image.cu().reshape(dims),
single_labels_image.cu().reshape(dims),
filter_images=self.label_images[filter_model].cu().reshape(dims) if (filter_model is not None) else None,
depth_image.cu().reshape(depth_img_dims),
single_labels_image.cu().reshape(label_img_dims),
labels_reduce=self.labels_reduce,
filter_images=self.label_images[filter_model].cu().reshape(label_img_dims) if (filter_model is not None) else None,
filter_images_class=filter_model_class)

self.eval.make_composite_labels_image(
self.labels_images_ptrs_cu.cu(),
self.eval_dims[1],
self.eval_dims[0],
self.labels_dims[1],
self.labels_dims[0],
self.labels_conditions_cu.cu(),
labels_image.cu().reshape(dims))
labels_image.cu().reshape(label_img_dims))

# def eval()
class DecisionTreeEvaluator():
Expand Down Expand Up @@ -287,14 +294,16 @@ def get_labels(self, tree, depth_images_in, labels_out):


# TODO: support filter image for single tree forest! or not??
def get_labels_forest(self, forest, depth_images_in, labels_out, filter_images=None, filter_images_class=None):
def get_labels_forest(self, forest, depth_images_in, labels_out, labels_reduce = 1, filter_images=None, filter_images_class=None):
num_images, dim_y, dim_x = depth_images_in.shape

assert labels_out.shape == (num_images, dim_y // labels_reduce, dim_x // labels_reduce)

if filter_images is not None:
assert filter_images_class is not None
assert filter_images.shape == depth_images_in.shape
assert filter_images.shape == labels_out.shape

num_test_pixels = num_images * dim_y * dim_x
num_test_pixels = num_images * (dim_y // labels_reduce) * (dim_x // labels_reduce)

BLOCK_DIM_X = int(MAX_THREADS_PER_BLOCK // forest.num_trees)
grid_dim = (int(num_test_pixels // BLOCK_DIM_X) + 1, 1, 1)
Expand All @@ -315,6 +324,7 @@ def get_labels_forest(self, forest, depth_images_in, labels_out, filter_images=N
f_img,
forest.forest_cu,
labels_out,
np.int32(labels_reduce),
grid=grid_dim, block=block_dim, shared=(BLOCK_DIM_X * forest.num_classes * 4)) # sizeof(float), right?


Expand Down
33 changes: 26 additions & 7 deletions src/run_live_layered.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

class RunLive_Layered(AppBase):
def __init__(self):
super().__init__(title="Layered RDF Demo")
super().__init__(title="Layered RDF Demo", width=848, height=800)

parser = argparse.ArgumentParser(description='Train a classifier RDF for depth images')
parser.add_argument('-cfg', nargs='?', required=True, type=str, help='Path to the layered decision forest config file')
Expand All @@ -35,16 +35,20 @@ def __init__(self):

self.pipeline, self.depth_intrin, self.DIM_X, self.DIM_Y, self.FOCAL, self.PP = rs_util.start_stream(args)

self.layered_rdf = LayeredDecisionForest.load(args.cfg, (self.DIM_Y, self.DIM_X))
self.LABELS_REDUCE = 2

self.layered_rdf = LayeredDecisionForest.load(args.cfg, (self.DIM_Y, self.DIM_X), self.LABELS_REDUCE)
self.points_ops = PointsOps()

self.pts = GpuBuffer((self.DIM_Y, self.DIM_X, 4), dtype=np.float32)

self.depth_image = GpuBuffer((1, self.DIM_Y, self.DIM_X), np.uint16)
self.labels_image = GpuBuffer((1, self.DIM_Y, self.DIM_X), dtype=np.uint16)

self.labels_image_rgba = GpuBuffer((self.DIM_Y, self.DIM_X, 4), dtype=np.uint8)
self.labels_image_rgba_tex = GpuTexture((self.DIM_X, self.DIM_Y), (GL_RGBA, GL_UNSIGNED_BYTE))

self.labels_image = GpuBuffer((1, self.DIM_Y // self.LABELS_REDUCE, self.DIM_X // self.LABELS_REDUCE), dtype=np.uint16)

self.labels_image_rgba = GpuBuffer((self.DIM_Y // self.LABELS_REDUCE, self.DIM_X // self.LABELS_REDUCE, 4), dtype=np.uint8)
self.labels_image_rgba_tex = GpuTexture((self.DIM_X // self.LABELS_REDUCE, self.DIM_Y // self.LABELS_REDUCE), (GL_RGBA, GL_UNSIGNED_BYTE))

self.frame_num = 0

Expand Down Expand Up @@ -123,8 +127,8 @@ def tick(self, t):
# make RGBA image
self.labels_image_rgba.cu().fill(0)
self.points_ops.make_rgba_from_labels(
np.uint32(self.DIM_X),
np.uint32(self.DIM_Y),
np.uint32(self.DIM_X // self.LABELS_REDUCE),
np.uint32(self.DIM_Y // self.LABELS_REDUCE),
np.uint32(self.layered_rdf.num_layered_classes),
self.labels_image.cu(),
self.layered_rdf.label_colors.cu(),
Expand All @@ -135,7 +139,22 @@ def tick(self, t):

self.frame_num += 1

self.begin_imgui_main()
imgui.image(self.labels_image_rgba_tex.gl(), self.DIM_X * self.dpi_scale, self.DIM_Y * self.dpi_scale)
imgui.end()

imgui.set_next_window_size(200 * self.dpi_scale, 124 * self.dpi_scale)
imgui.set_next_window_bg_alpha(0.3)
imgui.begin('profile', imgui.WINDOW_NO_RESIZE | imgui.WINDOW_NO_TITLE_BAR | imgui.WINDOW_NO_SCROLLBAR)
profile_plot_width = 150 * self.dpi_scale
profile_plot_height = 60 * self.dpi_scale
imgui.text(f'ms/frame: {"{:.1f}".format(self.ms_per_frame_log[-1])}')
imgui.plot_lines('##ms-frame',
np.array(self.ms_per_frame_log, dtype=np.float32),
scale_max=100.,
scale_min=0.,
graph_size=(profile_plot_width , profile_plot_height))
imgui.end()

if __name__ == '__main__':
run_app(RunLive_Layered)

0 comments on commit 3fd2bf9

Please sign in to comment.