forked from zeyuanxy/fast-rcnn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfast_rcnn_im_detect.m
134 lines (114 loc) · 4.11 KB
/
fast_rcnn_im_detect.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
% --------------------------------------------------------
% Fast R-CNN
% Copyright (c) 2015 Microsoft
% Licensed under The MIT License [see LICENSE for details]
% Written by Ross Girshick
% --------------------------------------------------------
function dets = fast_rcnn_im_detect(model, im, boxes)
% FAST_RCNN_IM_DETECT  Run a Fast R-CNN network on an image and its
% object proposals.
%
%   dets = fast_rcnn_im_detect(model, im, boxes)
%
%   model   struct; this function reads model.init_key and
%           model.pixel_means.
%   im      input image, handed to image_pyramid for preprocessing.
%   boxes   N-by-4 proposals; columns are treated as [x1 y1 x2 y2]
%           (column 3 minus column 1 is used as the width).
%
%   dets    (num_classes - 1)-by-1 cell array. dets{k} holds the rows
%           [predicted_box score] for class index k + 1 that were kept
%           by nms. Class index 1 is __background__ and is not returned.

% The init key stored in the model must match the one caffe reports.
if model.init_key ~= caffe('get_init_key')
    error('You probably need call fast_rcnn_load_net() first.');
end

% Build the network inputs: a single-scale image batch (multiscale flag
% is false) and the proposals projected onto that scale.
[im_batch, scales] = image_pyramid(im, model.pixel_means, false);
[scaled_boxes, box_levels] = project_im_rois(boxes, scales);

% One roi per row as [level box], shifted to 0-based indexing, then
% transposed so the roi info is the fastest dimension.
rois = permute([box_levels, scaled_boxes] - 1, [2 1]);

net_inputs = {im_batch; rois};

fwd_timer = tic();
net_outputs = caffe('forward', net_inputs);
fprintf('fwd: %.3fs\n', toc(fwd_timer));

% Output 1 carries the box regression deltas, output 2 the class scores;
% after the transpose each row corresponds to one roi.
bbox_deltas = squeeze(net_outputs{1})';
probs = squeeze(net_outputs{2})';

num_classes = size(probs, 2);
dets = cell(num_classes - 1, 1);
NMS_THRESH = 0.3;

% Start at 2 to skip the background class.
for cls = 2:num_classes
    % Each class owns four consecutive delta columns.
    delta_cols = (4 * cls - 3):(4 * cls);
    scored_boxes = [bbox_pred(boxes, bbox_deltas(:, delta_cols)), probs(:, cls)];
    dets{cls - 1} = scored_boxes(nms(scored_boxes, NMS_THRESH), :);
end
% ------------------------------------------------------------------------
function [batch, scales] = image_pyramid(im, pixel_means, multiscale)
% ------------------------------------------------------------------------
% Build a batch of rescaled copies of IM, laid out for direct use as a
% caffe input.
%
%   im           image with at least three channels (channels 3, 2, 1 are
%                read, in that order).
%   pixel_means  value(s) subtracted from the image via bsxfun.
%   multiscale   false -> one level targeting a shorter side of 600
%                (longer side capped at 1000); true -> five levels
%                (1200 864 688 576 480) with the longer side capped
%                at 2000.
%
%   batch        single array, width-by-height-by-3-by-num_levels, with
%                each level placed in the top-left corner and the rest
%                left as zeros.
%   scales       column vector with the resize factor used per level.
if multiscale
    target_sizes = [1200 864 688 576 480];
    side_cap = 2000;
else
    target_sizes = [600];
    side_cap = 1000;
end
num_levels = numel(target_sizes);

% Single precision, channels reordered to BGR, per-channel mean removed.
base = bsxfun(@minus, single(im(:, :, [3 2 1])), pixel_means);

short_side = min(size(base, 1), size(base, 2));
long_side = max(size(base, 1), size(base, 2));

% Scale so the shorter side hits the target, unless that would push the
% longer side past the cap; in that case scale the longer side to the cap.
scale_factors = target_sizes ./ short_side;
over_cap = round(long_side * scale_factors) > side_cap;
scale_factors(over_cap) = side_cap / long_side;

% Resize every level and track the largest extent along each dimension.
levels = cell(1, num_levels);
extent = [0 0 0];
for k = 1:num_levels
    levels{k} = imresize(base, scale_factors(k), 'bilinear', ...
                         'antialiasing', false);
    extent = max([extent; size(levels{k})], [], 1);
end

% Rows and columns are swapped so that width is the fastest dimension
% (for caffe).
batch = zeros(extent(2), extent(1), 3, num_levels, 'single');
for k = 1:num_levels
    [n_rows, n_cols, ~] = size(levels{k});
    batch(1:n_cols, 1:n_rows, :, k) = permute(levels{k}, [2 1 3]);
end

scales = scale_factors';
% ------------------------------------------------------------------------
function [boxes, levels] = project_im_rois(boxes, scales)
% ------------------------------------------------------------------------
% Assign each box to a pyramid level and map it into that level's
% coordinates.
%
%   boxes   N-by-4 boxes, read as [x1 y1 x2 y2] with inclusive, 1-based
%           coordinates (width is x2 - x1 + 1).
%   scales  column vector of per-level scale factors.
%
%   boxes   the input boxes rescaled by the factor of their chosen level.
%   levels  N-by-1 index into SCALES of the level whose scaled box area
%           is closest to 224 * 224.
TARGET_AREA = 224 * 224;

box_w = boxes(:, 3) - boxes(:, 1) + 1;
box_h = boxes(:, 4) - boxes(:, 2) + 1;

% N-by-L table: distance of every box's area at every level from the
% target area.
area_gap = abs(bsxfun(@times, box_w .* box_h, (scales .^ 2)') - TARGET_AREA);
[~, levels] = min(area_gap, [], 2);

% Scale about a 0-based origin, then return to 1-based coordinates.
boxes = bsxfun(@times, boxes - 1, scales(levels)) + 1;
% ------------------------------------------------------------------------
function pred_boxes = bbox_pred(boxes, bbox_deltas)
% ------------------------------------------------------------------------
% Apply regression deltas to source boxes.
%
%   boxes        N-by-4 source boxes, read as [x1 y1 x2 y2].
%   bbox_deltas  N-by-4 deltas; columns 1-2 shift the box center in units
%                of the source width/height, columns 3-4 are log-space
%                scalings of the width/height.
%
%   pred_boxes   N-by-4 predicted boxes as [x1 y1 x2 y2], or [] when
%                BOXES is empty.
pred_boxes = [];
if isempty(boxes)
    return;
end

% Source extents (eps is added to each) and centers.
w0 = boxes(:, 3) - boxes(:, 1) + eps;
h0 = boxes(:, 4) - boxes(:, 2) + eps;
cx0 = boxes(:, 1) + 0.5 * w0;
cy0 = boxes(:, 2) + 0.5 * h0;

% Predicted centers: offset the source center by a fraction of its size.
cx = (bbox_deltas(:, 1) .* w0) + cx0;
cy = (bbox_deltas(:, 2) .* h0) + cy0;

% Predicted half-extents: exponentiated scale applied to the source size.
half_w = 0.5 * (exp(bbox_deltas(:, 3)) .* w0);
half_h = 0.5 * (exp(bbox_deltas(:, 4)) .* h0);

% Convert center/size form back to corner form.
pred_boxes = [cx - half_w, cy - half_h, ...
              cx + half_w, cy + half_h];