diff --git a/utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/0004/mappings.json b/utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/0004/mappings.json new file mode 100644 index 000000000000..f6a0aa87964b --- /dev/null +++ b/utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/0004/mappings.json @@ -0,0 +1,5 @@ +{ + "label_map": { + "1": "text" + } +} diff --git a/utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/0004/pixel_link_mobilenet_v2.py b/utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/0004/pixel_link_mobilenet_v2.py new file mode 100644 index 000000000000..9e3619a6725b --- /dev/null +++ b/utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/0004/pixel_link_mobilenet_v2.py @@ -0,0 +1,195 @@ +import cv2 +import numpy as np + + +class PixelLinkDecoder(): + def __init__(self): + four_neighbours = False + if four_neighbours: + self._get_neighbours = self._get_neighbours_4 + else: + self._get_neighbours = self._get_neighbours_8 + self.pixel_conf_threshold = 0.8 + self.link_conf_threshold = 0.8 + + def decode(self, height, width, detections: dict): + self.image_height = height + self.image_width = width + self.pixel_scores = self._set_pixel_scores(detections['model/segm_logits/add']) + self.link_scores = self._set_link_scores(detections['model/link_logits_/add']) + + self.pixel_mask = self.pixel_scores >= self.pixel_conf_threshold + self.link_mask = self.link_scores >= self.link_conf_threshold + self.points = list(zip(*np.where(self.pixel_mask))) + self.h, self.w = np.shape(self.pixel_mask) + self.group_mask = dict.fromkeys(self.points, -1) + self.bboxes = None + self.root_map = None + self.mask = None + + self._decode() + + def _softmax(self, x, axis=None): + return np.exp(x - self._logsumexp(x, axis=axis, keepdims=True)) + + # pylint: disable=no-self-use + def _logsumexp(self, a, axis=None, b=None, keepdims=False, return_sign=False): + if b is not None: + a, b = np.broadcast_arrays(a, b) + if np.any(b == 0): + a = a + 0. # promote to at least float + a[b == 0] = -np.inf + + a_max = np.amax(a, axis=axis, keepdims=True) + + if a_max.ndim > 0: + a_max[~np.isfinite(a_max)] = 0 + elif not np.isfinite(a_max): + a_max = 0 + + if b is not None: + b = np.asarray(b) + tmp = b * np.exp(a - a_max) + else: + tmp = np.exp(a - a_max) + + # suppress warnings about log of zero + with np.errstate(divide='ignore'): + s = np.sum(tmp, axis=axis, keepdims=keepdims) + if return_sign: + sgn = np.sign(s) + s *= sgn # /= makes more sense but we need zero -> zero + out = np.log(s) + + if not keepdims: + a_max = np.squeeze(a_max, axis=axis) + out += a_max + + if return_sign: + return out, sgn + else: + return out + + def _set_pixel_scores(self, pixel_scores): + "get softmaxed properly shaped pixel scores" + tmp = np.transpose(pixel_scores, (0, 2, 3, 1)) + return self._softmax(tmp, axis=-1)[0, :, :, 1] + + def _set_link_scores(self, link_scores): + "get softmaxed properly shaped links scores" + tmp = np.transpose(link_scores, (0, 2, 3, 1)) + tmp_reshaped = tmp.reshape(tmp.shape[:-1] + (8, 2)) + return self._softmax(tmp_reshaped, axis=-1)[0, :, :, :, 1] + + def _find_root(self, point): + root = point + update_parent = False + tmp = self.group_mask[root] + while tmp is not -1: + root = tmp + tmp = self.group_mask[root] + update_parent = True + if update_parent: + self.group_mask[point] = root + return root + + def _join(self, p1, p2): + root1 = self._find_root(p1) + root2 = self._find_root(p2) + if root1 != root2: + self.group_mask[root2] = root1 + + def _get_index(self, root): + if root not in self.root_map: + self.root_map[root] = len(self.root_map) + 1 + return self.root_map[root] + + def _get_all(self): + self.root_map = {} + self.mask = np.zeros_like(self.pixel_mask, dtype=np.int32) + + for point in self.points: + point_root = self._find_root(point) + bbox_idx = self._get_index(point_root) + self.mask[point] = bbox_idx + + def _get_neighbours_8(self, x, y): + w, h = self.w, self.h + tmp = [(0, x - 1, y - 1), (1, x, y - 1), + (2, x + 1, y - 1), (3, x - 1, y), + (4, x + 1, y), (5, x - 1, y + 1), + (6, x, y + 1), (7, x + 1, y + 1)] + + return [i for i in tmp if i[1] >= 0 and i[1] < w and i[2] >= 0 and i[2] < h] + + def _get_neighbours_4(self, x, y): + w, h = self.w, self.h + tmp = [(1, x, y - 1), + (3, x - 1, y), + (4, x + 1, y), + (6, x, y + 1)] + + return [i for i in tmp if i[1] >= 0 and i[1] < w and i[2] >= 0 and i[2] < h] + + def _mask_to_bboxes(self, min_area=300, min_height=10): + self.bboxes = [] + max_bbox_idx = self.mask.max() + mask_tmp = cv2.resize(self.mask, (self.image_width, self.image_height), interpolation=cv2.INTER_NEAREST) + + for bbox_idx in range(1, max_bbox_idx + 1): + bbox_mask = mask_tmp == bbox_idx + cnts, _ = cv2.findContours(bbox_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + if len(cnts) == 0: + continue + cnt = cnts[0] + rect, w, h = self._min_area_rect(cnt) + if min(w, h) < min_height: + continue + if w * h < min_area: + continue + self.bboxes.append(self._order_points(rect)) + + # pylint: disable=no-self-use + def _min_area_rect(self, cnt): + rect = cv2.minAreaRect(cnt) + w, h = rect[1] + box = cv2.boxPoints(rect) + box = np.int0(box) + return box, w, h + + # pylint: disable=no-self-use + def _order_points(self, rect): + """ (x, y) + Order: TL, TR, BR, BL + """ + tmp = np.zeros_like(rect) + sums = rect.sum(axis=1) + tmp[0] = rect[np.argmin(sums)] + tmp[2] = rect[np.argmax(sums)] + diff = np.diff(rect, axis=1) + tmp[1] = rect[np.argmin(diff)] + tmp[3] = rect[np.argmax(diff)] + return tmp + + def _decode(self): + for point in self.points: + y, x = point + neighbours = self._get_neighbours(x, y) + for n_idx, nx, ny in neighbours: + link_value = self.link_mask[y, x, n_idx] + pixel_cls = self.pixel_mask[ny, nx] + if link_value and pixel_cls: + self._join(point, (ny, nx)) + + self._get_all() + self._mask_to_bboxes() + + +label = 1 +pcd = PixelLinkDecoder() +for detection in detections: + frame = detection['frame_id'] + pcd.decode(detection['frame_height'], detection['frame_width'], detection['detections']) + for box in pcd.bboxes: + box = [[int(b[0]), int(b[1])] for b in box] + results.add_polygon(box, label, frame) diff --git a/utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/0001/README.md b/utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/README.md similarity index 100% rename from utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/0001/README.md rename to utils/open_model_zoo/Retail/object_detection/text/pixel_link_mobilenet_v2/README.md