diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py
index 335684ee97..21fde1167c 100644
--- a/fiftyone/utils/cvat.py
+++ b/fiftyone/utils/cvat.py
@@ -1587,6 +1587,56 @@ def from_image_dict(cls, d):
         )
 
 
+class HasCVATBinaryMask(object):
+    """Mixin for CVAT annotations that store RLE format instance masks."""
+
+    @staticmethod
+    def _rle_to_binary_image_mask(rle, mask_width, mask_height):
+        """Decodes a run-length encoding into a 2D binary mask.
+
+        Runs at even positions of ``rle`` are left as 0 and runs at odd
+        positions are filled with 1, in row-major order.
+
+        Args:
+            rle: an iterable of integer run lengths
+            mask_width: the width of the mask, in pixels
+            mask_height: the height of the mask, in pixels
+
+        Returns:
+            a ``mask_height x mask_width`` ``np.uint8`` array of 0/1 values
+        """
+        mask = np.zeros(mask_width * mask_height, dtype=np.uint8)
+        counter = 0
+        for i, val in enumerate(rle):
+            if i % 2 == 1:
+                mask[counter : counter + val] = 1
+            counter += val
+        return mask.reshape(mask_height, mask_width)
+
+    @staticmethod
+    def _mask_to_cvat_rle(binary_mask):
+        """Encodes a binary mask as a list of run lengths.
+
+        The mask is flattened in row-major order and the runs alternate
+        starting with 0-valued pixels, so a leading ``0`` is emitted when
+        the first pixel is 1.
+
+        Args:
+            binary_mask: a numpy array of 0/1 values
+
+        Returns:
+            a list of integer run lengths
+        """
+        counts = []
+        for i, (value, elements) in enumerate(
+            itertools.groupby(binary_mask.ravel(order="C"))
+        ):
+            if i == 0 and value == 1:
+                counts.append(0)
+            counts.append(sum(1 for _ in elements))
+        return counts
+
+
 class HasCVATPoints(object):
     """Mixin for CVAT annotations that store a list of ``(x, y)`` pixel
     coordinates.
@@ -5919,6 +5969,9 @@ def _parse_annotation(
             if shape_type == "rectangle":
                 label_type = "detections"
                 label = cvat_shape.to_detection()
+            elif shape_type == "mask":
+                label_type = "detections"
+                label = cvat_shape.to_instance()
             elif shape_type == "polygon":
                 if expected_label_type == "segmentation":
                     # A piece of a segmentation mask
@@ -6430,24 +6483,23 @@ def _create_detection_shapes(
                 if det.has_mask is None:
                     continue
 
-                polygon = det.to_polyline()
-                for points in polygon.points:
-                    if len(points) < 3:
-                        continue  # CVAT polygons must contain >= 3 points
+                if self._server_version >= Version("2.3"):
+                    x, y, _, _ = det.bounding_box
+                    frame_width, frame_height = frame_size
+                    mask_height, mask_width = det.mask.shape
+                    xtl, ytl = round(x * frame_width), round(y * frame_height)
+                    xbr, ybr = xtl + mask_width, ytl + mask_height
 
-                    abs_points = HasCVATPoints._to_abs_points(
-                        points, frame_size
-                    )
-                    flattened_points = list(
-                        itertools.chain.from_iterable(abs_points)
-                    )
+                    # -1 to convert to CVAT indexing
+                    rle = HasCVATBinaryMask._mask_to_cvat_rle(det.mask)
+                    rle.extend([xtl, ytl, xbr - 1, ybr - 1])
 
                     curr_shapes.append(
                         {
-                            "type": "polygon",
+                            "type": "mask",
                             "occluded": is_occluded,
                             "z_order": 0,
-                            "points": flattened_points,
+                            "points": rle,
                             "label_id": class_name,
                             "group": group_id,
                             "frame": frame_id,
@@ -6455,6 +6507,32 @@ def _create_detection_shapes(
                             "attributes": deepcopy(attributes),
                         }
                     )
+                else:
+                    polygon = det.to_polyline()
+                    for points in polygon.points:
+                        if len(points) < 3:
+                            continue  # CVAT polygons must contain >= 3 points
+
+                        abs_points = HasCVATPoints._to_abs_points(
+                            points, frame_size
+                        )
+                        flattened_points = list(
+                            itertools.chain.from_iterable(abs_points)
+                        )
+
+                        curr_shapes.append(
+                            {
+                                "type": "polygon",
+                                "occluded": is_occluded,
+                                "z_order": 0,
+                                "points": flattened_points,
+                                "label_id": class_name,
+                                "group": group_id,
+                                "frame": frame_id,
+                                "source": "manual",
+                                "attributes": deepcopy(attributes),
+                            }
+                        )
 
             if not curr_shapes:
                 continue
@@ -7103,6 +7181,39 @@ def to_detection(self):
         self._set_attributes(label)
         return label
 
+    def to_instance(self):
+        """Converts this shape to a :class:`fiftyone.core.labels.Detection`
+        with instance mask.
+
+        Returns:
+            a :class:`fiftyone.core.labels.Detection`
+        """
+        # The last four points are the mask's box corners; the rest is its RLE
+        xtl, ytl, xbr, ybr = self.points[-4:]
+        rel = np.array(self.points[:-4], dtype=int)
+        frame_width, frame_height = self.frame_size
+
+        # +1 to convert from CVAT indexing
+        mask_w, mask_h = round(xbr - xtl) + 1, round(ybr - ytl) + 1
+        mask = HasCVATBinaryMask._rle_to_binary_image_mask(
+            rel, mask_height=mask_h, mask_width=mask_w
+        )
+
+        bbox = [
+            xtl / frame_width,
+            ytl / frame_height,
+            (xbr - xtl) / frame_width,
+            (ybr - ytl) / frame_height,
+        ]
+        label = fol.Detection(
+            label=self.label,
+            bounding_box=bbox,
+            index=self.index,
+            mask=mask,
+        )
+        self._set_attributes(label)
+        return label
+
     def to_polyline(self, closed=False, filled=False):
         """Converts this shape to a :class:`fiftyone.core.labels.Polyline`.
 