From b90ca31f8e6df30cc8629f806e5fd46e7ff6f47c Mon Sep 17 00:00:00 2001 From: NicDionne Date: Wed, 16 Oct 2024 20:26:21 -0400 Subject: [PATCH 1/5] First iteration [CVAT integration] Use pixelwise masks, not polygons, for instance segmentation #4483 - We can upload mask [] Missing test [] Missing download mask --- fiftyone/utils/cvat.py | 114 +++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 27 deletions(-) diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py index c30b6811dd..6c33d69ca8 100644 --- a/fiftyone/utils/cvat.py +++ b/fiftyone/utils/cvat.py @@ -1587,6 +1587,39 @@ def from_image_dict(cls, d): ) +class HasCVATBinMask: + @staticmethod + def cvat_rle_to_binary_image_mask( + cvat_rle, left, top, width, img_h: int, img_w: int + ) -> np.ndarray: + # Source https://github.com/cvat-ai/cvat/issues/6487#issuecomment-1640097518 + # convert CVAT tight object RLE to COCO-style whole image mask + rle = cvat_rle + mask = np.zeros((img_h, img_w), dtype=np.uint8) + value = 0 + offset = 0 + for rle_count in rle: + while rle_count > 0: + y, x = divmod(offset, width) + mask[y + top][x + left] = value + rle_count -= 1 + offset += 1 + value = 1 - value + + return mask + + @staticmethod + def mask_to_cvat_rle(binary_mask: np.ndarray) -> np.array: + counts = [] + for i, (value, elements) in enumerate( + itertools.groupby(binary_mask.ravel(order="C")) + ): + if i == 0 and value == 1: + counts.append(0) + counts.append(len(list(elements))) + return counts + + class HasCVATPoints(object): """Mixin for CVAT annotations that store a list of ``(x, y)`` pixel coordinates. @@ -5905,7 +5938,7 @@ def _parse_annotation( ): # A piece of an instance mask label_type = "detections" - label = cvat_shape.to_polyline(closed=True, filled=True) + label = cvat_shape.to_instance_detection() else: # A regular polyline or polygon if expected_label_type in ("polyline", "polylines"): @@ -6402,32 +6435,29 @@ def _create_detection_shapes( elif label_type in ("instance", "instances"): if det.mask is None: continue - - polygon = det.to_polyline() - for points in polygon.points: - if len(points) < 3: - continue # CVAT polygons must contain >= 3 points - - abs_points = HasCVATPoints._to_abs_points( - points, frame_size - ) - flattened_points = list( - itertools.chain.from_iterable(abs_points) - ) - - curr_shapes.append( - { - "type": "polygon", - "occluded": is_occluded, - "z_order": 0, - "points": flattened_points, - "label_id": class_name, - "group": group_id, - "frame": frame_id, - "source": "manual", - "attributes": deepcopy(attributes), - } - ) + x, y, _, _ = det.bounding_box + frame_width, frame_height = frame_size + mask_height, mask_width = det.mask.shape + xtl, ytl = round(x * frame_width), round(y * frame_height) + xbr, ybr = xtl + mask_width, ytl + mask_height + + rle = HasCVATBinMask.mask_to_cvat_rle(det.mask) + rle.extend( # Necessary as per CVAT API + [xtl, ytl, xbr - 1, ybr - 1] + ) + curr_shapes.append( + { + "type": "mask", + "occluded": is_occluded, + "z_order": 0, + "points": rle, + "label_id": class_name, + "group": group_id, + "frame": frame_id, + "source": "manual", + "attributes": deepcopy(attributes), + } + ) if not curr_shapes: continue @@ -7076,6 +7106,36 @@ def to_detection(self): self._set_attributes(label) return label + def to_instance_detection(self): + """Converts this shape to a :class:`fiftyone.core.labels.Detection`. + Special case where we also have a mask + + Returns: + a :class:`fiftyone.core.labels.Detection` + """ + + xtl, ytl, xbr, ybr = self.points[-4:] + rel = self.points[:-4] + width, height = self.frame_size + mask = HasCVATBinMask.cvat_rle_to_binary_image_mask( + rel, top=ytl, left=xtl, width=xbr - xtl, img_h=height, img_w=width + ) + cropped_mask = mask[ytl:ybr, xtl:xbr] + bbox = [ + xtl / width, + ytl / height, + (xbr - xtl) / width, + (ybr - ytl) / height, + ] + label = fol.Detection( + label=self.label, + bounding_box=bbox, + index=self.index, + mask=cropped_mask, + ) + self._set_attributes(label) + return label + def to_polyline(self, closed=False, filled=False): """Converts this shape to a :class:`fiftyone.core.labels.Polyline`. From 97b900057fcf0e9bbae7325a67e522f8edf49c31 Mon Sep 17 00:00:00 2001 From: NicDionne Date: Thu, 17 Oct 2024 20:20:18 -0400 Subject: [PATCH 2/5] Can now download annotation of mask --- fiftyone/utils/cvat.py | 71 ++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py index 6c33d69ca8..f918bc2023 100644 --- a/fiftyone/utils/cvat.py +++ b/fiftyone/utils/cvat.py @@ -1589,24 +1589,14 @@ def from_image_dict(cls, d): class HasCVATBinMask: @staticmethod - def cvat_rle_to_binary_image_mask( - cvat_rle, left, top, width, img_h: int, img_w: int - ) -> np.ndarray: - # Source https://github.com/cvat-ai/cvat/issues/6487#issuecomment-1640097518 - # convert CVAT tight object RLE to COCO-style whole image mask - rle = cvat_rle - mask = np.zeros((img_h, img_w), dtype=np.uint8) - value = 0 - offset = 0 - for rle_count in rle: - while rle_count > 0: - y, x = divmod(offset, width) - mask[y + top][x + left] = value - rle_count -= 1 - offset += 1 - value = 1 - value - - return mask + def rle_to_binary_image_mask(rle, mask_width, mask_height) -> np.ndarray: + mask = np.zeros(mask_width * mask_height, dtype=np.uint8) + counter = 0 + for i, val in enumerate(rle): + if i % 2 == 1: + mask[counter : counter + val] = 1 + counter += val + return mask.reshape(mask_width, mask_height) @staticmethod def mask_to_cvat_rle(binary_mask: np.ndarray) -> np.array: @@ -5925,6 +5915,9 @@ def _parse_annotation( if shape_type == "rectangle": label_type = "detections" label = cvat_shape.to_detection() + elif shape_type == "mask": + label_type = "detections" + label = cvat_shape.to_instance_detection() elif shape_type == "polygon": if expected_label_type == "segmentation": # A piece of a segmentation mask @@ -5938,7 +5931,7 @@ def _parse_annotation( ): # A piece of an instance mask label_type = "detections" - label = cvat_shape.to_instance_detection() + label = cvat_shape.to_polyline(closed=True, filled=True) else: # A regular polyline or polygon if expected_label_type in ("polyline", "polylines"): @@ -6445,6 +6438,23 @@ def _create_detection_shapes( rle.extend( # Necessary as per CVAT API [xtl, ytl, xbr - 1, ybr - 1] ) + print( + xbr, + frame_width, + xbr / frame_width, + type(xbr), + type(frame_width), + ) + print( + " Beginning box : ", + det.bounding_box, + "mask_W : ", + mask_width, + "frame_size ", + frame_size, + "bbox : ", + [xtl, ytl, xbr - 1, ybr - 1], + ) curr_shapes.append( { "type": "mask", @@ -7113,25 +7123,26 @@ def to_instance_detection(self): Returns: a :class:`fiftyone.core.labels.Detection` """ - xtl, ytl, xbr, ybr = self.points[-4:] - rel = self.points[:-4] - width, height = self.frame_size - mask = HasCVATBinMask.cvat_rle_to_binary_image_mask( - rel, top=ytl, left=xtl, width=xbr - xtl, img_h=height, img_w=width + rel = np.array(self.points[:-4], dtype=int) + frame_width, frame_height = self.frame_size + mask = HasCVATBinMask.rle_to_binary_image_mask( + rel, + mask_width=round(xbr - xtl) + 1, + mask_height=round(ybr - ytl) + + 1, # We need to add 1 because cvat uses - 1 ) - cropped_mask = mask[ytl:ybr, xtl:xbr] bbox = [ - xtl / width, - ytl / height, - (xbr - xtl) / width, - (ybr - ytl) / height, + xtl / frame_width, + ytl / frame_height, + (xbr - xtl) / frame_width, + (ybr - ytl) / frame_height, ] label = fol.Detection( label=self.label, bounding_box=bbox, index=self.index, - mask=cropped_mask, + mask=mask, ) self._set_attributes(label) return label From 1df8997a577e57ea6cc487cff91b196979665986 Mon Sep 17 00:00:00 2001 From: NicDionne Date: Thu, 17 Oct 2024 20:55:28 -0400 Subject: [PATCH 3/5] Fix --- fiftyone/utils/cvat.py | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py index f918bc2023..8e8d72e687 100644 --- a/fiftyone/utils/cvat.py +++ b/fiftyone/utils/cvat.py @@ -1592,11 +1592,16 @@ class HasCVATBinMask: def rle_to_binary_image_mask(rle, mask_width, mask_height) -> np.ndarray: mask = np.zeros(mask_width * mask_height, dtype=np.uint8) counter = 0 + for i, val in enumerate(rle): if i % 2 == 1: mask[counter : counter + val] = 1 counter += val + return mask.reshape(mask_width, mask_height) + # mask = np.zeros(mask_width * mask_height, dtype=np.uint8) + # mask[np.add.accumulate(rle)[::2]] = 1 + # return mask.reshape(mask_width, mask_height) @staticmethod def mask_to_cvat_rle(binary_mask: np.ndarray) -> np.array: @@ -6438,23 +6443,6 @@ def _create_detection_shapes( rle.extend( # Necessary as per CVAT API [xtl, ytl, xbr - 1, ybr - 1] ) - print( - xbr, - frame_width, - xbr / frame_width, - type(xbr), - type(frame_width), - ) - print( - " Beginning box : ", - det.bounding_box, - "mask_W : ", - mask_width, - "frame_size ", - frame_size, - "bbox : ", - [xtl, ytl, xbr - 1, ybr - 1], - ) curr_shapes.append( { "type": "mask", @@ -7126,11 +7114,12 @@ def to_instance_detection(self): xtl, ytl, xbr, ybr = self.points[-4:] rel = np.array(self.points[:-4], dtype=int) frame_width, frame_height = self.frame_size + mask_w, mask_h = ( + round(xbr - xtl) + 1, + round(ybr - ytl) + 1, + ) # We need to add 1 because cvat uses - 1 mask = HasCVATBinMask.rle_to_binary_image_mask( - rel, - mask_width=round(xbr - xtl) + 1, - mask_height=round(ybr - ytl) - + 1, # We need to add 1 because cvat uses - 1 + rel, mask_width=mask_h, mask_height=mask_w ) bbox = [ xtl / frame_width, From 513b6338101e34b6cdceb3773be8fb4fda9e3aa6 Mon Sep 17 00:00:00 2001 From: NicDionne Date: Thu, 17 Oct 2024 22:20:34 -0400 Subject: [PATCH 4/5] Fix code rabbit confusing variable name --- fiftyone/utils/cvat.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py index 8e8d72e687..aeb1da1e5a 100644 --- a/fiftyone/utils/cvat.py +++ b/fiftyone/utils/cvat.py @@ -1598,10 +1598,7 @@ def rle_to_binary_image_mask(rle, mask_width, mask_height) -> np.ndarray: mask[counter : counter + val] = 1 counter += val - return mask.reshape(mask_width, mask_height) - # mask = np.zeros(mask_width * mask_height, dtype=np.uint8) - # mask[np.add.accumulate(rle)[::2]] = 1 - # return mask.reshape(mask_width, mask_height) + return mask.reshape(mask_height, mask_width) @staticmethod def mask_to_cvat_rle(binary_mask: np.ndarray) -> np.array: @@ -7119,7 +7116,7 @@ def to_instance_detection(self): round(ybr - ytl) + 1, ) # We need to add 1 because cvat uses - 1 mask = HasCVATBinMask.rle_to_binary_image_mask( - rel, mask_width=mask_h, mask_height=mask_w + rel, mask_height=mask_h, mask_width=mask_w ) bbox = [ xtl / frame_width, From 302cf52855c0b7537cb593d55718b4d7e164154a Mon Sep 17 00:00:00 2001 From: brimoor Date: Thu, 26 Dec 2024 17:42:47 -0600 Subject: [PATCH 5/5] mask type is only supported by CVAT>=2.3 --- fiftyone/utils/cvat.py | 102 ++++++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 37 deletions(-) diff --git a/fiftyone/utils/cvat.py b/fiftyone/utils/cvat.py index edc372e28f..21fde1167c 100644 --- a/fiftyone/utils/cvat.py +++ b/fiftyone/utils/cvat.py @@ -1587,21 +1587,21 @@ def from_image_dict(cls, d): ) -class HasCVATBinMask: +class HasCVATBinaryMask(object): + """Mixin for CVAT annotations that store RLE format instance masks.""" + @staticmethod - def rle_to_binary_image_mask(rle, mask_width, mask_height) -> np.ndarray: + def _rle_to_binary_image_mask(rle, mask_width, mask_height): mask = np.zeros(mask_width * mask_height, dtype=np.uint8) counter = 0 - for i, val in enumerate(rle): if i % 2 == 1: mask[counter : counter + val] = 1 counter += val - return mask.reshape(mask_height, mask_width) @staticmethod - def mask_to_cvat_rle(binary_mask: np.ndarray) -> np.array: + def _mask_to_cvat_rle(binary_mask): counts = [] for i, (value, elements) in enumerate( itertools.groupby(binary_mask.ravel(order="C")) @@ -5946,7 +5946,7 @@ def _parse_annotation( label = cvat_shape.to_detection() elif shape_type == "mask": label_type = "detections" - label = cvat_shape.to_instance_detection() + label = cvat_shape.to_instance() elif shape_type == "polygon": if expected_label_type == "segmentation": # A piece of a segmentation mask @@ -6457,29 +6457,57 @@ def _create_detection_shapes( elif label_type in ("instance", "instances"): if det.has_mask is None: continue - x, y, _, _ = det.bounding_box - frame_width, frame_height = frame_size - mask_height, mask_width = det.mask.shape - xtl, ytl = round(x * frame_width), round(y * frame_height) - xbr, ybr = xtl + mask_width, ytl + mask_height - - rle = HasCVATBinMask.mask_to_cvat_rle(det.mask) - rle.extend( # Necessary as per CVAT API - [xtl, ytl, xbr - 1, ybr - 1] - ) - curr_shapes.append( - { - "type": "mask", - "occluded": is_occluded, - "z_order": 0, - "points": rle, - "label_id": class_name, - "group": group_id, - "frame": frame_id, - "source": "manual", - "attributes": deepcopy(attributes), - } - ) + + if self._server_version >= Version("2.3"): + x, y, _, _ = det.bounding_box + frame_width, frame_height = frame_size + mask_height, mask_width = det.mask.shape + xtl, ytl = round(x * frame_width), round(y * frame_height) + xbr, ybr = xtl + mask_width, ytl + mask_height + + # -1 to convert from CVAT indexing + rle = HasCVATBinaryMask._mask_to_cvat_rle(det.mask) + rle.extend([xtl, ytl, xbr - 1, ybr - 1]) + + curr_shapes.append( + { + "type": "mask", + "occluded": is_occluded, + "z_order": 0, + "points": rle, + "label_id": class_name, + "group": group_id, + "frame": frame_id, + "source": "manual", + "attributes": deepcopy(attributes), + } + ) + else: + polygon = det.to_polyline() + for points in polygon.points: + if len(points) < 3: + continue # CVAT polygons must contain >= 3 points + + abs_points = HasCVATPoints._to_abs_points( + points, frame_size + ) + flattened_points = list( + itertools.chain.from_iterable(abs_points) + ) + + curr_shapes.append( + { + "type": "polygon", + "occluded": is_occluded, + "z_order": 0, + "points": flattened_points, + "label_id": class_name, + "group": group_id, + "frame": frame_id, + "source": "manual", + "attributes": deepcopy(attributes), + } + ) if not curr_shapes: continue @@ -7128,9 +7156,9 @@ def to_detection(self): self._set_attributes(label) return label - def to_instance_detection(self): - """Converts this shape to a :class:`fiftyone.core.labels.Detection`. - Special case where we also have a mask + def to_instance(self): + """Converts this shape to a :class:`fiftyone.core.labels.Detection` + with instance mask. Returns: a :class:`fiftyone.core.labels.Detection` @@ -7138,13 +7166,13 @@ def to_instance_detection(self): xtl, ytl, xbr, ybr = self.points[-4:] rel = np.array(self.points[:-4], dtype=int) frame_width, frame_height = self.frame_size - mask_w, mask_h = ( - round(xbr - xtl) + 1, - round(ybr - ytl) + 1, - ) # We need to add 1 because cvat uses - 1 - mask = HasCVATBinMask.rle_to_binary_image_mask( + + # +1 to convert from CVAT indexing + mask_w, mask_h = round(xbr - xtl) + 1, round(ybr - ytl) + 1 + mask = HasCVATBinaryMask._rle_to_binary_image_mask( rel, mask_height=mask_h, mask_width=mask_w ) + bbox = [ xtl / frame_width, ytl / frame_height,