From 00a9101a1491332dcb04054da3392ce6d0cb7a22 Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Wed, 6 Mar 2024 12:29:51 +0200 Subject: [PATCH 1/4] Cherry pick COCO parsing from feature/ALG-000_memory-efficient-coco-dataset --- .../coco_format_detection.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py index 5e78f4b7af..b4086a5006 100644 --- a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py +++ b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py @@ -2,6 +2,7 @@ import dataclasses import json import os +from collections import defaultdict import numpy as np from typing import List, Optional, Tuple @@ -215,9 +216,9 @@ def parse_coco_into_detection_annotations( ann_image_ids = np.array([annotation["image_id"] for annotation in coco["annotations"]], dtype=int) # Extract image stuff - img_ids = np.array([img["id"] for img in coco["images"]], dtype=int) - img_paths = np.array([img["file_name"] if "file_name" in img else "{:012}".format(img["id"]) + ".jpg" for img in coco["images"]], dtype=str) - img_width_height = np.array([(img["width"], img["height"]) for img in coco["images"]], dtype=int) + img_ids = [img["id"] for img in coco["images"]] + img_paths = [img["file_name"] if "file_name" in img else "{:012}".format(img["id"]) + ".jpg" for img in coco["images"]] + img_width_height = [(img["width"], img["height"]) for img in coco["images"]] # Now, we can drop the annotations that belongs to the excluded classes if int(class_ids_to_ignore is not None) + int(exclude_classes is not None) + int(include_classes is not None) > 1: @@ -273,9 +274,15 @@ def parse_coco_into_detection_annotations( annotations = [] - for img_id, image_path, (image_width, image_height) in zip(img_ids, img_paths, img_width_height): 
- mask = ann_image_ids == img_id + img_id2ann_box_xyxy = defaultdict(list) + img_id2ann_iscrowd = defaultdict(list) + img_id2ann_category_id = defaultdict(list) + for ann_image_id, _ann_box_xyxy, _ann_iscrowd, _ann_category_id in zip(ann_image_ids, ann_box_xyxy, ann_iscrowd, ann_category_id): + img_id2ann_box_xyxy[ann_image_id].append(_ann_box_xyxy) + img_id2ann_iscrowd[ann_image_id].append(_ann_iscrowd) + img_id2ann_category_id[ann_image_id].append(_ann_category_id) + for img_id, image_path, (image_width, image_height) in zip(img_ids, img_paths, img_width_height): if image_path_prefix is not None: image_path = os.path.join(image_path_prefix, image_path) @@ -284,9 +291,9 @@ def parse_coco_into_detection_annotations( image_path=image_path, image_width=image_width, image_height=image_height, - ann_boxes_xyxy=ann_box_xyxy[mask], - ann_is_crowd=ann_iscrowd[mask], - ann_labels=ann_category_id[mask], + ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id]), + ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id]), + ann_labels=np.asarray(img_id2ann_category_id[img_id]), ) annotations.append(ann) From e0007537f48027ea575bdbaa119e76a8508f1f4f Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Wed, 6 Mar 2024 14:05:19 +0200 Subject: [PATCH 2/4] Provide explicit shape for extracted ann boxes to address an edge case when no boxes exist in the annotations at all. 
--- .../datasets/detection_datasets/coco_format_detection.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py index b4086a5006..0c5beee0f0 100644 --- a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py +++ b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py @@ -209,11 +209,11 @@ def parse_coco_into_detection_annotations( category_names = np.array([category["name"] for category in coco["categories"]], dtype=str) # Extract box annotations - ann_box_xyxy = xywh_to_xyxy_inplace(np.array([annotation["bbox"] for annotation in coco["annotations"]], dtype=np.float32), image_shape=None) + ann_box_xyxy = xywh_to_xyxy_inplace(np.array([annotation["bbox"] for annotation in coco["annotations"]], dtype=np.float32).reshape(-1, 4), image_shape=None) - ann_category_id = np.array([annotation["category_id"] for annotation in coco["annotations"]], dtype=int) - ann_iscrowd = np.array([annotation["iscrowd"] for annotation in coco["annotations"]], dtype=bool) - ann_image_ids = np.array([annotation["image_id"] for annotation in coco["annotations"]], dtype=int) + ann_category_id = np.array([annotation["category_id"] for annotation in coco["annotations"]], dtype=int).reshape(-1) + ann_iscrowd = np.array([annotation["iscrowd"] for annotation in coco["annotations"]], dtype=bool).reshape(-1) + ann_image_ids = np.array([annotation["image_id"] for annotation in coco["annotations"]], dtype=int).reshape(-1) # Extract image stuff img_ids = [img["id"] for img in coco["images"]] From b9d6a22fd937388036629015853026e62dcb852b Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Wed, 6 Mar 2024 15:32:47 +0200 Subject: [PATCH 3/4] Ensure empty annotations have bboxes of shape [0,4] for broadcasting to work as expected --- 
.../datasets/detection_datasets/coco_format_detection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py index 0c5beee0f0..3bf2b12da3 100644 --- a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py +++ b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py @@ -291,9 +291,9 @@ def parse_coco_into_detection_annotations( image_path=image_path, image_width=image_width, image_height=image_height, - ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id]), - ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id]), - ann_labels=np.asarray(img_id2ann_category_id[img_id]), + ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id]).reshape(-1, 4), + ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id]).reshape(-1), + ann_labels=np.asarray(img_id2ann_category_id[img_id]).reshape(-1), ) annotations.append(ann) From 6194a4320a0ae4ccfc6449aba21d36ccea0d84de Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Wed, 6 Mar 2024 15:49:59 +0200 Subject: [PATCH 4/4] Added explicit dtypes --- .../datasets/detection_datasets/coco_format_detection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py index 3bf2b12da3..e96276ec5b 100644 --- a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py +++ b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py @@ -291,9 +291,9 @@ def parse_coco_into_detection_annotations( image_path=image_path, image_width=image_width, image_height=image_height, - ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id]).reshape(-1, 4), - 
ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id]).reshape(-1), - ann_labels=np.asarray(img_id2ann_category_id[img_id]).reshape(-1), + ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id], dtype=np.float32).reshape(-1, 4), + ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id], dtype=bool).reshape(-1), + ann_labels=np.asarray(img_id2ann_category_id[img_id], dtype=int).reshape(-1), ) annotations.append(ann)