From 00a9101a1491332dcb04054da3392ce6d0cb7a22 Mon Sep 17 00:00:00 2001
From: Eugene Khvedchenya <ekhvedchenya@gmail.com>
Date: Wed, 6 Mar 2024 12:29:51 +0200
Subject: [PATCH 1/4] Cherry-pick COCO parsing from
 feature/ALG-000_memory-efficient-coco-dataset

---
 .../coco_format_detection.py                  | 23 ++++++++++++-------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
index 5e78f4b7af..b4086a5006 100644
--- a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
+++ b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
@@ -2,6 +2,7 @@
 import dataclasses
 import json
 import os
+from collections import defaultdict
 
 import numpy as np
 from typing import List, Optional, Tuple
@@ -215,9 +216,9 @@ def parse_coco_into_detection_annotations(
     ann_image_ids = np.array([annotation["image_id"] for annotation in coco["annotations"]], dtype=int)
 
     # Extract image stuff
-    img_ids = np.array([img["id"] for img in coco["images"]], dtype=int)
-    img_paths = np.array([img["file_name"] if "file_name" in img else "{:012}".format(img["id"]) + ".jpg" for img in coco["images"]], dtype=str)
-    img_width_height = np.array([(img["width"], img["height"]) for img in coco["images"]], dtype=int)
+    img_ids = [img["id"] for img in coco["images"]]
+    img_paths = [img["file_name"] if "file_name" in img else "{:012}".format(img["id"]) + ".jpg" for img in coco["images"]]
+    img_width_height = [(img["width"], img["height"]) for img in coco["images"]]
 
     # Now, we can drop the annotations that belongs to the excluded classes
     if int(class_ids_to_ignore is not None) + int(exclude_classes is not None) + int(include_classes is not None) > 1:
@@ -273,9 +274,15 @@ def parse_coco_into_detection_annotations(
 
     annotations = []
 
-    for img_id, image_path, (image_width, image_height) in zip(img_ids, img_paths, img_width_height):
-        mask = ann_image_ids == img_id
+    img_id2ann_box_xyxy = defaultdict(list)
+    img_id2ann_iscrowd = defaultdict(list)
+    img_id2ann_category_id = defaultdict(list)
+    for ann_image_id, _ann_box_xyxy, _ann_iscrowd, _ann_category_id in zip(ann_image_ids, ann_box_xyxy, ann_iscrowd, ann_category_id):
+        img_id2ann_box_xyxy[ann_image_id].append(_ann_box_xyxy)
+        img_id2ann_iscrowd[ann_image_id].append(_ann_iscrowd)
+        img_id2ann_category_id[ann_image_id].append(_ann_category_id)
 
+    for img_id, image_path, (image_width, image_height) in zip(img_ids, img_paths, img_width_height):
         if image_path_prefix is not None:
             image_path = os.path.join(image_path_prefix, image_path)
 
@@ -284,9 +291,9 @@ def parse_coco_into_detection_annotations(
             image_path=image_path,
             image_width=image_width,
             image_height=image_height,
-            ann_boxes_xyxy=ann_box_xyxy[mask],
-            ann_is_crowd=ann_iscrowd[mask],
-            ann_labels=ann_category_id[mask],
+            ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id]),
+            ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id]),
+            ann_labels=np.asarray(img_id2ann_category_id[img_id]),
         )
         annotations.append(ann)
 

From e0007537f48027ea575bdbaa119e76a8508f1f4f Mon Sep 17 00:00:00 2001
From: Eugene Khvedchenya <ekhvedchenya@gmail.com>
Date: Wed, 6 Mar 2024 14:05:19 +0200
Subject: [PATCH 2/4] Provide explicit shape for extracted ann boxes to address
 an edge case when no boxes exist in the annotations at all.

---
 .../datasets/detection_datasets/coco_format_detection.py  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
index b4086a5006..0c5beee0f0 100644
--- a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
+++ b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
@@ -209,11 +209,11 @@ def parse_coco_into_detection_annotations(
     category_names = np.array([category["name"] for category in coco["categories"]], dtype=str)
 
     # Extract box annotations
-    ann_box_xyxy = xywh_to_xyxy_inplace(np.array([annotation["bbox"] for annotation in coco["annotations"]], dtype=np.float32), image_shape=None)
+    ann_box_xyxy = xywh_to_xyxy_inplace(np.array([annotation["bbox"] for annotation in coco["annotations"]], dtype=np.float32).reshape(-1, 4), image_shape=None)
 
-    ann_category_id = np.array([annotation["category_id"] for annotation in coco["annotations"]], dtype=int)
-    ann_iscrowd = np.array([annotation["iscrowd"] for annotation in coco["annotations"]], dtype=bool)
-    ann_image_ids = np.array([annotation["image_id"] for annotation in coco["annotations"]], dtype=int)
+    ann_category_id = np.array([annotation["category_id"] for annotation in coco["annotations"]], dtype=int).reshape(-1)
+    ann_iscrowd = np.array([annotation["iscrowd"] for annotation in coco["annotations"]], dtype=bool).reshape(-1)
+    ann_image_ids = np.array([annotation["image_id"] for annotation in coco["annotations"]], dtype=int).reshape(-1)
 
     # Extract image stuff
     img_ids = [img["id"] for img in coco["images"]]

From b9d6a22fd937388036629015853026e62dcb852b Mon Sep 17 00:00:00 2001
From: Eugene Khvedchenya <ekhvedchenya@gmail.com>
Date: Wed, 6 Mar 2024 15:32:47 +0200
Subject: [PATCH 3/4] Ensure empty annotations have bboxes of shape [0,4] for
 broadcasting to work as expected

---
 .../datasets/detection_datasets/coco_format_detection.py    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
index 0c5beee0f0..3bf2b12da3 100644
--- a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
+++ b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
@@ -291,9 +291,9 @@ def parse_coco_into_detection_annotations(
             image_path=image_path,
             image_width=image_width,
             image_height=image_height,
-            ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id]),
-            ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id]),
-            ann_labels=np.asarray(img_id2ann_category_id[img_id]),
+            ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id]).reshape(-1, 4),
+            ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id]).reshape(-1),
+            ann_labels=np.asarray(img_id2ann_category_id[img_id]).reshape(-1),
         )
         annotations.append(ann)
 

From 6194a4320a0ae4ccfc6449aba21d36ccea0d84de Mon Sep 17 00:00:00 2001
From: Eugene Khvedchenya <ekhvedchenya@gmail.com>
Date: Wed, 6 Mar 2024 15:49:59 +0200
Subject: [PATCH 4/4] Added explicit dtypes

---
 .../datasets/detection_datasets/coco_format_detection.py    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
index 3bf2b12da3..e96276ec5b 100644
--- a/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
+++ b/src/super_gradients/training/datasets/detection_datasets/coco_format_detection.py
@@ -291,9 +291,9 @@ def parse_coco_into_detection_annotations(
             image_path=image_path,
             image_width=image_width,
             image_height=image_height,
-            ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id]).reshape(-1, 4),
-            ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id]).reshape(-1),
-            ann_labels=np.asarray(img_id2ann_category_id[img_id]).reshape(-1),
+            ann_boxes_xyxy=np.asarray(img_id2ann_box_xyxy[img_id], dtype=np.float32).reshape(-1, 4),
+            ann_is_crowd=np.asarray(img_id2ann_iscrowd[img_id], dtype=bool).reshape(-1),
+            ann_labels=np.asarray(img_id2ann_category_id[img_id], dtype=int).reshape(-1),
         )
         annotations.append(ann)