From 3fe21657682a7bb86b53c018398de9085aedb195 Mon Sep 17 00:00:00 2001
From: Zhiltsov Max <zhiltsov.max35@gmail.com>
Date: Fri, 27 Dec 2019 12:19:12 +0300
Subject: [PATCH] Fixes in COCO: set annotation group to annotation id for all tasks

---
 .../datumaro/components/extractors/ms_coco.py | 17 +++++++-------
 datumaro/tests/test_coco_format.py            | 22 +++++++++----------
 2 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/datumaro/datumaro/components/extractors/ms_coco.py b/datumaro/datumaro/components/extractors/ms_coco.py
index 80a2975d070a..f6d1f9e1c83e 100644
--- a/datumaro/datumaro/components/extractors/ms_coco.py
+++ b/datumaro/datumaro/components/extractors/ms_coco.py
@@ -120,8 +120,7 @@ def _load_items(self, loader):
 
             anns = loader.getAnnIds(imgIds=img_id)
             anns = loader.loadAnns(anns)
-            anns = list(chain(*(
-                self._load_annotations(ann, image_info) for ann in anns)))
+            anns = sum((self._load_annotations(a, image_info) for a in anns), [])
 
             items[img_id] = DatasetItem(id=img_id, subset=self._subset,
                 image=image, annotations=anns)
@@ -143,16 +142,16 @@ def _load_annotations(self, ann, image_info=None):
         if 'score' in ann:
             attributes['score'] = ann['score']
 
+        group = ann_id # make sure all tasks' annotations are merged
+
         if self._task in [CocoTask.instances, CocoTask.person_keypoints]:
             x, y, w, h = ann['bbox']
             label_id = self._get_label_id(ann)
-            group = None
 
             is_crowd = bool(ann['iscrowd'])
             attributes['is_crowd'] = is_crowd
 
             if self._task is CocoTask.person_keypoints:
-                group = ann_id
                 keypoints = ann['keypoints']
                 points = [p for i, p in enumerate(keypoints) if i % 3 != 2]
                 visibility = keypoints[2::3]
@@ -163,7 +162,6 @@ def _load_annotations(self, ann, image_info=None):
 
             segmentation = ann.get('segmentation')
             if segmentation is not None:
-                group = ann_id
                 rle = None
 
                 if isinstance(segmentation, list):
@@ -171,7 +169,7 @@ def _load_annotations(self, ann, image_info=None):
                     for polygon_points in segmentation:
                         parsed_annotations.append(PolygonObject(
                             points=polygon_points, label=label_id,
-                            id=ann_id, group=group, attributes=attributes
+                            id=ann_id, attributes=attributes, group=group
                         ))
 
                     if self._merge_instance_polygons:
@@ -190,7 +188,7 @@ def _load_annotations(self, ann, image_info=None):
 
                 if rle is not None:
                     parsed_annotations.append(RleMask(rle=rle, label=label_id,
-                        id=ann_id, group=group, attributes=attributes
+                        id=ann_id, attributes=attributes, group=group
                     ))
 
             parsed_annotations.append(
@@ -200,13 +198,14 @@ def _load_annotations(self, ann, image_info=None):
         elif self._task is CocoTask.labels:
             label_id = self._get_label_id(ann)
             parsed_annotations.append(
-                LabelObject(label=label_id, id=ann_id, attributes=attributes)
+                LabelObject(label=label_id,
+                    id=ann_id, attributes=attributes, group=group)
             )
         elif self._task is CocoTask.captions:
             caption = ann['caption']
             parsed_annotations.append(
                 CaptionObject(caption,
-                    id=ann_id, attributes=attributes)
+                    id=ann_id, attributes=attributes, group=group)
             )
         else:
             raise NotImplementedError()
diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py
index 342a4ab3eaa8..1631434e8542 100644
--- a/datumaro/tests/test_coco_format.py
+++ b/datumaro/tests/test_coco_format.py
@@ -176,17 +176,17 @@ def __iter__(self):
                 return iter([
                     DatasetItem(id=1, subset='train',
                         annotations=[
-                            CaptionObject('hello', id=1),
-                            CaptionObject('world', id=2),
+                            CaptionObject('hello', id=1, group=1),
+                            CaptionObject('world', id=2, group=2),
                         ]),
                     DatasetItem(id=2, subset='train',
                         annotations=[
-                            CaptionObject('test', id=3),
+                            CaptionObject('test', id=3, group=3),
                         ]),
 
                     DatasetItem(id=3, subset='val',
                         annotations=[
-                            CaptionObject('word', id=1),
+                            CaptionObject('word', id=1, group=1),
                         ]
                     ),
                 ])
@@ -382,7 +382,7 @@ def test_can_convert_polygons_to_mask(self):
         class SrcTestExtractor(Extractor):
             def __iter__(self):
                 return iter([
-                    DatasetItem(id=0, image=np.zeros((5, 10, 3)),
+                    DatasetItem(id=1, image=np.zeros((6, 10, 3)),
                         annotations=[
                             PolygonObject([0, 0, 4, 0, 4, 4],
                                 label=3, id=4, group=4),
@@ -504,17 +504,17 @@ def __iter__(self):
                 return iter([
                     DatasetItem(id=1, subset='train',
                         annotations=[
-                            LabelObject(4, id=1),
-                            LabelObject(9, id=2),
+                            LabelObject(4, id=1, group=1),
+                            LabelObject(9, id=2, group=2),
                         ]),
                     DatasetItem(id=2, subset='train',
                         annotations=[
-                            LabelObject(4, id=4),
+                            LabelObject(4, id=4, group=4),
                         ]),
 
                     DatasetItem(id=3, subset='val',
                         annotations=[
-                            LabelObject(2, id=1),
+                            LabelObject(2, id=1, group=1),
                         ]),
                 ])
 
@@ -634,11 +634,11 @@ class TestExtractor(Extractor):
             def __iter__(self):
                 return iter([
                     DatasetItem(id=1, annotations=[
-                        LabelObject(2, id=1),
+                        LabelObject(2, id=1, group=1),
                     ]),
 
                     DatasetItem(id=2, annotations=[
-                        LabelObject(3, id=2),
+                        LabelObject(3, id=2, group=2),
                     ]),
                 ])