[Datumaro] Fixes (#1953)

* Add absolute image path in rest api extractor
* Add default split for random split
* Fix image path in datumaro format
* Preserve bboxes in coco format
* update changelog

Co-authored-by: Nikita Manovich <[email protected]>
cvat-ai · Jul 29, 2020 · 0062ecd · 0062ecd
1 parent e7585b8
commit 0062ecd
Show file tree

Hide file tree

Showing 7 changed files with 74 additions and 71 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 - Smaller object details (<https://github.com/opencv/cvat/pull/1877>)
+- `COCO` format does not convert bboxes to polygons on export (<https://github.com/opencv/cvat/pull/1953>)
 - It is impossible to submit a DL model in OpenVINO format using UI. Now you can deploy new models on the server using serverless functions (<https://github.com/opencv/cvat/pull/1767>)
 - Files and folders under share path are now alphabetically sorted
 

diff --git a/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py b/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py
@@ -100,7 +100,7 @@ def __init__(self, url):
             if entry.get('height') and entry.get('width'):
                 size = (entry['height'], entry['width'])
             image = Image(data=self._make_image_loader(item_id),
-                path=item_filename, size=size)
+                path=self._image_local_path(item_id), size=size)
             item = DatasetItem(id=item_id, image=image)
             items.append((item.id, item))
 

diff --git a/datumaro/datumaro/plugins/coco_format/converter.py b/datumaro/datumaro/plugins/coco_format/converter.py
@@ -298,8 +298,8 @@ def convert_instance(self, instance, item):
                 rles = mask_utils.merge(rles)
             area = mask_utils.area(rles)
         else:
-            x, y, w, h = bbox
-            segmentation = [[x, y, x + w, y, x + w, y + h, x, y + h]]
+            _, _, w, h = bbox
+            segmentation = []
             area = w * h
 
         elem = {

diff --git a/datumaro/datumaro/plugins/datumaro_format/converter.py b/datumaro/datumaro/plugins/datumaro_format/converter.py
@@ -54,7 +54,8 @@ def write_item(self, item):
         if item.has_image:
             path = item.image.path
             if self._context._save_images:
-                path = self._context._save_image(item)
+                path = self._context._make_image_filename(item)
+                self._context._save_image(item, path)
 
             item_desc['image'] = {
                 'size': item.image.size,

diff --git a/datumaro/datumaro/plugins/transforms.py b/datumaro/datumaro/plugins/transforms.py
@@ -322,6 +322,7 @@ def build_cmdline_parser(cls, **kwargs):
         parser = super().build_cmdline_parser(**kwargs)
         parser.add_argument('-s', '--subset', action='append',
             type=cls._split_arg, dest='splits',
+            default=[('train', 0.67), ('test', 0.33)],
             help="Subsets in the form of: '<subset>:<ratio>' (repeatable)")
         parser.add_argument('--seed', type=int, help="Random seed")
         return parser

diff --git a/datumaro/tests/test_coco_format.py b/datumaro/tests/test_coco_format.py
@@ -4,8 +4,8 @@
 
 from unittest import TestCase
 
-from datumaro.components.project import (Project, Dataset)
-from datumaro.components.extractor import (Extractor, DatasetItem,
+from datumaro.components.project import Project, Dataset
+from datumaro.components.extractor import (DatasetItem,
     AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption,
     LabelCategories, PointsCategories
 )
@@ -26,7 +26,6 @@
 
 class CocoImporterTest(TestCase):
     def test_can_import(self):
-
         expected_dataset = Dataset.from_iterable([
             DatasetItem(id='000000000001', image=np.ones((10, 5, 3)),
                 subset='val', attributes={'id': 1},
@@ -349,7 +348,6 @@ def test_can_save_and_load_labels(self):
                 CocoLabelsConverter.convert, test_dir)
 
     def test_can_save_and_load_keypoints(self):
-
         source_dataset = Dataset.from_iterable([
             DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)),
                 annotations=[
@@ -373,11 +371,11 @@ def test_can_save_and_load_keypoints(self):
                     Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5),
                 ]),
             ], categories={
-                    AnnotationType.label: LabelCategories.from_iterable(
-                        str(i) for i in range(10)),
-                    AnnotationType.points: PointsCategories.from_iterable(
-                        (i, None, [[0, 1], [1, 2]]) for i in range(10)
-                    ),
+                AnnotationType.label: LabelCategories.from_iterable(
+                    str(i) for i in range(10)),
+                AnnotationType.points: PointsCategories.from_iterable(
+                    (i, None, [[0, 1], [1, 2]]) for i in range(10)
+                ),
             })
 
         target_dataset = Dataset.from_iterable([
@@ -393,30 +391,30 @@ def test_can_save_and_load_keypoints(self):
                     Points([1, 2, 3, 4, 2, 3],
                         group=2, id=2,
                         attributes={'is_crowd': False}),
-                    Polygon([1, 2, 3, 2, 3, 4, 1, 4],
+                    Bbox(1, 2, 2, 2,
                         group=2, id=2,
                         attributes={'is_crowd': False}),
 
                     Points([1, 2, 0, 2, 4, 1],
                         label=5, group=3, id=3,
                         attributes={'is_crowd': False}),
-                    Polygon([0, 1, 4, 1, 4, 2, 0, 2],
+                    Bbox(0, 1, 4, 1,
                         label=5, group=3, id=3,
                         attributes={'is_crowd': False}),
 
                     Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
                         group=5, id=5,
                         attributes={'is_crowd': False}),
-                    Polygon([1, 2, 3, 2, 3, 4, 1, 4],
+                    Bbox(1, 2, 2, 2,
                         group=5, id=5,
                         attributes={'is_crowd': False}),
                 ], attributes={'id': 1}),
             ], categories={
-                    AnnotationType.label: LabelCategories.from_iterable(
-                        str(i) for i in range(10)),
-                    AnnotationType.points: PointsCategories.from_iterable(
-                        (i, None, [[0, 1], [1, 2]]) for i in range(10)
-                    ),
+                AnnotationType.label: LabelCategories.from_iterable(
+                    str(i) for i in range(10)),
+                AnnotationType.points: PointsCategories.from_iterable(
+                    (i, None, [[0, 1], [1, 2]]) for i in range(10)
+                ),
             })
 
         with TestDir() as test_dir:

diff --git a/datumaro/tests/test_datumaro_format.py b/datumaro/tests/test_datumaro_format.py
@@ -32,56 +32,58 @@ def _test_save_and_load(self, source_dataset, converter, test_dir,
         compare_datasets_strict(self,
             expected=target_dataset, actual=parsed_dataset)
 
-    label_categories = LabelCategories()
-    for i in range(5):
-        label_categories.add('cat' + str(i))
-
-    mask_categories = MaskCategories(
-        generate_colormap(len(label_categories.items)))
-
-    points_categories = PointsCategories()
-    for index, _ in enumerate(label_categories.items):
-        points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
-
-    test_dataset = Dataset.from_iterable([
-        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
-            annotations=[
-                Caption('hello', id=1),
-                Caption('world', id=2, group=5),
-                Label(2, id=3, attributes={
-                    'x': 1,
-                    'y': '2',
-                }),
-                Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
-                    'score': 1.0,
-                }),
-                Bbox(5, 6, 7, 8, id=5, group=5),
-                Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
-                Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
-            ]),
-        DatasetItem(id=21, subset='train',
-            annotations=[
-                Caption('test'),
-                Label(2),
-                Bbox(1, 2, 3, 4, 5, id=42, group=42)
-            ]),
-
-        DatasetItem(id=2, subset='val',
-            annotations=[
-                PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
-                Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
-            ]),
-
-        DatasetItem(id=42, subset='test',
-            attributes={'a1': 5, 'a2': '42'}),
-
-        DatasetItem(id=42),
-        DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
-    ], categories={
-        AnnotationType.label: label_categories,
-        AnnotationType.mask: mask_categories,
-        AnnotationType.points: points_categories,
-    })
+    @property
+    def test_dataset(self):
+        label_categories = LabelCategories()
+        for i in range(5):
+            label_categories.add('cat' + str(i))
+
+        mask_categories = MaskCategories(
+            generate_colormap(len(label_categories.items)))
+
+        points_categories = PointsCategories()
+        for index, _ in enumerate(label_categories.items):
+            points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])
+
+        return Dataset.from_iterable([
+            DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
+                annotations=[
+                    Caption('hello', id=1),
+                    Caption('world', id=2, group=5),
+                    Label(2, id=3, attributes={
+                        'x': 1,
+                        'y': '2',
+                    }),
+                    Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
+                        'score': 1.0,
+                    }),
+                    Bbox(5, 6, 7, 8, id=5, group=5),
+                    Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
+                    Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
+                ]),
+            DatasetItem(id=21, subset='train',
+                annotations=[
+                    Caption('test'),
+                    Label(2),
+                    Bbox(1, 2, 3, 4, 5, id=42, group=42)
+                ]),
+
+            DatasetItem(id=2, subset='val',
+                annotations=[
+                    PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
+                    Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
+                ]),
+
+            DatasetItem(id=42, subset='test',
+                attributes={'a1': 5, 'a2': '42'}),
+
+            DatasetItem(id=42),
+            DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
+        ], categories={
+            AnnotationType.label: label_categories,
+            AnnotationType.mask: mask_categories,
+            AnnotationType.points: points_categories,
+        })
 
     def test_can_save_and_load(self):
         with TestDir() as test_dir: