From de2ce7e86786d319276629bceffe0c6ebccc502e Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 15 Jul 2021 14:27:25 +0300 Subject: [PATCH] Update Datumaro dependency to 1.10 (#3411) * Update formats wrt Datumaro Transforms API changes * Add mismatching key output in tests * Strip label dir in vggface2 import * Update dependency * update patch --- cvat/apps/dataset_manager/bindings.py | 8 +++++-- .../formats/datumaro/__init__.py | 6 +++-- .../plugins/cvat_rest_api_task_images.py | 9 ++++---- cvat/apps/dataset_manager/formats/icdar.py | 8 +++---- .../dataset_manager/formats/market1501.py | 17 +++++++------- cvat/apps/dataset_manager/formats/mots.py | 4 ++-- cvat/apps/dataset_manager/formats/registry.py | 8 +++++++ cvat/apps/dataset_manager/formats/vggface2.py | 1 + cvat/apps/engine/tests/test_rest_api.py | 23 ++++++++++++------- cvat/requirements/base.txt | 2 +- 10 files changed, 55 insertions(+), 31 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index b800e18ca3cf..e11dc285b4e8 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -624,11 +624,15 @@ def match_dm_item(item, task_data, root_hint=None): return frame_number def find_dataset_root(dm_dataset, task_data): - longest_path = max(dm_dataset, key=lambda x: len(Path(x.id).parts)).id + longest_path = max(dm_dataset, key=lambda x: len(Path(x.id).parts), + default=None) + if longest_path is None: + return None + longest_path = longest_path.id + longest_match = task_data.match_frame_fuzzy(longest_path) if longest_match is None: return None - longest_match = osp.dirname(task_data.frame_info[longest_match]['path']) prefix = longest_match[:-len(osp.dirname(longest_path)) or None] if prefix.endswith('/'): diff --git a/cvat/apps/dataset_manager/formats/datumaro/__init__.py b/cvat/apps/dataset_manager/formats/datumaro/__init__.py index 3e5b1e6cb567..0f351f83b608 100644 --- 
a/cvat/apps/dataset_manager/formats/datumaro/__init__.py +++ b/cvat/apps/dataset_manager/formats/datumaro/__init__.py @@ -41,9 +41,11 @@ def _save_image_info(save_dir, task_data): 'height': frame['height'], }) - with open(osp.join(save_dir, 'config.json'), 'w') as config_file: + with open(osp.join(save_dir, 'config.json'), + 'w', encoding='utf-8') as config_file: json.dump(config, config_file) - with open(osp.join(save_dir, 'images_meta.json'), 'w') as images_file: + with open(osp.join(save_dir, 'images_meta.json'), + 'w', encoding='utf-8') as images_file: json.dump(images_meta, images_file) def _export(self, task_data, save_dir, save_images=False): diff --git a/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py b/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py index 9a7a9f06af85..359209cc352f 100644 --- a/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py +++ b/cvat/apps/dataset_manager/formats/datumaro/export_templates/plugins/cvat_rest_api_task_images.py @@ -1,5 +1,4 @@ - -# Copyright (C) 2020 Intel Corporation +# Copyright (C) 2020-2021 Intel Corporation # # SPDX-License-Identifier: MIT @@ -70,12 +69,14 @@ def __init__(self, url): self._local_dir = local_dir self._cache_dir = osp.join(local_dir, 'images') - with open(osp.join(url, 'config.json'), 'r') as config_file: + with open(osp.join(url, 'config.json'), + 'r', encoding='utf-8') as config_file: config = json.load(config_file) config = Config(config, schema=CONFIG_SCHEMA) self._config = config - with open(osp.join(url, 'images_meta.json'), 'r') as images_file: + with open(osp.join(url, 'images_meta.json'), + 'r', encoding='utf-8') as images_file: images_meta = json.load(images_file) image_list = images_meta['images'] diff --git a/cvat/apps/dataset_manager/formats/icdar.py b/cvat/apps/dataset_manager/formats/icdar.py index 8df8f3e40fd8..03eda245432a 100644 --- 
a/cvat/apps/dataset_manager/formats/icdar.py +++ b/cvat/apps/dataset_manager/formats/icdar.py @@ -7,7 +7,7 @@ from datumaro.components.dataset import Dataset from datumaro.components.extractor import (AnnotationType, Caption, Label, - LabelCategories, Transform) + LabelCategories, ItemTransform) from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, import_dm_annotations) @@ -16,7 +16,7 @@ from .registry import dm_env, exporter, importer -class AddLabelToAnns(Transform): +class AddLabelToAnns(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -39,7 +39,7 @@ def transform_item(self, item): ann.label = self._label return item.wrap(annotations=annotations) -class CaptionToLabel(Transform): +class CaptionToLabel(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -64,7 +64,7 @@ def transform_item(self, item): annotations.remove(ann) return item.wrap(annotations=annotations) -class LabelToCaption(Transform): +class LabelToCaption(ItemTransform): def transform_item(self, item): annotations = item.annotations anns = [p for p in annotations diff --git a/cvat/apps/dataset_manager/formats/market1501.py b/cvat/apps/dataset_manager/formats/market1501.py index 1b6e24f43f1a..f94d65dca88a 100644 --- a/cvat/apps/dataset_manager/formats/market1501.py +++ b/cvat/apps/dataset_manager/formats/market1501.py @@ -7,7 +7,7 @@ from datumaro.components.dataset import Dataset from datumaro.components.extractor import (AnnotationType, Label, - LabelCategories, Transform) + LabelCategories, ItemTransform) from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, import_dm_annotations) @@ -15,7 +15,7 @@ from .registry import dm_env, exporter, importer -class AttrToLabelAttr(Transform): +class AttrToLabelAttr(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -31,13 +31,14 @@ def categories(self): return self._categories def transform_item(self, item): - annotations = 
item.annotations + annotations = list(item.annotations) + attributes = item.attributes if item.attributes: annotations.append(Label(self._label, attributes=item.attributes)) - item.attributes = {} - return item.wrap(annotations=annotations) + attributes = {} + return item.wrap(annotations=annotations, attributes=attributes) -class LabelAttrToAttr(Transform): +class LabelAttrToAttr(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -46,8 +47,8 @@ def __init__(self, extractor, label): self._label = label_cat.find(label)[0] def transform_item(self, item): - annotations = item.annotations - attributes = item.attributes + annotations = list(item.annotations) + attributes = dict(item.attributes) if self._label != None: labels = [ann for ann in annotations if ann.type == AnnotationType.label \ diff --git a/cvat/apps/dataset_manager/formats/mots.py b/cvat/apps/dataset_manager/formats/mots.py index 22b9dd08c7ea..b8b562ec900a 100644 --- a/cvat/apps/dataset_manager/formats/mots.py +++ b/cvat/apps/dataset_manager/formats/mots.py @@ -5,7 +5,7 @@ from tempfile import TemporaryDirectory from datumaro.components.dataset import Dataset -from datumaro.components.extractor import AnnotationType, Transform +from datumaro.components.extractor import AnnotationType, ItemTransform from pyunpack import Archive from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor, @@ -15,7 +15,7 @@ from .registry import dm_env, exporter, importer -class KeepTracks(Transform): +class KeepTracks(ItemTransform): def transform_item(self, item): return item.wrap(annotations=[a for a in item.annotations if 'track_id' in a.attributes]) diff --git a/cvat/apps/dataset_manager/formats/registry.py b/cvat/apps/dataset_manager/formats/registry.py index e662485498cf..e8a3884b7885 100644 --- a/cvat/apps/dataset_manager/formats/registry.py +++ b/cvat/apps/dataset_manager/formats/registry.py @@ -82,6 +82,14 @@ def make_importer(name): def make_exporter(name): return 
EXPORT_FORMATS[name]() + +# Add checking for TF availability to avoid CVAT server instance / interpreter +# crash and provide a meaningful diagnostic message in the case of AVX +# instructions unavailability: +# https://github.com/openvinotoolkit/cvat/pull/1567 +import datumaro.util.tf_util as TF +TF.enable_tf_check = True + # pylint: disable=unused-import import cvat.apps.dataset_manager.formats.coco import cvat.apps.dataset_manager.formats.cvat diff --git a/cvat/apps/dataset_manager/formats/vggface2.py b/cvat/apps/dataset_manager/formats/vggface2.py index 528f52c76d7a..0ae6d9a9e531 100644 --- a/cvat/apps/dataset_manager/formats/vggface2.py +++ b/cvat/apps/dataset_manager/formats/vggface2.py @@ -29,4 +29,5 @@ def _import(src_file, task_data): zipfile.ZipFile(src_file).extractall(tmp_dir) dataset = Dataset.import_from(tmp_dir, 'vgg_face2', env=dm_env) + dataset.transform('rename', r"|([^/]+/)?(.+)|\2|") import_dm_annotations(dataset, task_data) diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index f07df7066f07..d87a2a1b7ab8 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -3331,9 +3331,13 @@ def test_api_v1_tasks_id_data_no_auth(self): response = self._create_task(None, data) self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) -def compare_objects(self, obj1, obj2, ignore_keys, fp_tolerance=.001): +def compare_objects(self, obj1, obj2, ignore_keys, fp_tolerance=.001, + current_key=None): + key_info = "{}: ".format(current_key) if current_key else "" + if isinstance(obj1, dict): - self.assertTrue(isinstance(obj2, dict), "{} != {}".format(obj1, obj2)) + self.assertTrue(isinstance(obj2, dict), + "{}{} != {}".format(key_info, obj1, obj2)) for k, v1 in obj1.items(): if k in ignore_keys: continue @@ -3342,17 +3346,20 @@ def compare_objects(self, obj1, obj2, ignore_keys, fp_tolerance=.001): key = lambda a: a['spec_id'] if 'spec_id' in a else a['id'] 
v1.sort(key=key) v2.sort(key=key) - compare_objects(self, v1, v2, ignore_keys) + compare_objects(self, v1, v2, ignore_keys, current_key=k) elif isinstance(obj1, list): - self.assertTrue(isinstance(obj2, list), "{} != {}".format(obj1, obj2)) - self.assertEqual(len(obj1), len(obj2), "{} != {}".format(obj1, obj2)) + self.assertTrue(isinstance(obj2, list), + "{}{} != {}".format(key_info, obj1, obj2)) + self.assertEqual(len(obj1), len(obj2), + "{}{} != {}".format(key_info, obj1, obj2)) for v1, v2 in zip(obj1, obj2): - compare_objects(self, v1, v2, ignore_keys) + compare_objects(self, v1, v2, ignore_keys, current_key=current_key) else: if isinstance(obj1, float) or isinstance(obj2, float): - self.assertAlmostEqual(obj1, obj2, delta=fp_tolerance) + self.assertAlmostEqual(obj1, obj2, delta=fp_tolerance, + msg=current_key) else: - self.assertEqual(obj1, obj2) + self.assertEqual(obj1, obj2, msg=current_key) class JobAnnotationAPITestCase(APITestCase): def setUp(self): diff --git a/cvat/requirements/base.txt b/cvat/requirements/base.txt index d83466b53da7..2ec265159666 100644 --- a/cvat/requirements/base.txt +++ b/cvat/requirements/base.txt @@ -52,4 +52,4 @@ azure-storage-blob==12.8.1 # --no-binary=pycocotools: workaround for binary incompatibility on numpy 1.20 # of pycocotools and tensorflow 2.4.1 # when pycocotools is installed by wheel in python 3.8+ -datumaro==0.1.9 --no-binary=datumaro --no-binary=pycocotools +datumaro==0.1.10.1 --no-binary=datumaro --no-binary=pycocotools