Merge pull request #185 from openvinotoolkit/develop

Release v0.1.7
openvinotoolkit · Mar 24, 2021 · 9580d5d · 9580d5d
2 parents e474187 + f21f71d
commit 9580d5d
Show file tree

Hide file tree

Showing 80 changed files with 6,892 additions and 870 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -31,6 +31,7 @@ matrix:
 install:
   - pip install -e ./
   - pip install tensorflow
+  - pip install pandas
 
 script:
   - python -m unittest discover -v

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,33 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 
+## 24/03/2021 - Release v0.1.7
+### Added
+- OpenVINO plugin examples (<https://github.com/openvinotoolkit/datumaro/pull/159>)
+- Dataset validation for classification and detection datasets (<https://github.com/openvinotoolkit/datumaro/pull/160>)
+- Arbitrary image extensions in formats (import and export) (<https://github.com/openvinotoolkit/datumaro/issues/166>)
+- Ability to set a custom subset name for an imported dataset (<https://github.com/openvinotoolkit/datumaro/issues/166>)
+- CLI support for NDR (<https://github.com/openvinotoolkit/datumaro/pull/178>)
+
+### Changed
+- Common ICDAR format is split into 3 sub-formats (<https://github.com/openvinotoolkit/datumaro/pull/174>)
+
+### Deprecated
+-
+
+### Removed
+-
+
+### Fixed
+- The ability to work with file names containing Cyrillic and spaces (<https://github.com/openvinotoolkit/datumaro/pull/148>)
+- Image reading and saving in ICDAR formats (<https://github.com/openvinotoolkit/datumaro/pull/174>)
+- Unnecessary image loading on dataset saving (<https://github.com/openvinotoolkit/datumaro/pull/176>)
+- Allowed spaces in ICDAR captions (<https://github.com/openvinotoolkit/datumaro/pull/182>)
+- Saving of masks in VOC when masks are not requested (<https://github.com/openvinotoolkit/datumaro/pull/184>)
+
+### Security
+-
+
 ## 03/02/2021 - Release v0.1.6.1 (hotfix)
 ### Added
 -
@@ -34,6 +61,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `LFW` dataset format (<https://github.com/openvinotoolkit/datumaro/pull/110>)
 - Support of polygons' and masks' confusion matrices and mismatching classes in `diff` command (<https://github.com/openvinotoolkit/datumaro/pull/117>)
 - Add near duplicate image removal plugin (<https://github.com/openvinotoolkit/datumaro/pull/113>)
+- Sampler Plugin that analyzes inference results from the given dataset and selects samples for annotation (<https://github.com/openvinotoolkit/datumaro/pull/115>)
 
 ### Changed
 - OpenVINO model launcher is updated for OpenVINO r2021.1 (<https://github.com/openvinotoolkit/datumaro/pull/100>)

diff --git a/README.md b/README.md
@@ -158,6 +158,11 @@ CVAT annotations                             ---> Publication, statistics etc.
       - for detection task, based on bboxes
       - for re-identification task, based on labels,
         avoiding having same IDs in training and test splits
+  - Sampling a dataset
+    - analyzes inference results from the given dataset
+      and selects the ‘best’ and the ‘least amount of’ samples for annotation
+    - selects the samples that best suit model training
+      - sampling with an entropy-based algorithm
 - Dataset quality checking
   - Simple checking for errors
   - Comparison with model inference

diff --git a/datumaro/cli/__main__.py b/datumaro/cli/__main__.py
@@ -77,6 +77,7 @@ def make_parser():
         ('stats', commands.stats, "Compute project statistics"),
         ('info', commands.info, "Print project info"),
         ('explain', commands.explain, "Run Explainable AI algorithm for model"),
+        ('validate', commands.validate, "Validate project")
     ]
 
     # Argparse doesn't support subparser groups:

diff --git a/datumaro/cli/commands/__init__.py b/datumaro/cli/commands/__init__.py
@@ -9,5 +9,5 @@
     explain,
     export, merge, convert, transform, filter,
     diff, ediff, stats,
-    info
+    info, validate
 )
diff --git a/datumaro/cli/commands/validate.py b/datumaro/cli/commands/validate.py
@@ -0,0 +1,7 @@
+# Copyright (C) 2020-2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+# pylint: disable=unused-import
+
+from ..contexts.project import build_validate_parser as build_parser
diff --git a/datumaro/cli/contexts/project/__init__.py b/datumaro/cli/contexts/project/__init__.py
@@ -10,14 +10,14 @@
 import shutil
 from enum import Enum
 
-from datumaro.components.cli_plugin import CliPlugin
 from datumaro.components.dataset_filter import DatasetItemEncoder
 from datumaro.components.extractor import AnnotationType
 from datumaro.components.operations import (DistanceComparator,
-    ExactComparator, compute_ann_statistics, compute_image_statistics, mean_std)
+    ExactComparator, compute_ann_statistics, compute_image_statistics)
 from datumaro.components.project import \
     PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
 from datumaro.components.project import Environment, Project
+from datumaro.components.validator import validate_annotations, TaskType
 from datumaro.util import error_rollback
 
 from ...util import (CliException, MultilineFormatter, add_subparser,
@@ -791,6 +791,51 @@ def print_extractor_info(extractor, indent=''):
 
     return 0
 
+def build_validate_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor(help="Validate project",
+        description="""
+            Validates project based on specified task type and stores
+            results like statistics, reports and summary in JSON file.
+        """,
+        formatter_class=MultilineFormatter)
+
+    parser.add_argument('task_type',
+        choices=[task_type.name for task_type in TaskType],
+        help="Task type for validation")
+    parser.add_argument('-s', '--subset', dest='subset_name', default=None,
+        help="Subset to validate (default: None)")
+    parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to validate (default: current dir)")
+    parser.set_defaults(command=validate_command)
+
+    return parser
+
+def validate_command(args):
+    project = load_project(args.project_dir)
+    task_type = args.task_type
+    subset_name = args.subset_name
+    dst_file_name = 'validation_results'
+
+    dataset = project.make_dataset()
+    if subset_name is not None:
+        dataset = dataset.get_subset(subset_name)
+        dst_file_name += f'-{subset_name}'
+    validation_results = validate_annotations(dataset, task_type)
+
+    def _convert_tuple_keys_to_str(d):
+        for key, val in list(d.items()):
+            if isinstance(key, tuple):
+                d[str(key)] = val
+                d.pop(key)
+            if isinstance(val, dict):
+                _convert_tuple_keys_to_str(val)
+
+    _convert_tuple_keys_to_str(validation_results)
+
+    dst_file = generate_next_file_name(dst_file_name, ext='.json')
+    log.info("Writing project validation results to '%s'" % dst_file)
+    with open(dst_file, 'w') as f:
+        json.dump(validation_results, f, indent=4, sort_keys=True)
 
 def build_parser(parser_ctor=argparse.ArgumentParser):
     parser = parser_ctor(
@@ -814,5 +859,6 @@ def build_parser(parser_ctor=argparse.ArgumentParser):
     add_subparser(subparsers, 'transform', build_transform_parser)
     add_subparser(subparsers, 'info', build_info_parser)
     add_subparser(subparsers, 'stats', build_stats_parser)
+    add_subparser(subparsers, 'validate', build_validate_parser)
 
     return parser
diff --git a/datumaro/components/converter.py b/datumaro/components/converter.py
@@ -57,15 +57,23 @@ def _find_image_ext(self, item):
 
         return self._image_ext or src_ext or self._default_image_ext
 
-    def _make_image_filename(self, item):
-        return item.id + self._find_image_ext(item)
+    def _make_image_filename(self, item, *, name=None, subdir=None):
+        name = name or item.id
+        subdir = subdir or ''
+        return osp.join(subdir, name + self._find_image_ext(item))
+
+    def _save_image(self, item, path=None, *,
+            name=None, subdir=None, basedir=None):
+        assert not ((subdir or name or basedir) and path), \
+            "Can't use both subdir or name or basedir and path arguments"
 
-    def _save_image(self, item, path=None):
         if not item.image.has_data:
             log.warning("Item '%s' has no image", item.id)
             return
 
-        path = path or self._make_image_filename(item)
+        basedir = basedir or self._save_dir
+        path = path or osp.join(basedir,
+            self._make_image_filename(item, name=name, subdir=subdir))
         path = osp.abspath(path)
 
         src_ext = item.image.ext.lower()