diff --git a/CHANGELOG.md b/CHANGELOG.md
index 39ef60adbd63..d80a1ee8e1a5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - `ItemTransform` class, which describes item-wise dataset `Transform`s ()
 - `keep-empty` export parameter in VOC format ()
 - A base class for dataset validation plugins ()
+- Partial support for the Open Images format;
+  only reading is supported, and only images and image-level labels can be read
+  ().
 
 ### Changed
 - Tensorflow AVX check is made optional in API and is disabled by default ()
diff --git a/datumaro/plugins/open_images_format.py b/datumaro/plugins/open_images_format.py
new file mode 100644
index 000000000000..2f823eef89bf
--- /dev/null
+++ b/datumaro/plugins/open_images_format.py
@@ -0,0 +1,216 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import contextlib
+import csv
+import fnmatch
+import glob
+import json
+import os
+import os.path as osp
+import re
+
+from attr import attrs
+
+from datumaro.components.errors import DatasetError, RepeatedItemError, UndefinedLabel
+from datumaro.components.extractor import (
+    AnnotationType, DatasetItem, Extractor, Importer, Label, LabelCategories,
+)
+from datumaro.components.validator import Severity
+from datumaro.util.image import find_images
+
+# A regex to check whether a subset name can be used as a "normal" path
+# component.
+# Accepting a subset name that matches this regex could lead
+# to accessing data outside of the expected directory, so it's best
+# to reject such names.
+_RE_INVALID_SUBSET = re.compile(r'''
+    # empty
+    | \.\.? # special path component
+    | .*[/\\\0].* # contains special characters
+''', re.VERBOSE)
+
+@attrs(auto_attribs=True)
+class UnsupportedSubsetNameError(DatasetError):
+    subset: str
+
+    def __str__(self):
+        return "Item %s has an unsupported subset name %r." % (self.item_id, self.subset)
+
+class OpenImagesPath:
+    ANNOTATIONS_DIR = 'annotations'
+    FULL_IMAGE_DESCRIPTION_NAME = 'image_ids_and_rotation.csv'
+    SUBSET_IMAGE_DESCRIPTION_PATTERNS = (
+        '*-images-with-rotation.csv',
+        '*-images-with-labels-with-rotation.csv',
+    )
+
+class OpenImagesExtractor(Extractor):
+    def __init__(self, path):
+        if not osp.isdir(path):
+            raise FileNotFoundError("Can't read dataset directory '%s'" % path)
+
+        super().__init__()
+
+        self._dataset_dir = path
+
+        self._annotation_files = os.listdir(
+            osp.join(path, OpenImagesPath.ANNOTATIONS_DIR))
+
+        self._categories = {}
+        self._items = []
+
+        self._load_categories()
+        self._load_items()
+
+    def __iter__(self):
+        return iter(self._items)
+
+    def categories(self):
+        return self._categories
+
+    @contextlib.contextmanager
+    def _open_csv_annotation(self, file_name):
+        absolute_path = osp.join(self._dataset_dir, OpenImagesPath.ANNOTATIONS_DIR, file_name)
+
+        with open(absolute_path, 'r', encoding='utf-8', newline='') as f:
+            yield csv.DictReader(f)
+
+    def _glob_annotations(self, pattern):
+        for annotation_file in self._annotation_files:
+            if fnmatch.fnmatch(annotation_file, pattern):
+                yield annotation_file
+
+    def _load_categories(self):
+        label_categories = LabelCategories()
+
+        # In OID v6, the class description file is prefixed with `oidv6-`, whereas
+        # in the previous versions, it isn't. We try to find it regardless.
+        # We use a wildcard so that if, say, OID v7 is released in the future with
+        # a layout similar to v6, it's automatically supported.
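+        # For example, in v6 the file is named `oidv6-class-descriptions.csv`;
+        # a hypothetical `oidv7-class-descriptions.csv` would be matched as well.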
+        # If the file doesn't exist with either name, we'll fail trying to open
+        # `class-descriptions.csv`.
+
+        V5_CLASS_DESCRIPTIONS = 'class-descriptions.csv'
+
+        annotation_name = [
+            *self._glob_annotations('oidv*-class-descriptions.csv'),
+            V5_CLASS_DESCRIPTIONS,
+        ][0]
+
+        with self._open_csv_annotation(annotation_name) as class_description_reader:
+            # Prior to OID v6, this file didn't contain a header row.
+            if annotation_name == V5_CLASS_DESCRIPTIONS:
+                class_description_reader.fieldnames = ('LabelName', 'DisplayName')
+
+            for class_description in class_description_reader:
+                label_name = class_description['LabelName']
+                label_categories.add(label_name)
+
+        self._categories[AnnotationType.label] = label_categories
+
+        self._load_label_category_parents()
+
+    def _load_label_category_parents(self):
+        label_categories = self._categories[AnnotationType.label]
+
+        hierarchy_path = osp.join(
+            self._dataset_dir, OpenImagesPath.ANNOTATIONS_DIR, 'bbox_labels_600_hierarchy.json')
+
+        try:
+            with open(hierarchy_path, 'rb') as hierarchy_file:
+                root_node = json.load(hierarchy_file)
+        except FileNotFoundError:
+            return
+
+        def set_parents_from_node(node, category):
+            for child_node in node.get('Subcategory', []):
+                _, child_category = label_categories.find(child_node['LabelName'])
+
+                if category is not None and child_category is not None:
+                    child_category.parent = category.name
+
+                set_parents_from_node(child_node, child_category)
+
+        _, root_category = label_categories.find(root_node['LabelName'])
+        set_parents_from_node(root_node, root_category)
+
+    def _load_items(self):
+        image_paths_by_id = {
+            osp.splitext(osp.basename(path))[0]: path
+            for path in find_images(
+                osp.join(self._dataset_dir, 'images'),
+                recursive=True, max_depth=1)
+        }
+
+        items_by_id = {}
+
+        def load_from(annotation_name):
+            with self._open_csv_annotation(annotation_name) as image_reader:
+                for image_description in image_reader:
+                    image_id = image_description['ImageID']
+                    if image_id in items_by_id:
+                        raise RepeatedItemError(item_id=image_id)
+
+                    subset = image_description['Subset']
+
+                    if _RE_INVALID_SUBSET.fullmatch(subset):
+                        raise UnsupportedSubsetNameError(item_id=image_id, subset=subset)
+
+                    items_by_id[image_id] = DatasetItem(
+                        id=image_id,
+                        image=image_paths_by_id.get(image_id),
+                        subset=subset,
+                    )
+
+        # It's preferable to load the combined image description file,
+        # because it contains descriptions for training images without human-annotated labels
+        # (the file specific to the training set doesn't).
+        # However, if it's missing, we'll try loading subset-specific files instead, so that
+        # this extractor can be used on individual subsets of the dataset.
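+        # For example, with a v5-style layout, this falls back to files such as
+        # `test-images-with-rotation.csv` and `train-images-with-labels-with-rotation.csv`.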
+        try:
+            load_from(OpenImagesPath.FULL_IMAGE_DESCRIPTION_NAME)
+        except FileNotFoundError:
+            for pattern in OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_PATTERNS:
+                for path in self._glob_annotations(pattern):
+                    load_from(path)
+
+        self._items.extend(items_by_id.values())
+
+        self._load_labels(items_by_id)
+
+    def _load_labels(self, items_by_id):
+        label_categories = self._categories[AnnotationType.label]
+
+        # TODO: implement reading of machine-annotated labels
+
+        for label_path in self._glob_annotations('*-human-imagelabels.csv'):
+            with self._open_csv_annotation(label_path) as label_reader:
+                for label_description in label_reader:
+                    image_id = label_description['ImageID']
+                    item = items_by_id[image_id]
+
+                    confidence = float(label_description['Confidence'])
+
+                    label_name = label_description['LabelName']
+                    label_index, _ = label_categories.find(label_name)
+                    if label_index is None:
+                        raise UndefinedLabel(
+                            item_id=item.id, subset=item.subset,
+                            label_name=label_name, severity=Severity.error)
+                    item.annotations.append(Label(
+                        label=label_index, attributes={'score': confidence}))
+
+
+class OpenImagesImporter(Importer):
+    @classmethod
+    def find_sources(cls, path):
+        for pattern in [
+            OpenImagesPath.FULL_IMAGE_DESCRIPTION_NAME,
+            *OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_PATTERNS,
+        ]:
+            if glob.glob(osp.join(glob.escape(path), OpenImagesPath.ANNOTATIONS_DIR, pattern)):
+                return [{'url': path, 'format': 'open_images'}]
+
+        return []
diff --git a/docs/formats/open_images_user_manual.md b/docs/formats/open_images_user_manual.md
new file mode 100644
index 000000000000..9d93f816d7a4
--- /dev/null
+++ b/docs/formats/open_images_user_manual.md
@@ -0,0 +1,135 @@
+# Open Images user manual
+
+## Contents
+
+- [Format specification](#format-specification)
+- [Load Open Images dataset](#load-open-images-dataset)
+- [Export to other formats](#export-to-other-formats)
+- [Export to Open Images](#export-to-open-images)
+- [Particular use cases](#particular-use-cases)
+
+## Format specification
+
+A description of the Open Images Dataset (OID) format is available
+on [its website](https://storage.googleapis.com/openimages/web/download.html).
+Datumaro supports versions 4, 5 and 6.
+
+Datumaro currently supports only the human-verified image-level label
+annotations from this dataset.
+
+## Load Open Images dataset
+
+The Open Images dataset is available for free download.
+
+See the [`open-images-dataset` GitHub repository](https://github.com/cvdfoundation/open-images-dataset)
+for information on how to download the images.
+
+Datumaro also requires the image description files,
+which can be downloaded from the following URLs:
+
+- [complete set](https://storage.googleapis.com/openimages/2018_04/image_ids_and_rotation.csv)
+- [train set](https://storage.googleapis.com/openimages/v6/oidv6-train-images-with-labels-with-rotation.csv)
+- [validation set](https://storage.googleapis.com/openimages/2018_04/validation/validation-images-with-rotation.csv)
+- [test set](https://storage.googleapis.com/openimages/2018_04/test/test-images-with-rotation.csv)
+
+Datumaro expects at least one of the files above to be present.
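+
+For reference, each image description file is a plain CSV file with a header
+row. Here is a minimal sketch of the layout (the column names match the
+official files; the values are purely illustrative):
+
+```
+ImageID,Subset,OriginalURL,OriginalLandingURL,License,AuthorProfileURL,Author,Title,OriginalSize,OriginalMD5,Thumbnail300KURL,Rotation
+<image_id>,train,,,,,<author>,<title>,,,,0
+```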
+
+In addition, the following metadata file must be present as well:
+
+- [class descriptions](https://storage.googleapis.com/openimages/v6/oidv6-class-descriptions.csv)
+
+You can optionally download the following additional metadata file:
+
+- [class hierarchy](https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json)
+
+Annotations can be downloaded from the following URLs:
+
+- [train image labels](https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-human-imagelabels.csv)
+- [validation image labels](https://storage.googleapis.com/openimages/v5/validation-annotations-human-imagelabels.csv)
+- [test image labels](https://storage.googleapis.com/openimages/v5/test-annotations-human-imagelabels.csv)
+
+The annotations are optional.
+
+There are two ways to create a Datumaro project and add OID to it:
+
+``` bash
+datum import --format open_images --input-path <path/to/dataset>
+# or
+datum create
+datum add path -f open_images <path/to/dataset>
+```
+
+It is possible to specify the project name and project directory; run
+`datum create --help` for more information.
+
+The Open Images dataset directory should have the following structure:
+
+```
+└─ Dataset/
+    ├── annotations/
+    │   ├── bbox_labels_600_hierarchy.json
+    │   ├── image_ids_and_rotation.csv
+    │   ├── oidv6-class-descriptions.csv
+    │   └── *-human-imagelabels.csv
+    └── images/
+        ├── test
+        │   ├── <image_name1.jpg>
+        │   ├── <image_name2.jpg>
+        │   └── ...
+        ├── train
+        │   ├── <image_name1.jpg>
+        │   ├── <image_name2.jpg>
+        │   └── ...
+        └── validation
+            ├── <image_name1.jpg>
+            ├── <image_name2.jpg>
+            └── ...
+```
+
+To use per-subset image description files instead of `image_ids_and_rotation.csv`,
+place them in the `annotations` subdirectory.
+
+## Export to other formats
+
+Datumaro can convert OID into any other format [Datumaro supports](../user_manual.md#supported-formats).
+To get the expected result, the dataset needs to be converted to a format
+that supports image-level labels.
+There are a few ways to convert OID to other dataset formats:
+
+``` bash
+datum project import -f open_images -i <path/to/open_images>
+datum export -f cvat -o <path/to/output/dir>
+# or
+datum convert -if open_images -i <path/to/open_images> -f cvat -o <path/to/output/dir>
+```
+
+Some formats provide extra options for conversion.
+These options are passed after a double dash (`--`) in the command line.
+To get information about them, run
+
+`datum export -f <FORMAT> -- -h`
+
+## Export to Open Images
+
+Converting datasets to the Open Images format is currently not supported.
+
+## Particular use cases
+
+Datumaro supports filtering, transformation, merging etc. for all formats
+and for the Open Images format in particular. Follow the
+[user manual](../user_manual.md)
+to get more information about these operations.
+
+Here is an example of using Datumaro operations to solve
+a particular problem with the Open Images dataset:
+
+### Example. How to load the Open Images dataset and convert to the format used by CVAT
+
+```bash
+datum create -o project
+datum add path -p project -f open_images ./open-images-dataset/
+datum stats -p project
+datum export -p project -o dataset -f cvat --overwrite -- --save-images
+```
+
+More examples of working with OID from code can be found in
+[tests](../../tests/test_open_images_format.py).
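+
+The same workflow is also available from Python code. Here is a minimal
+sketch (the dataset path is a placeholder; `Dataset.import_from` and
+`Dataset.export` are Datumaro's generic dataset entry points):
+
+```python
+from datumaro.components.dataset import Dataset
+
+# Load the dataset using the importer registered by this plugin.
+dataset = Dataset.import_from('./open-images-dataset/', 'open_images')
+
+# Inspect the loaded items and their image-level labels.
+for item in dataset:
+    print(item.id, item.subset, [a.label for a in item.annotations])
+
+# Convert to another format, e.g. CVAT.
+dataset.export('./dataset-cvat/', 'cvat', save_images=True)
+```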
diff --git a/tests/assets/open_images_dataset_v5/annotations/class-descriptions.csv b/tests/assets/open_images_dataset_v5/annotations/class-descriptions.csv
new file mode 100644
index 000000000000..c6f3141a6e2b
--- /dev/null
+++ b/tests/assets/open_images_dataset_v5/annotations/class-descriptions.csv
@@ -0,0 +1,2 @@
+/m/0,Generic label #0
+/m/1,Generic label #1
diff --git a/tests/assets/open_images_dataset_v5/annotations/test-images-with-rotation.csv b/tests/assets/open_images_dataset_v5/annotations/test-images-with-rotation.csv
new file mode 100644
index 000000000000..accbc944afbb
--- /dev/null
+++ b/tests/assets/open_images_dataset_v5/annotations/test-images-with-rotation.csv
@@ -0,0 +1,2 @@
+ImageID,Subset,OriginalURL,OriginalLandingURL,License,AuthorProfileURL,Author,Title,OriginalSize,OriginalMD5,Thumbnail300KURL,Rotation
+cc,test,,,,,Intel,Test Image CC,,,,0
diff --git a/tests/assets/open_images_dataset_v5/annotations/train-images-with-labels-with-rotation.csv b/tests/assets/open_images_dataset_v5/annotations/train-images-with-labels-with-rotation.csv
new file mode 100644
index 000000000000..445b9864fdd7
--- /dev/null
+++ b/tests/assets/open_images_dataset_v5/annotations/train-images-with-labels-with-rotation.csv
@@ -0,0 +1,2 @@
+ImageID,Subset,OriginalURL,OriginalLandingURL,License,AuthorProfileURL,Author,Title,OriginalSize,OriginalMD5,Thumbnail300KURL,Rotation
+aa,train,,,,,Intel,Test Image AA,,,,0
diff --git a/tests/assets/open_images_dataset_v5/images/test/cc.jpg b/tests/assets/open_images_dataset_v5/images/test/cc.jpg
new file mode 100644
index 000000000000..8bce84d3bf50
Binary files /dev/null and b/tests/assets/open_images_dataset_v5/images/test/cc.jpg differ
diff --git a/tests/assets/open_images_dataset_v5/images/train/aa.jpg b/tests/assets/open_images_dataset_v5/images/train/aa.jpg
new file mode 100644
index 000000000000..864035b7f434
Binary files /dev/null and b/tests/assets/open_images_dataset_v5/images/train/aa.jpg differ
diff --git a/tests/assets/open_images_dataset_v6/annotations/bbox_labels_600_hierarchy.json b/tests/assets/open_images_dataset_v6/annotations/bbox_labels_600_hierarchy.json
new file mode 100644
index 000000000000..526c5765d8c1
--- /dev/null
+++ b/tests/assets/open_images_dataset_v6/annotations/bbox_labels_600_hierarchy.json
@@ -0,0 +1,13 @@
+{
+    "LabelName": "/m/x",
+    "Subcategory": [
+        {
+            "LabelName": "/m/0",
+            "Subcategory": [
+                {
+                    "LabelName": "/m/1"
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/assets/open_images_dataset_v6/annotations/image_ids_and_rotation.csv b/tests/assets/open_images_dataset_v6/annotations/image_ids_and_rotation.csv
new file mode 100644
index 000000000000..0bd3d8c7c278
--- /dev/null
+++ b/tests/assets/open_images_dataset_v6/annotations/image_ids_and_rotation.csv
@@ -0,0 +1,5 @@
+ImageID,Subset,OriginalURL,OriginalLandingURL,License,AuthorProfileURL,Author,Title,OriginalSize,OriginalMD5,Thumbnail300KURL,Rotation
+a,train,,,,,Intel,Test Image A,,,,0
+b,train,,,,,Intel,Test Image B,,,,0
+c,test,,,,,Intel,Test Image C,,,,0
+d,validation,,,,,Intel,Test Image D,,,,0
diff --git a/tests/assets/open_images_dataset_v6/annotations/oidv6-class-descriptions.csv b/tests/assets/open_images_dataset_v6/annotations/oidv6-class-descriptions.csv
new file mode 100644
index 000000000000..e73dc711361d
--- /dev/null
+++ b/tests/assets/open_images_dataset_v6/annotations/oidv6-class-descriptions.csv
@@ -0,0 +1,5 @@
+LabelName,DisplayName
+/m/0,Generic label #0
+/m/1,Generic label #1
+/m/2,Generic label #2
+/m/3,Generic label #3
diff --git a/tests/assets/open_images_dataset_v6/annotations/oidv6-train-annotations-human-imagelabels.csv b/tests/assets/open_images_dataset_v6/annotations/oidv6-train-annotations-human-imagelabels.csv
new file mode 100644
index 000000000000..56d92fe4d288
--- /dev/null
+++ b/tests/assets/open_images_dataset_v6/annotations/oidv6-train-annotations-human-imagelabels.csv
@@ -0,0 +1,3 @@
+ImageID,Source,LabelName,Confidence
+a,verification,/m/0,1
+b,verification,/m/0,0
diff --git a/tests/assets/open_images_dataset_v6/annotations/test-annotations-human-imagelabels.csv b/tests/assets/open_images_dataset_v6/annotations/test-annotations-human-imagelabels.csv
new file mode 100644
index 000000000000..33317cc1d931
--- /dev/null
+++ b/tests/assets/open_images_dataset_v6/annotations/test-annotations-human-imagelabels.csv
@@ -0,0 +1,3 @@
+ImageID,Source,LabelName,Confidence
+c,verification,/m/1,1
+c,verification,/m/3,1
diff --git a/tests/assets/open_images_dataset_v6/images/test/c.jpg b/tests/assets/open_images_dataset_v6/images/test/c.jpg
new file mode 100644
index 000000000000..8bce84d3bf50
Binary files /dev/null and b/tests/assets/open_images_dataset_v6/images/test/c.jpg differ
diff --git a/tests/assets/open_images_dataset_v6/images/train/a.jpg b/tests/assets/open_images_dataset_v6/images/train/a.jpg
new file mode 100644
index 000000000000..864035b7f434
Binary files /dev/null and b/tests/assets/open_images_dataset_v6/images/train/a.jpg differ
diff --git a/tests/assets/open_images_dataset_v6/images/train/b.jpg b/tests/assets/open_images_dataset_v6/images/train/b.jpg
new file mode 100644
index 000000000000..0ab7dbe4a419
Binary files /dev/null and b/tests/assets/open_images_dataset_v6/images/train/b.jpg differ
diff --git a/tests/assets/open_images_dataset_v6/images/validation/d.png b/tests/assets/open_images_dataset_v6/images/validation/d.png
new file mode 100644
index 000000000000..528f10546704
Binary files /dev/null and b/tests/assets/open_images_dataset_v6/images/validation/d.png differ
diff --git a/tests/test_open_images_format.py b/tests/test_open_images_format.py
new file mode 100644
index 000000000000..a7492e0cb7d4
--- /dev/null
+++ b/tests/test_open_images_format.py
@@ -0,0 +1,79 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import os.path as osp
+
+from unittest.case import TestCase
+
+import numpy as np
+
+from datumaro.plugins.open_images_format import OpenImagesImporter
+from datumaro.util.test_utils import compare_datasets_strict
+from datumaro.components.extractor import AnnotationType, DatasetItem, Label, LabelCategories
+from datumaro.components.dataset import Dataset
+from tests.requirements import Requirements, mark_requirement
+
+ASSETS_DIR = osp.join(osp.dirname(__file__), 'assets')
+
+DUMMY_DATASET_DIR_V6 = osp.join(ASSETS_DIR, 'open_images_dataset_v6')
+DUMMY_DATASET_DIR_V5 = osp.join(ASSETS_DIR, 'open_images_dataset_v5')
+
+class OpenImagesImporterTest(TestCase):
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_import_v6(self):
+        expected_dataset = Dataset.from_iterable(
+            [
+                DatasetItem(id='a', subset='train', image=np.zeros((8, 6, 3)),
+                    annotations=[Label(label=0, attributes={'score': 1})]),
+                DatasetItem(id='b', subset='train', image=np.zeros((2, 8, 3)),
+                    annotations=[Label(label=0, attributes={'score': 0})]),
+                DatasetItem(id='c', subset='test', image=np.ones((10, 5, 3)),
+                    annotations=[
+                        Label(label=1, attributes={'score': 1}),
+                        Label(label=3, attributes={'score': 1}),
+                    ]),
+                DatasetItem(id='d', subset='validation',
+                    image=np.ones((1, 5, 3)),
+                    annotations=[]),
+            ],
+            categories={
+                AnnotationType.label: LabelCategories.from_iterable([
+                    # The hierarchy file in the test dataset also includes a fake label
+                    # /m/x that is set to be /m/0's parent. This mimics the real
+                    # Open Images dataset, which assigns a nonexistent label as a parent
+                    # to all labels that don't have one.
+                    '/m/0',
+                    ('/m/1', '/m/0'),
+                    '/m/2',
+                    '/m/3',
+                ]),
+            },
+        )
+
+        dataset = Dataset.import_from(DUMMY_DATASET_DIR_V6, 'open_images')
+
+        compare_datasets_strict(self, expected_dataset, dataset)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_import_v5(self):
+        expected_dataset = Dataset.from_iterable(
+            [
+                DatasetItem(id='aa', subset='train', image=np.zeros((8, 6, 3))),
+                DatasetItem(id='cc', subset='test', image=np.ones((10, 5, 3))),
+            ],
+            categories={
+                AnnotationType.label: LabelCategories.from_iterable([
+                    '/m/0',
+                    '/m/1',
+                ]),
+            },
+        )
+
+        dataset = Dataset.import_from(DUMMY_DATASET_DIR_V5, 'open_images')
+
+        compare_datasets_strict(self, expected_dataset, dataset)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_detect(self):
+        self.assertTrue(OpenImagesImporter.detect(DUMMY_DATASET_DIR_V6))
+        self.assertTrue(OpenImagesImporter.detect(DUMMY_DATASET_DIR_V5))