From 18a75a1bbae6e700165f153b734827c92ce79406 Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Wed, 6 Jan 2021 10:15:26 +0300 Subject: [PATCH] Move dataset tests to a separate file --- tests/test_dataset.py | 280 ++++++++++++++++++++++++++++++++++++++ tests/test_project.py | 308 +++--------------------------------------- 2 files changed, 300 insertions(+), 288 deletions(-) create mode 100644 tests/test_dataset.py diff --git a/tests/test_dataset.py b/tests/test_dataset.py new file mode 100644 index 0000000000..008f56b6d6 --- /dev/null +++ b/tests/test_dataset.py @@ -0,0 +1,280 @@ +import numpy as np + +from unittest import TestCase + +from datumaro.components.project import Environment +from datumaro.components.extractor import (Extractor, DatasetItem, + Label, Mask, Points, Polygon, PolyLine, Bbox, Caption, + LabelCategories, AnnotationType, Transform +) +from datumaro.util.image import Image +from datumaro.components.dataset_filter import \ + XPathDatasetFilter, XPathAnnotationsFilter, DatasetItemEncoder +from datumaro.components.dataset import Dataset, DEFAULT_FORMAT +from datumaro.util.test_utils import TestDir, compare_datasets + + +class DatasetTest(TestCase): + def test_create_from_extractors(self): + class SrcExtractor1(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='train', annotations=[ + Bbox(1, 2, 3, 4), + Label(4), + ]), + DatasetItem(id=1, subset='val', annotations=[ + Label(4), + ]), + ]) + + class SrcExtractor2(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='val', annotations=[ + Label(5), + ]), + ]) + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='train', annotations=[ + Bbox(1, 2, 3, 4), + Label(4), + ]), + DatasetItem(id=1, subset='val', annotations=[ + Label(4), + Label(5), + ]), + ]) + + dataset = Dataset.from_extractors(SrcExtractor1(), SrcExtractor2()) + + compare_datasets(self, DstExtractor(), dataset) + + def test_can_create_from_iterable(self): + class TestExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='train', annotations=[ + Bbox(1, 2, 3, 4, label=2), + Label(4), + ]), + DatasetItem(id=1, subset='val', annotations=[ + Label(3), + ]), + ]) + + def categories(self): + return { AnnotationType.label: LabelCategories.from_iterable( + ['a', 'b', 'c', 'd', 'e']) + } + + actual = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', annotations=[ + Bbox(1, 2, 3, 4, label=2), + Label(4), + ]), + DatasetItem(id=1, subset='val', annotations=[ + Label(3), + ]), + ], categories=['a', 'b', 'c', 'd', 'e']) + + compare_datasets(self, TestExtractor(), actual) + + def test_can_save_and_load(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(2) ]), + ], categories=['a', 'b', 'c']) + + with TestDir() as test_dir: + source_dataset.save(test_dir) + + loaded_dataset = Dataset.load(test_dir) + + compare_datasets(self, source_dataset, loaded_dataset) + + def test_can_detect(self): + env = Environment() + env.importers.items = {DEFAULT_FORMAT: env.importers[DEFAULT_FORMAT]} + env.extractors.items = {DEFAULT_FORMAT: env.extractors[DEFAULT_FORMAT]} + + dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(2) ]), + ], categories=['a', 'b', 'c']) + + with TestDir() as test_dir: + dataset.save(test_dir) + + detected_format = Dataset.detect(test_dir, env=env) + + self.assertEqual(DEFAULT_FORMAT, detected_format) + + def test_can_detect_and_import(self): + env = Environment() + env.importers.items = {DEFAULT_FORMAT: env.importers[DEFAULT_FORMAT]} + env.extractors.items = {DEFAULT_FORMAT: env.extractors[DEFAULT_FORMAT]} + + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(2) ]), + ], categories=['a', 'b', 'c']) + + with TestDir() as test_dir: + source_dataset.save(test_dir) + + imported_dataset = Dataset.import_from(test_dir, env=env) + + compare_datasets(self, source_dataset, imported_dataset) + + def test_can_export_by_string_format_name(self): + env = Environment() + env.converters.items = {'qq': env.converters[DEFAULT_FORMAT]} + + dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(2) ]), + ], categories=['a', 'b', 'c'], env=env) + + with TestDir() as test_dir: + dataset.export('qq', save_dir=test_dir) + + def test_can_transform_by_string_name(self): + expected = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(2) ], attributes={'qq': 1}), + ], categories=['a', 'b', 'c']) + + class TestTransform(Transform): + def transform_item(self, item): + return self.wrap_item(item, attributes={'qq': 1}) + + env = Environment() + env.transforms.items = {'qq': TestTransform} + + dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(2) ]), + ], categories=['a', 'b', 'c'], env=env) + + actual = dataset.transform('qq') + + self.assertTrue(isinstance(actual, Dataset)) + self.assertEqual(env, actual.env) + compare_datasets(self, expected, actual) + + +class DatasetItemTest(TestCase): + def test_ctor_requires_id(self): + with self.assertRaises(Exception): + # pylint: disable=no-value-for-parameter + DatasetItem() + # pylint: enable=no-value-for-parameter + + @staticmethod + def test_ctors_with_image(): + for args in [ + { 'id': 0, 'image': None }, + { 'id': 0, 'image': 'path.jpg' }, + { 'id': 0, 'image': np.array([1, 2, 3]) }, + { 'id': 0, 'image': lambda f: np.array([1, 2, 3]) }, + { 'id': 0, 'image': Image(data=np.array([1, 2, 3])) }, + ]: + DatasetItem(**args) + + +class DatasetFilterTest(TestCase): + @staticmethod + def test_item_representations(): + item = DatasetItem(id=1, subset='subset', path=['a', 'b'], + image=np.ones((5, 4, 3)), + annotations=[ + Label(0, attributes={'a1': 1, 'a2': '2'}, id=1, group=2), + Caption('hello', id=1), + Caption('world', group=5), + Label(2, id=3, attributes={ 'x': 1, 'y': '2' }), + Bbox(1, 2, 3, 4, label=4, id=4, attributes={ 'a': 1.0 }), + Bbox(5, 6, 7, 8, id=5, group=5), + Points([1, 2, 2, 0, 1, 1], label=0, id=5), + Mask(id=5, image=np.ones((3, 2))), + Mask(label=3, id=5, image=np.ones((2, 3))), + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11), + Polygon([1, 2, 3, 4, 5, 6, 7, 8]), + ] + ) + + encoded = DatasetItemEncoder.encode(item) + DatasetItemEncoder.to_string(encoded) + + def test_item_filter_can_be_applied(self): + class TestExtractor(Extractor): + def __iter__(self): + for i in range(4): + yield DatasetItem(id=i, subset='train') + + extractor = TestExtractor() + + filtered = XPathDatasetFilter(extractor, '/item[id > 1]') + + self.assertEqual(2, len(filtered)) + + def test_annotations_filter_can_be_applied(self): + class SrcExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=0), + DatasetItem(id=1, annotations=[ + Label(0), + Label(1), + ]), + DatasetItem(id=2, annotations=[ + Label(0), + Label(2), + ]), + ]) + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=0), + DatasetItem(id=1, annotations=[ + Label(0), + ]), + DatasetItem(id=2, annotations=[ + Label(0), + ]), + ]) + + extractor = SrcExtractor() + + filtered = XPathAnnotationsFilter(extractor, + '/item/annotation[label_id = 0]') + + self.assertListEqual(list(filtered), list(DstExtractor())) + + def test_annotations_filter_can_remove_empty_items(self): + class SrcExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=0), + DatasetItem(id=1, annotations=[ + Label(0), + Label(1), + ]), + DatasetItem(id=2, annotations=[ + Label(0), + Label(2), + ]), + ]) + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=2, annotations=[ + Label(2), + ]), + ]) + + extractor = SrcExtractor() + + filtered = XPathAnnotationsFilter(extractor, + '/item/annotation[label_id = 2]', remove_empty=True) + + self.assertListEqual(list(filtered), list(DstExtractor())) diff --git a/tests/test_project.py b/tests/test_project.py index c3dc134866..6944377ff1 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -8,13 +8,8 @@ from datumaro.components.config_model import Source, Model from datumaro.components.launcher import Launcher, ModelTransform from datumaro.components.extractor import (Extractor, DatasetItem, - Label, Mask, Points, Polygon, PolyLine, Bbox, Caption, - LabelCategories, AnnotationType, Transform -) -from datumaro.util.image import Image + Label, LabelCategories, AnnotationType) from datumaro.components.config import Config, DefaultConfig, SchemaBuilder -from datumaro.components.dataset_filter import \ - XPathDatasetFilter, XPathAnnotationsFilter, DatasetItemEncoder from datumaro.components.dataset import Dataset, DEFAULT_FORMAT from datumaro.util.test_utils import TestDir, compare_datasets @@ -383,104 +378,34 @@ def test_can_detect_and_import(self): DEFAULT_FORMAT) compare_datasets(self, source_dataset, imported_dataset) -class DatasetFilterTest(TestCase): - @staticmethod - def test_item_representations(): - item = DatasetItem(id=1, subset='subset', path=['a', 'b'], - image=np.ones((5, 4, 3)), - annotations=[ - Label(0, attributes={'a1': 1, 'a2': '2'}, id=1, group=2), - Caption('hello', id=1), - Caption('world', group=5), - Label(2, id=3, attributes={ 'x': 1, 'y': '2' }), - Bbox(1, 2, 3, 4, label=4, id=4, attributes={ 'a': 1.0 }), - Bbox(5, 6, 7, 8, id=5, group=5), - Points([1, 2, 2, 0, 1, 1], label=0, id=5), - Mask(id=5, image=np.ones((3, 2))), - Mask(label=3, id=5, image=np.ones((2, 3))), - PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11), - Polygon([1, 2, 3, 4, 5, 6, 7, 8]), - ] - ) - - encoded = DatasetItemEncoder.encode(item) - DatasetItemEncoder.to_string(encoded) - - def test_item_filter_can_be_applied(self): - class TestExtractor(Extractor): - def __iter__(self): - for i in range(4): - yield DatasetItem(id=i, subset='train') - - extractor = TestExtractor() - - filtered = XPathDatasetFilter(extractor, '/item[id > 1]') - - self.assertEqual(2, len(filtered)) - - def test_annotations_filter_can_be_applied(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=0), - DatasetItem(id=1, annotations=[ - Label(0), - Label(1), - ]), - DatasetItem(id=2, annotations=[ - Label(0), - Label(2), - ]), - ]) - - class DstExtractor(Extractor): + def test_custom_extractor_can_be_created(self): + class CustomExtractor(Extractor): def __iter__(self): return iter([ - DatasetItem(id=0), - DatasetItem(id=1, annotations=[ - Label(0), - ]), - DatasetItem(id=2, annotations=[ - Label(0), - ]), - ]) - - extractor = SrcExtractor() - - filtered = XPathAnnotationsFilter(extractor, - '/item/annotation[label_id = 0]') + DatasetItem(id=0, subset='train'), + DatasetItem(id=1, subset='train'), + DatasetItem(id=2, subset='train'), - self.assertListEqual(list(filtered), list(DstExtractor())) + DatasetItem(id=3, subset='test'), + DatasetItem(id=4, subset='test'), - def test_annotations_filter_can_remove_empty_items(self): - class SrcExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=0), - DatasetItem(id=1, annotations=[ - Label(0), - Label(1), - ]), - DatasetItem(id=2, annotations=[ - Label(0), - Label(2), - ]), + DatasetItem(id=1), + DatasetItem(id=2), + DatasetItem(id=3), ]) - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=2, annotations=[ - Label(2), - ]), - ]) + extractor_name = 'ext1' + project = Project() + project.env.extractors.register(extractor_name, CustomExtractor) + project.add_source('src1', { + 'url': 'path', + 'format': extractor_name, + }) - extractor = SrcExtractor() + dataset = project.make_dataset() - filtered = XPathAnnotationsFilter(extractor, - '/item/annotation[label_id = 2]', remove_empty=True) + compare_datasets(self, CustomExtractor(), dataset) - self.assertListEqual(list(filtered), list(DstExtractor())) class ConfigTest(TestCase): def test_can_produce_multilayer_config_from_dict(self): @@ -509,196 +434,3 @@ def test_can_produce_multilayer_config_from_dict(self): }, schema=schema_top) self.assertEqual(value, source.container['elem'].desc.options['k']) - -class ExtractorTest(TestCase): - def test_custom_extractor_can_be_created(self): - class CustomExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=0, subset='train'), - DatasetItem(id=1, subset='train'), - DatasetItem(id=2, subset='train'), - - DatasetItem(id=3, subset='test'), - DatasetItem(id=4, subset='test'), - - DatasetItem(id=1), - DatasetItem(id=2), - DatasetItem(id=3), - ]) - - extractor_name = 'ext1' - project = Project() - project.env.extractors.register(extractor_name, CustomExtractor) - project.add_source('src1', { - 'url': 'path', - 'format': extractor_name, - }) - - dataset = project.make_dataset() - - compare_datasets(self, CustomExtractor(), dataset) - -class DatasetTest(TestCase): - def test_create_from_extractors(self): - class SrcExtractor1(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', annotations=[ - Bbox(1, 2, 3, 4), - Label(4), - ]), - DatasetItem(id=1, subset='val', annotations=[ - Label(4), - ]), - ]) - - class SrcExtractor2(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='val', annotations=[ - Label(5), - ]), - ]) - - class DstExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', annotations=[ - Bbox(1, 2, 3, 4), - Label(4), - ]), - DatasetItem(id=1, subset='val', annotations=[ - Label(4), - Label(5), - ]), - ]) - - dataset = Dataset.from_extractors(SrcExtractor1(), SrcExtractor2()) - - compare_datasets(self, DstExtractor(), dataset) - - def test_can_create_from_iterable(self): - class TestExtractor(Extractor): - def __iter__(self): - return iter([ - DatasetItem(id=1, subset='train', annotations=[ - Bbox(1, 2, 3, 4, label=2), - Label(4), - ]), - DatasetItem(id=1, subset='val', annotations=[ - Label(3), - ]), - ]) - - def categories(self): - return { AnnotationType.label: LabelCategories.from_iterable( - ['a', 'b', 'c', 'd', 'e']) - } - - actual = Dataset.from_iterable([ - DatasetItem(id=1, subset='train', annotations=[ - Bbox(1, 2, 3, 4, label=2), - Label(4), - ]), - DatasetItem(id=1, subset='val', annotations=[ - Label(3), - ]), - ], categories=['a', 'b', 'c', 'd', 'e']) - - compare_datasets(self, TestExtractor(), actual) - - def test_can_save_and_load(self): - source_dataset = Dataset.from_iterable([ - DatasetItem(id=1, annotations=[ Label(2) ]), - ], categories=['a', 'b', 'c']) - - with TestDir() as test_dir: - source_dataset.save(test_dir) - - loaded_dataset = Dataset.load(test_dir) - - compare_datasets(self, source_dataset, loaded_dataset) - - def test_can_detect(self): - env = Environment() - env.importers.items = {DEFAULT_FORMAT: env.importers[DEFAULT_FORMAT]} - env.extractors.items = {DEFAULT_FORMAT: env.extractors[DEFAULT_FORMAT]} - - dataset = Dataset.from_iterable([ - DatasetItem(id=1, annotations=[ Label(2) ]), - ], categories=['a', 'b', 'c']) - - with TestDir() as test_dir: - dataset.save(test_dir) - - detected_format = Dataset.detect(test_dir, env=env) - - self.assertEqual(DEFAULT_FORMAT, detected_format) - - def test_can_detect_and_import(self): - env = Environment() - env.importers.items = {DEFAULT_FORMAT: env.importers[DEFAULT_FORMAT]} - env.extractors.items = {DEFAULT_FORMAT: env.extractors[DEFAULT_FORMAT]} - - source_dataset = Dataset.from_iterable([ - DatasetItem(id=1, annotations=[ Label(2) ]), - ], categories=['a', 'b', 'c']) - - with TestDir() as test_dir: - source_dataset.save(test_dir) - - imported_dataset = Dataset.import_from(test_dir, env=env) - - compare_datasets(self, source_dataset, imported_dataset) - - def test_can_export_by_string_format_name(self): - env = Environment() - env.converters.items = {'qq': env.converters[DEFAULT_FORMAT]} - - dataset = Dataset.from_iterable([ - DatasetItem(id=1, annotations=[ Label(2) ]), - ], categories=['a', 'b', 'c'], env=env) - - with TestDir() as test_dir: - dataset.export('qq', save_dir=test_dir) - - def test_can_transform_by_string_name(self): - expected = Dataset.from_iterable([ - DatasetItem(id=1, annotations=[ Label(2) ], attributes={'qq': 1}), - ], categories=['a', 'b', 'c']) - - class TestTransform(Transform): - def transform_item(self, item): - return self.wrap_item(item, attributes={'qq': 1}) - - env = Environment() - env.transforms.items = {'qq': TestTransform} - - dataset = Dataset.from_iterable([ - DatasetItem(id=1, annotations=[ Label(2) ]), - ], categories=['a', 'b', 'c'], env=env) - - actual = dataset.transform('qq') - - self.assertTrue(isinstance(actual, Dataset)) - self.assertEqual(env, actual.env) - compare_datasets(self, expected, actual) - -class DatasetItemTest(TestCase): - def test_ctor_requires_id(self): - with self.assertRaises(Exception): - # pylint: disable=no-value-for-parameter - DatasetItem() - # pylint: enable=no-value-for-parameter - - @staticmethod - def test_ctors_with_image(): - for args in [ - { 'id': 0, 'image': None }, - { 'id': 0, 'image': 'path.jpg' }, - { 'id': 0, 'image': np.array([1, 2, 3]) }, - { 'id': 0, 'image': lambda f: np.array([1, 2, 3]) }, - { 'id': 0, 'image': Image(data=np.array([1, 2, 3])) }, - ]: - DatasetItem(**args) \ No newline at end of file