Move dataset tests to a separate file #74

Merged 1 commit on Jan 6, 2021
tests/test_dataset.py: 280 additions, 0 deletions
@@ -0,0 +1,280 @@
import numpy as np

from unittest import TestCase

from datumaro.components.project import Environment
from datumaro.components.extractor import (Extractor, DatasetItem,
    Label, Mask, Points, Polygon, PolyLine, Bbox, Caption,
    LabelCategories, AnnotationType, Transform
)
from datumaro.util.image import Image
from datumaro.components.dataset_filter import \
    XPathDatasetFilter, XPathAnnotationsFilter, DatasetItemEncoder
from datumaro.components.dataset import Dataset, DEFAULT_FORMAT
from datumaro.util.test_utils import TestDir, compare_datasets


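# Tests for the Dataset class: building from extractors and iterables,
# saving and loading, format detection and import, export, and transforms.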
class DatasetTest(TestCase):
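    # Items that share an id and subset across the source extractors are
    # merged into a single item with the combined annotations.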
    def test_create_from_extractors(self):
        class SrcExtractor1(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, subset='train', annotations=[
                        Bbox(1, 2, 3, 4),
                        Label(4),
                    ]),
                    DatasetItem(id=1, subset='val', annotations=[
                        Label(4),
                    ]),
                ])

        class SrcExtractor2(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, subset='val', annotations=[
                        Label(5),
                    ]),
                ])

        class DstExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, subset='train', annotations=[
                        Bbox(1, 2, 3, 4),
                        Label(4),
                    ]),
                    DatasetItem(id=1, subset='val', annotations=[
                        Label(4),
                        Label(5),
                    ]),
                ])

        dataset = Dataset.from_extractors(SrcExtractor1(), SrcExtractor2())

        compare_datasets(self, DstExtractor(), dataset)

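    # Building a dataset from a plain iterable and a list of label names
    # should match an Extractor defining the same items and categories.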
    def test_can_create_from_iterable(self):
        class TestExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=1, subset='train', annotations=[
                        Bbox(1, 2, 3, 4, label=2),
                        Label(4),
                    ]),
                    DatasetItem(id=1, subset='val', annotations=[
                        Label(3),
                    ]),
                ])

            def categories(self):
                return { AnnotationType.label: LabelCategories.from_iterable(
                    ['a', 'b', 'c', 'd', 'e'])
                }

        actual = Dataset.from_iterable([
            DatasetItem(id=1, subset='train', annotations=[
                Bbox(1, 2, 3, 4, label=2),
                Label(4),
            ]),
            DatasetItem(id=1, subset='val', annotations=[
                Label(3),
            ]),
        ], categories=['a', 'b', 'c', 'd', 'e'])

        compare_datasets(self, TestExtractor(), actual)

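    # Round trip: save to a temporary directory, load it back, and compare.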
    def test_can_save_and_load(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[ Label(2) ]),
        ], categories=['a', 'b', 'c'])

        with TestDir() as test_dir:
            source_dataset.save(test_dir)

            loaded_dataset = Dataset.load(test_dir)

            compare_datasets(self, source_dataset, loaded_dataset)

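    # Only the default format is left registered in the environment,
    # so it should be reported for a dataset saved in that format.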
    def test_can_detect(self):
        env = Environment()
        env.importers.items = {DEFAULT_FORMAT: env.importers[DEFAULT_FORMAT]}
        env.extractors.items = {DEFAULT_FORMAT: env.extractors[DEFAULT_FORMAT]}

        dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[ Label(2) ]),
        ], categories=['a', 'b', 'c'])

        with TestDir() as test_dir:
            dataset.save(test_dir)

            detected_format = Dataset.detect(test_dir, env=env)

            self.assertEqual(DEFAULT_FORMAT, detected_format)

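    # Same restricted environment, but the dataset is imported back
    # without naming the format explicitly.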
    def test_can_detect_and_import(self):
        env = Environment()
        env.importers.items = {DEFAULT_FORMAT: env.importers[DEFAULT_FORMAT]}
        env.extractors.items = {DEFAULT_FORMAT: env.extractors[DEFAULT_FORMAT]}

        source_dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[ Label(2) ]),
        ], categories=['a', 'b', 'c'])

        with TestDir() as test_dir:
            source_dataset.save(test_dir)

            imported_dataset = Dataset.import_from(test_dir, env=env)

            compare_datasets(self, source_dataset, imported_dataset)

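    # A converter registered under the name 'qq' can be selected by
    # passing that name to export().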
    def test_can_export_by_string_format_name(self):
        env = Environment()
        env.converters.items = {'qq': env.converters[DEFAULT_FORMAT]}

        dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[ Label(2) ]),
        ], categories=['a', 'b', 'c'], env=env)

        with TestDir() as test_dir:
            dataset.export('qq', save_dir=test_dir)

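    # A transform registered under the name 'qq' can be applied by name;
    # the result is still a Dataset bound to the same environment.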
    def test_can_transform_by_string_name(self):
        expected = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[ Label(2) ], attributes={'qq': 1}),
        ], categories=['a', 'b', 'c'])

        class TestTransform(Transform):
            def transform_item(self, item):
                return self.wrap_item(item, attributes={'qq': 1})

        env = Environment()
        env.transforms.items = {'qq': TestTransform}

        dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[ Label(2) ]),
        ], categories=['a', 'b', 'c'], env=env)

        actual = dataset.transform('qq')

        self.assertTrue(isinstance(actual, Dataset))
        self.assertEqual(env, actual.env)
        compare_datasets(self, expected, actual)


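# Tests for DatasetItem construction.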
class DatasetItemTest(TestCase):
    def test_ctor_requires_id(self):
        with self.assertRaises(Exception):
            # pylint: disable=no-value-for-parameter
            DatasetItem()
            # pylint: enable=no-value-for-parameter

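    # The image argument accepts None, a path, a numpy array,
    # a loader callable, or an Image object.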
    @staticmethod
    def test_ctors_with_image():
        for args in [
            { 'id': 0, 'image': None },
            { 'id': 0, 'image': 'path.jpg' },
            { 'id': 0, 'image': np.array([1, 2, 3]) },
            { 'id': 0, 'image': lambda f: np.array([1, 2, 3]) },
            { 'id': 0, 'image': Image(data=np.array([1, 2, 3])) },
        ]:
            DatasetItem(**args)


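# Tests for item encoding and the XPath-based dataset and annotation filters.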
class DatasetFilterTest(TestCase):
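    # Every annotation type should be representable as an encoded item string.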
    @staticmethod
    def test_item_representations():
        item = DatasetItem(id=1, subset='subset', path=['a', 'b'],
            image=np.ones((5, 4, 3)),
            annotations=[
                Label(0, attributes={'a1': 1, 'a2': '2'}, id=1, group=2),
                Caption('hello', id=1),
                Caption('world', group=5),
                Label(2, id=3, attributes={ 'x': 1, 'y': '2' }),
                Bbox(1, 2, 3, 4, label=4, id=4, attributes={ 'a': 1.0 }),
                Bbox(5, 6, 7, 8, id=5, group=5),
                Points([1, 2, 2, 0, 1, 1], label=0, id=5),
                Mask(id=5, image=np.ones((3, 2))),
                Mask(label=3, id=5, image=np.ones((2, 3))),
                PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11),
                Polygon([1, 2, 3, 4, 5, 6, 7, 8]),
            ]
        )

        encoded = DatasetItemEncoder.encode(item)
        DatasetItemEncoder.to_string(encoded)

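    # '/item[id > 1]' keeps only the items with id 2 and 3.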
    def test_item_filter_can_be_applied(self):
        class TestExtractor(Extractor):
            def __iter__(self):
                for i in range(4):
                    yield DatasetItem(id=i, subset='train')

        extractor = TestExtractor()

        filtered = XPathDatasetFilter(extractor, '/item[id > 1]')

        self.assertEqual(2, len(filtered))

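    # Annotations that do not match the XPath are dropped;
    # items left without annotations are kept by default.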
    def test_annotations_filter_can_be_applied(self):
        class SrcExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=0),
                    DatasetItem(id=1, annotations=[
                        Label(0),
                        Label(1),
                    ]),
                    DatasetItem(id=2, annotations=[
                        Label(0),
                        Label(2),
                    ]),
                ])

        class DstExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=0),
                    DatasetItem(id=1, annotations=[
                        Label(0),
                    ]),
                    DatasetItem(id=2, annotations=[
                        Label(0),
                    ]),
                ])

        extractor = SrcExtractor()

        filtered = XPathAnnotationsFilter(extractor,
            '/item/annotation[label_id = 0]')

        self.assertListEqual(list(filtered), list(DstExtractor()))

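    # With remove_empty=True, items left without matching annotations
    # are removed entirely.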
    def test_annotations_filter_can_remove_empty_items(self):
        class SrcExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=0),
                    DatasetItem(id=1, annotations=[
                        Label(0),
                        Label(1),
                    ]),
                    DatasetItem(id=2, annotations=[
                        Label(0),
                        Label(2),
                    ]),
                ])

        class DstExtractor(Extractor):
            def __iter__(self):
                return iter([
                    DatasetItem(id=2, annotations=[
                        Label(2),
                    ]),
                ])

        extractor = SrcExtractor()

        filtered = XPathAnnotationsFilter(extractor,
            '/item/annotation[label_id = 2]', remove_empty=True)

        self.assertListEqual(list(filtered), list(DstExtractor()))