Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow undeclared label attributes on CVAT format #192

Merged
merged 5 commits into from
Mar 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-

### Changed
-
- Added an option to allow undeclared annotation attributes in CVAT format export (<https://github.com/openvinotoolkit/datumaro/pull/192>)

### Deprecated
-
Expand All @@ -20,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-

### Fixed
-
- Added support for label attributes in Datumaro format (<https://github.com/openvinotoolkit/datumaro/pull/192>)

### Security
-
Expand Down
8 changes: 7 additions & 1 deletion datumaro/components/cli_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# SPDX-License-Identifier: MIT

import argparse
import logging as log

from datumaro.cli.util import MultilineFormatter
from datumaro.util import to_snake_case
Expand Down Expand Up @@ -36,7 +37,12 @@ def parse_cmdline(cls, args=None):
args = args[1:]
parser = cls.build_cmdline_parser()
args = parser.parse_args(args)
return vars(args)
args = vars(args)

log.debug("Parsed parameters: \n\t%s",
'\n\t'.join('%s: %s' % (k, v) for k, v in args.items()))

return args

def remove_plugin_type(s):
for t in {'transform', 'extractor', 'converter', 'launcher', 'importer'}:
Expand Down
60 changes: 49 additions & 11 deletions datumaro/plugins/cvat_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import os.path as osp
from collections import OrderedDict
from itertools import chain
from xml.sax.saxutils import XMLGenerator

from datumaro.components.converter import Converter
Expand Down Expand Up @@ -182,9 +183,9 @@ def _write_item(self, item, index):
for ann in item.annotations:
if ann.type in {AnnotationType.points, AnnotationType.polyline,
AnnotationType.polygon, AnnotationType.bbox}:
self._write_shape(ann)
self._write_shape(ann, item)
elif ann.type == AnnotationType.label:
self._write_tag(ann)
self._write_tag(ann, item)
else:
continue

Expand Down Expand Up @@ -215,7 +216,7 @@ def _write_meta(self):
("input_type", "text"),
("default_value", ""),
("values", ""),
])) for attr in label.attributes
])) for attr in self._get_label_attrs(label)
])
])) for label in label_cat.items
]),
Expand All @@ -226,15 +227,27 @@ def _write_meta(self):
def _get_label(self, label_id):
if label_id is None:
return ""
label_cat = self._extractor.categories()[AnnotationType.label]
label_cat = self._extractor.categories().get(
AnnotationType.label, LabelCategories())
return label_cat.items[label_id]

def _write_shape(self, shape):
def _get_label_attrs(self, label):
    """Return the set of attribute names that may be written for a label.

    ``label`` is either an integer index into the label categories or a
    label entry exposing an ``attributes`` collection. The result is the
    union of the label's own attributes and the attributes declared on the
    label categories as a whole, with the converter context's built-in
    attributes removed.
    """
    # Fall back to empty label categories when the dataset declares none.
    label_cat = self._extractor.categories().get(
        AnnotationType.label, LabelCategories())
    if isinstance(label, int):
        # Resolve an index to the corresponding label entry.
        label = label_cat[label]
    return set(chain(label.attributes, label_cat.attributes)) - \
        self._context._builtin_attrs

def _write_shape(self, shape, item):
if shape.label is None:
log.warning("Item %s: skipping a %s with no label",
item.id, shape.type.name)
return

label_name = self._get_label(shape.label).name
shape_data = OrderedDict([
("label", self._get_label(shape.label).name),
("label", label_name),
("occluded", str(int(shape.attributes.get('occluded', False)))),
])

Expand Down Expand Up @@ -271,13 +284,21 @@ def _write_shape(self, shape):
raise NotImplementedError("unknown shape type")

for attr_name, attr_value in shape.attributes.items():
if attr_name in self._context._builtin_attrs:
continue
if isinstance(attr_value, bool):
attr_value = 'true' if attr_value else 'false'
if attr_name in self._get_label(shape.label).attributes:
if self._context._allow_undeclared_attrs or \
attr_name in self._get_label_attrs(shape.label):
self._writer.add_attribute(OrderedDict([
("name", str(attr_name)),
("value", str(attr_value)),
]))
else:
log.warning("Item %s: skipping undeclared "
"attribute '%s' for label '%s' "
"(allow with --allow-undeclared-attrs option)",
item.id, attr_name, label_name)

if shape.type == AnnotationType.bbox:
self._writer.close_box()
Expand All @@ -290,25 +311,36 @@ def _write_shape(self, shape):
else:
raise NotImplementedError("unknown shape type")

def _write_tag(self, label):
def _write_tag(self, label, item):
if label.label is None:
log.warning("Item %s: skipping a %s with no label",
item.id, label.type.name)
return

label_name = self._get_label(label.label).name
tag_data = OrderedDict([
('label', self._get_label(label.label).name),
('label', label_name),
])
if label.group:
tag_data['group_id'] = str(label.group)
self._writer.open_tag(tag_data)

for attr_name, attr_value in label.attributes.items():
if attr_name in self._context._builtin_attrs:
continue
if isinstance(attr_value, bool):
attr_value = 'true' if attr_value else 'false'
if attr_name in self._get_label(label.label).attributes:
if self._context._allow_undeclared_attrs or \
attr_name in self._get_label_attrs(label.label):
self._writer.add_attribute(OrderedDict([
("name", str(attr_name)),
("value", str(attr_value)),
]))
else:
log.warning("Item %s: skipping undeclared "
"attribute '%s' for label '%s' "
"(allow with --allow-undeclared-attrs option)",
item.id, attr_name, label_name)

self._writer.close_tag()

Expand All @@ -320,12 +352,18 @@ def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)
parser.add_argument('--reindex', action='store_true',
help="Assign new indices to frames (default: %(default)s)")
parser.add_argument('--allow-undeclared-attrs', action='store_true',
help="Write annotation attributes even if they are not present in "
"the input dataset metainfo (default: %(default)s)")
return parser

def __init__(self, extractor, save_dir, reindex=False, **kwargs):
def __init__(self, extractor, save_dir, reindex=False,
allow_undeclared_attrs=False, **kwargs):
super().__init__(extractor, save_dir, **kwargs)

self._reindex = reindex
self._builtin_attrs = CvatPath.BUILTIN_ATTRS
self._allow_undeclared_attrs = allow_undeclared_attrs

def apply(self):
self._images_dir = osp.join(self._save_dir, CvatPath.IMAGES_DIR)
Expand Down
2 changes: 2 additions & 0 deletions datumaro/plugins/cvat_format/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ class CvatPath:
IMAGES_DIR = 'images'

IMAGE_EXT = '.jpg'

BUILTIN_ATTRS = {'occluded', 'outside', 'keyframe', 'track_id'}
5 changes: 5 additions & 0 deletions datumaro/plugins/datumaro_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,19 @@ def _convert_caption_object(self, obj):
})
return converted

def _convert_attribute_categories(self, attributes):
return sorted(attributes)

def _convert_label_categories(self, obj):
    """Convert label categories into a plain dict.

    The result has two keys:
      - 'labels': one dict per entry of ``obj.items`` holding its 'name',
        'parent' and 'attributes';
      - 'attributes': the attributes declared on ``obj`` itself.

    Attribute collections go through ``_convert_attribute_categories``;
    names and parents are passed through the ``cast`` helper with ``str``.
    """
    converted = {
        'labels': [],
        'attributes': self._convert_attribute_categories(obj.attributes),
    }
    for label in obj.items:
        converted['labels'].append({
            'name': cast(label.name, str),
            'parent': cast(label.parent, str),
            'attributes': self._convert_attribute_categories(label.attributes),
        })
    return converted

Expand Down
6 changes: 4 additions & 2 deletions datumaro/plugins/datumaro_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ def _load_categories(parsed):

parsed_label_cat = parsed['categories'].get(AnnotationType.label.name)
if parsed_label_cat:
label_categories = LabelCategories()
label_categories = LabelCategories(
attributes=parsed_label_cat.get('attributes', []))
for item in parsed_label_cat['labels']:
label_categories.add(item['name'], parent=item['parent'])
label_categories.add(item['name'], parent=item['parent'],
attributes=item.get('attributes', []))

categories[AnnotationType.label] = label_categories

Expand Down
52 changes: 37 additions & 15 deletions tests/test_cvat_format.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from functools import partial
import numpy as np
import os
import os.path as osp

import numpy as np
from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (DatasetItem,
Expand Down Expand Up @@ -148,18 +148,17 @@ def _test_save_and_load(self, source_dataset, converter, test_dir,
target_dataset=target_dataset, importer_args=importer_args, **kwargs)

def test_can_save_and_load(self):
label_categories = LabelCategories()
src_label_cat = LabelCategories(attributes={'occluded', 'common'})
for i in range(10):
label_categories.add(str(i))
label_categories.items[2].attributes.update(['a1', 'a2', 'empty'])
label_categories.attributes.update(['occluded'])
src_label_cat.add(str(i))
src_label_cat.items[2].attributes.update(['a1', 'a2', 'empty'])

source_dataset = Dataset.from_iterable([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True}),
attributes={ 'occluded': True, 'common': 't' }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'a1': 'x', 'a2': 42, 'empty': '',
Expand Down Expand Up @@ -188,16 +187,19 @@ def test_can_save_and_load(self):

DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4))),
], categories={
AnnotationType.label: label_categories,
})
], categories={ AnnotationType.label: src_label_cat })

target_label_cat = LabelCategories(
attributes={'occluded'}) # unable to represent a common attribute
for i in range(10):
target_label_cat.add(str(i), attributes={'common'})
target_label_cat.items[2].attributes.update(['a1', 'a2', 'empty', 'common'])
target_dataset = Dataset.from_iterable([
DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)),
annotations=[
Polygon([0, 0, 4, 0, 4, 4],
label=1, group=4,
attributes={ 'occluded': True }),
attributes={ 'occluded': True, 'common': 't' }),
Points([1, 1, 3, 2, 2, 3],
label=2,
attributes={ 'occluded': False, 'empty': '',
Expand Down Expand Up @@ -228,15 +230,36 @@ def test_can_save_and_load(self):
DatasetItem(id=3, subset='s3', image=Image(
path='3.jpg', size=(2, 4)),
attributes={'frame': 0}),
], categories={
AnnotationType.label: label_categories,
})
], categories={ AnnotationType.label: target_label_cat })

with TestDir() as test_dir:
self._test_save_and_load(source_dataset,
partial(CvatConverter.convert, save_images=True), test_dir,
target_dataset=target_dataset)

def test_can_allow_undeclared_attrs(self):
    """Check that undeclared attributes are kept when explicitly allowed.

    The source label 'a' declares only attribute 'x', while its
    annotations also carry 'y'. With ``allow_undeclared_attrs=True`` the
    expected dataset still contains 'y' on both annotations.
    """
    source_dataset = Dataset.from_iterable([
        DatasetItem(id=0, annotations=[
            Label(0, attributes={ 'x': 4, 'y': 2 }),
            Bbox(1, 2, 3, 4, label=0, attributes={ 'x': 1, 'y': 1 }),
        ]),
    ], categories=[ ('a', '', {'x'}) ])

    # Expected result: 'occluded' appears on the categories and the bbox,
    # and the item gains a 'frame' attribute.
    target_label_cat = LabelCategories(attributes={'occluded'})
    target_label_cat.add('a', attributes={'x'})
    target_dataset = Dataset.from_iterable([
        DatasetItem(id=0, annotations=[
            Label(0, attributes={ 'x': 4, 'y': 2 }),
            Bbox(1, 2, 3, 4, label=0,
                attributes={ 'x': 1, 'y': 1, 'occluded': False }),
        ], attributes={'frame': 0}),
    ], categories={ AnnotationType.label: target_label_cat })

    with TestDir() as test_dir:
        self._test_save_and_load(source_dataset,
            partial(CvatConverter.convert, allow_undeclared_attrs=True),
            test_dir, target_dataset=target_dataset)

def test_relative_paths(self):
source_dataset = Dataset.from_iterable([
DatasetItem(id='1', image=np.ones((4, 2, 3))),
Expand All @@ -259,11 +282,10 @@ def test_relative_paths(self):
target_dataset=target_dataset, require_images=True)

def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
label_categories = LabelCategories()
label_categories = LabelCategories(attributes={'occluded'})
for i in range(10):
label_categories.add(str(i))
label_categories.items[2].attributes.update(['a1', 'a2', 'empty'])
label_categories.attributes.update(['occluded'])

source_dataset = Dataset.from_iterable([
DatasetItem(id='кириллица с пробелом',
Expand Down
16 changes: 10 additions & 6 deletions tests/test_datumaro_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os.path as osp

import numpy as np

from unittest import TestCase
from datumaro.components.project import Dataset
from datumaro.components.extractor import (DatasetItem,
Expand All @@ -29,9 +28,9 @@ def _test_save_and_load(self, source_dataset, converter, test_dir,

@property
def test_dataset(self):
label_categories = LabelCategories()
label_categories = LabelCategories(attributes={'a', 'b', 'score'})
for i in range(5):
label_categories.add('cat' + str(i))
label_categories.add('cat' + str(i), attributes={'x', 'y'})

mask_categories = MaskCategories(
generate_colormap(len(label_categories.items)))
Expand All @@ -52,9 +51,14 @@ def test_dataset(self):
Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
'score': 1.0,
}),
Bbox(5, 6, 7, 8, id=5, group=5),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
Bbox(5, 6, 7, 8, id=5, group=5, attributes={
'a': 1.5,
'b': 'text',
}),
Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4,
attributes={ 'x': 1, 'y': '2', }),
Mask(label=3, id=5, z_order=2, image=np.ones((2, 3)),
attributes={ 'x': 1, 'y': '2', }),
]),
DatasetItem(id=21, subset='train',
annotations=[
Expand Down