Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Datumaro dependency to 0.2.0 #3813

Merged
merged 6 commits into from
Oct 28, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 55 additions & 44 deletions cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,24 @@
#
# SPDX-License-Identifier: MIT

import sys
import os.path as osp
import sys
from collections import namedtuple
from typing import Any, Callable, DefaultDict, Dict, List, Literal, Mapping, NamedTuple, OrderedDict, Tuple, Union
from pathlib import Path
from typing import (Any, Callable, DefaultDict, Dict, List, Literal, Mapping,
NamedTuple, OrderedDict, Tuple, Union)

import datumaro.components as datumaro
from datumaro.util import cast
from datumaro.util.image import ByteImage, Image
from django.utils import timezone

import datumaro.components.extractor as datumaro
from cvat.apps.engine.frame_provider import FrameProvider
from cvat.apps.engine.models import AttributeType, ShapeType, Project, Task, Label, DimensionType, Image as Img
from datumaro.util import cast
from datumaro.util.image import ByteImage, Image
from cvat.apps.engine.models import AttributeType, DimensionType
from cvat.apps.engine.models import Image as Img
from cvat.apps.engine.models import Label, Project, ShapeType, Task

from .annotation import AnnotationManager, TrackManager, AnnotationIR
from .annotation import AnnotationIR, AnnotationManager, TrackManager


class InstanceLabelData:
Expand Down Expand Up @@ -192,7 +195,7 @@ def meta_for_task(db_task, host, label_mapping=None):
("bugtracker", db_task.bug_tracker),
("created", str(timezone.localtime(db_task.created_date))),
("updated", str(timezone.localtime(db_task.updated_date))),
("subset", db_task.subset or datumaro.DEFAULT_SUBSET_NAME),
("subset", db_task.subset or datumaro.extractor.DEFAULT_SUBSET_NAME),
("start_frame", str(db_task.data.start_frame)),
("stop_frame", str(db_task.data.stop_frame)),
("frame_filter", db_task.data.frame_filter),
Expand Down Expand Up @@ -800,17 +803,18 @@ def categories(self) -> dict:

@staticmethod
def _load_categories(labels: list):
categories: Dict[datumaro.AnnotationType, datumaro.Categories] = {}
categories: Dict[datumaro.annotation.AnnotationType,
datumaro.annotation.Categories] = {}

label_categories = datumaro.LabelCategories(attributes=['occluded'])
label_categories = datumaro.annotation.LabelCategories(attributes=['occluded'])

for _, label in labels:
label_categories.add(label['name'])
for _, attr in label['attributes']:
label_categories.attributes.add(attr['name'])


categories[datumaro.AnnotationType.label] = label_categories
categories[datumaro.annotation.AnnotationType.label] = label_categories

return categories

Expand All @@ -824,7 +828,7 @@ def _load_user_info(meta: dict):

def _read_cvat_anno(self, cvat_frame_anno: Union[ProjectData.Frame, TaskData.Frame], labels: list):
categories = self.categories()
label_cat = categories[datumaro.AnnotationType.label]
label_cat = categories[datumaro.annotation.AnnotationType.label]
def map_label(name): return label_cat.find(name)[0]
label_attrs = {
label['name']: label['attributes']
Expand All @@ -834,7 +838,7 @@ def map_label(name): return label_cat.find(name)[0]
return convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label)


class CvatTaskDataExtractor(datumaro.SourceExtractor, CVATDataExtractorMixin):
class CvatTaskDataExtractor(datumaro.extractor.SourceExtractor, CVATDataExtractorMixin):
def __init__(self, task_data, include_images=False, format_type=None, dimension=DimensionType.DIM_2D):
super().__init__()
self._categories = self._load_categories(task_data.meta['task']['labels'])
Expand Down Expand Up @@ -893,7 +897,8 @@ def _make_image(i, **kwargs):
dm_anno = self._read_cvat_anno(frame_data, task_data.meta['task']['labels'])

if dimension == DimensionType.DIM_2D:
dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0],
dm_item = datumaro.extractor.DatasetItem(
id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, image=dm_image,
attributes={'frame': frame_data.frame
})
Expand All @@ -908,17 +913,19 @@ def _make_image(i, **kwargs):
attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"]})
attributes["track_id"] = -1

dm_item = datumaro.DatasetItem(id=osp.splitext(osp.split(frame_data.name)[-1])[0],
annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1],
attributes=attributes)
dm_item = datumaro.extractor.DatasetItem(
id=osp.splitext(osp.split(frame_data.name)[-1])[0],
annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1],
attributes=attributes
)

dm_items.append(dm_item)

self._items = dm_items

def _read_cvat_anno(self, cvat_frame_anno: TaskData.Frame, labels: list):
categories = self.categories()
label_cat = categories[datumaro.AnnotationType.label]
label_cat = categories[datumaro.annotation.AnnotationType.label]
def map_label(name): return label_cat.find(name)[0]
label_attrs = {
label['name']: label['attributes']
Expand All @@ -927,15 +934,15 @@ def map_label(name): return label_cat.find(name)[0]

return convert_cvat_anno_to_dm(cvat_frame_anno, label_attrs, map_label, self._format_type, self._dimension)

class CVATProjectDataExtractor(datumaro.Extractor, CVATDataExtractorMixin):
class CVATProjectDataExtractor(datumaro.extractor.Extractor, CVATDataExtractorMixin):
def __init__(self, project_data: ProjectData, include_images: bool = False, format_type: str = None, dimension: DimensionType = DimensionType.DIM_2D):
super().__init__()
self._categories = self._load_categories(project_data.meta['project']['labels'])
self._user = self._load_user_info(project_data.meta['project']) if dimension == DimensionType.DIM_3D else {}
self._dimension = dimension
self._format_type = format_type

dm_items: List[datumaro.DatasetItem] = []
dm_items: List[datumaro.extractor.DatasetItem] = []

ext_per_task: Dict[int, str] = {}
image_maker_per_task: Dict[int, Callable] = {}
Expand Down Expand Up @@ -996,7 +1003,8 @@ def _make_image(i, **kwargs):
dm_image = Image(**image_args)
dm_anno = self._read_cvat_anno(frame_data, project_data.meta['project']['labels'])
if self._dimension == DimensionType.DIM_2D:
dm_item = datumaro.DatasetItem(id=osp.splitext(frame_data.name)[0],
dm_item = datumaro.extractor.DatasetItem(
id=osp.splitext(frame_data.name)[0],
annotations=dm_anno, image=dm_image,
subset=frame_data.subset,
attributes={'frame': frame_data.frame}
Expand All @@ -1012,9 +1020,11 @@ def _make_image(i, **kwargs):
attributes["labels"].append({"label_id": idx, "name": label["name"], "color": label["color"]})
attributes["track_id"] = -1

dm_item = datumaro.DatasetItem(id=osp.splitext(osp.split(frame_data.name)[-1])[0],
annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1],
attributes=attributes, subset=frame_data.subset)
dm_item = datumaro.extractor.DatasetItem(
id=osp.splitext(osp.split(frame_data.name)[-1])[0],
annotations=dm_anno, point_cloud=dm_image[0], related_images=dm_image[1],
attributes=attributes, subset=frame_data.subset
)
dm_items.append(dm_item)

self._items = dm_items
Expand Down Expand Up @@ -1063,13 +1073,13 @@ def get_defaulted_subset(subset: str, subsets: List[str]) -> str:
if subset:
return subset
else:
if datumaro.DEFAULT_SUBSET_NAME not in subsets:
return datumaro.DEFAULT_SUBSET_NAME
if datumaro.extractor.DEFAULT_SUBSET_NAME not in subsets:
return datumaro.extractor.DEFAULT_SUBSET_NAME
else:
i = 1
while i < sys.maxsize:
if f'{datumaro.DEFAULT_SUBSET_NAME}_{i}' not in subsets:
return f'{datumaro.DEFAULT_SUBSET_NAME}_{i}'
if f'{datumaro.extractor.DEFAULT_SUBSET_NAME}_{i}' not in subsets:
return f'{datumaro.extractor.DEFAULT_SUBSET_NAME}_{i}'
i += 1
raise Exception('Cannot find default name for subset')

Expand Down Expand Up @@ -1100,7 +1110,7 @@ def convert_attrs(label, cvat_attrs):
anno_label = map_label(tag_obj.label)
anno_attr = convert_attrs(tag_obj.label, tag_obj.attributes)

anno = datumaro.Label(label=anno_label,
anno = datumaro.annotation.Label(label=anno_label,
attributes=anno_attr, group=anno_group)
item_anno.append(anno)

Expand All @@ -1121,20 +1131,20 @@ def convert_attrs(label, cvat_attrs):

anno_points = shape_obj.points
if shape_obj.type == ShapeType.POINTS:
anno = datumaro.Points(anno_points,
anno = datumaro.annotation.Points(anno_points,
label=anno_label, attributes=anno_attr, group=anno_group,
z_order=shape_obj.z_order)
elif shape_obj.type == ShapeType.POLYLINE:
anno = datumaro.PolyLine(anno_points,
anno = datumaro.annotation.PolyLine(anno_points,
label=anno_label, attributes=anno_attr, group=anno_group,
z_order=shape_obj.z_order)
elif shape_obj.type == ShapeType.POLYGON:
anno = datumaro.Polygon(anno_points,
anno = datumaro.annotation.Polygon(anno_points,
label=anno_label, attributes=anno_attr, group=anno_group,
z_order=shape_obj.z_order)
elif shape_obj.type == ShapeType.RECTANGLE:
x0, y0, x1, y1 = anno_points
anno = datumaro.Bbox(x0, y0, x1 - x0, y1 - y0,
anno = datumaro.annotation.Bbox(x0, y0, x1 - x0, y1 - y0,
label=anno_label, attributes=anno_attr, group=anno_group,
z_order=shape_obj.z_order)
elif shape_obj.type == ShapeType.CUBOID:
Expand All @@ -1144,9 +1154,10 @@ def convert_attrs(label, cvat_attrs):
else:
anno_id = index
position, rotation, scale = anno_points[0:3], anno_points[3:6], anno_points[6:9]
anno = datumaro.Cuboid3d(id=anno_id, position=position, rotation=rotation, scale=scale,
label=anno_label, attributes=anno_attr, group=anno_group
)
anno = datumaro.annotation.Cuboid3d(
id=anno_id, position=position, rotation=rotation, scale=scale,
label=anno_label, attributes=anno_attr, group=anno_group
)
else:
continue
else:
Expand Down Expand Up @@ -1192,17 +1203,17 @@ def find_dataset_root(dm_dataset, task_data):

def import_dm_annotations(dm_dataset, task_data):
shapes = {
datumaro.AnnotationType.bbox: ShapeType.RECTANGLE,
datumaro.AnnotationType.polygon: ShapeType.POLYGON,
datumaro.AnnotationType.polyline: ShapeType.POLYLINE,
datumaro.AnnotationType.points: ShapeType.POINTS,
datumaro.AnnotationType.cuboid_3d: ShapeType.CUBOID
datumaro.annotation.AnnotationType.bbox: ShapeType.RECTANGLE,
datumaro.annotation.AnnotationType.polygon: ShapeType.POLYGON,
datumaro.annotation.AnnotationType.polyline: ShapeType.POLYLINE,
datumaro.annotation.AnnotationType.points: ShapeType.POINTS,
datumaro.annotation.AnnotationType.cuboid_3d: ShapeType.CUBOID
}

if len(dm_dataset) == 0:
return

label_cat = dm_dataset.categories()[datumaro.AnnotationType.label]
label_cat = dm_dataset.categories()[datumaro.annotation.AnnotationType.label]

root_hint = find_dataset_root(dm_dataset, task_data)

Expand Down Expand Up @@ -1231,7 +1242,7 @@ def import_dm_annotations(dm_dataset, task_data):
if hasattr(ann, 'label') and ann.label is None:
raise CvatImportError("annotation has no label")
if ann.type in shapes:
if ann.type == datumaro.AnnotationType.cuboid_3d:
if ann.type == datumaro.annotation.AnnotationType.cuboid_3d:
try:
ann.points = [*ann.position,*ann.rotation,*ann.scale,0,0,0,0,0,0,0]
except Exception as e:
Expand All @@ -1249,7 +1260,7 @@ def import_dm_annotations(dm_dataset, task_data):
attributes=[task_data.Attribute(name=n, value=str(v))
for n, v in ann.attributes.items()],
))
elif ann.type == datumaro.AnnotationType.label:
elif ann.type == datumaro.annotation.AnnotationType.label:
task_data.add_tag(task_data.Tag(
frame=frame_number,
label=label_cat.items[ann.label].name,
Expand Down
33 changes: 33 additions & 0 deletions cvat/apps/dataset_manager/formats/datumaro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (C) 2019 Intel Corporation
#
# SPDX-License-Identifier: MIT

from tempfile import TemporaryDirectory

from datumaro.components.dataset import Dataset
from pyunpack import Archive

from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor,
import_dm_annotations)
from cvat.apps.dataset_manager.util import make_zip_archive

from .registry import dm_env, exporter, importer


@exporter(name="Datumaro", ext="ZIP", version="1.0")
def _export(dst_file, instance_data, save_images=False):
dataset = Dataset.from_extractors(GetCVATDataExtractor(
instance_data=instance_data, include_images=save_images), env=dm_env)
with TemporaryDirectory() as tmp_dir:
dataset.export(tmp_dir, 'datumaro', save_images=save_images)
Copy link
Contributor

@zhiltsov-max zhiltsov-max Oct 20, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please check that the annotations do not include absolute image paths.

What about the format tests?

Also, think about loading tracks, if all the annotations have a track_id attribute.

Copy link
Contributor Author

@sizov-kirill sizov-kirill Oct 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Committed a solution for the problem with absolute image paths.

The format is tested along with all the other formats in test_formats.py and test_rest_api_formats.py.

About tracks, I have a few thoughts:

  1. Do not support tracks in the Datumaro format until Datumaro itself, as a tool, starts to support them.
  2. Or we can check for the track_id attribute for all formats in the import_dm_annotations function (otherwise we'll just duplicate code similar to that for the MOT format).

Which solution do you think is better?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The second solution looks better to me, but implementing it directly will cause unexpected side effects. I think we can try to add a common import option to import tracks, if possible. Not all formats will support this, of course, but those that do will utilize it. Not in this PR, though. The MOT code is already duplicated with some adjustments in MOTS, btw, so I think the generic variant might be a bit complicated.


make_zip_archive(tmp_dir, dst_file)

@importer(name="Datumaro", ext="ZIP", version="1.0")
def _import(src_file, instance_data):
    """Import annotations from an archive in the Datumaro format.

    The archive located at ``src_file.name`` is unpacked into a temporary
    directory, the unpacked content is loaded as a ``'datumaro'`` dataset
    using ``dm_env``, and the resulting dataset is handed to
    ``import_dm_annotations`` together with ``instance_data``.

    Args:
        src_file: object whose ``name`` attribute is the archive path.
        instance_data: target passed through to ``import_dm_annotations``.
    """
    with TemporaryDirectory() as extract_dir:
        archive = Archive(src_file.name)
        archive.extractall(extract_dir)

        # Annotations are imported while the extracted files still exist;
        # the temporary directory is removed when this block exits.
        import_dm_annotations(
            Dataset.import_from(extract_dir, 'datumaro', env=dm_env),
            instance_data,
        )
Loading