Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Datumaro] Diff with exact annotation matching #1989

Merged
merged 11 commits into from
Sep 2, 2020
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added password reset functionality (<https://github.com/opencv/cvat/pull/2058>)
- Ability to work with data on the fly (<https://github.com/opencv/cvat/pull/2007>)
- Annotation in process outline color wheel (<https://github.com/opencv/cvat/pull/2084>)
- [Datumaro] CLI command for dataset equality comparison (<https://github.com/opencv/cvat/pull/1989>)

### Changed
- UI models (like DEXTR) were redesigned to be more interactive (<https://github.com/opencv/cvat/pull/2054>)
Expand Down
103 changes: 85 additions & 18 deletions datumaro/datumaro/cli/contexts/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,26 @@
# SPDX-License-Identifier: MIT

import argparse
from enum import Enum
import json
import logging as log
import os
import os.path as osp
import shutil
from enum import Enum

from datumaro.components.project import Project, Environment, \
PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
from datumaro.components.comparator import Comparator
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.dataset_filter import DatasetItemEncoder
from datumaro.components.extractor import AnnotationType
from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.operations import \
compute_image_statistics, compute_ann_statistics
from datumaro.components.operations import (DistanceComparator,
ExactComparator, compute_ann_statistics, compute_image_statistics, mean_std)
from datumaro.components.project import \
PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
from datumaro.components.project import Environment, Project

from ...util import (CliException, MultilineFormatter, add_subparser,
make_file_name)
from ...util.project import generate_next_file_name, load_project
from .diff import DiffVisualizer
from ...util import add_subparser, CliException, MultilineFormatter, \
make_file_name
from ...util.project import load_project, generate_next_file_name


def build_create_parser(parser_ctor=argparse.ArgumentParser):
Expand Down Expand Up @@ -503,20 +504,20 @@ def merge_command(args):
def build_diff_parser(parser_ctor=argparse.ArgumentParser):
parser = parser_ctor(help="Compare projects",
description="""
Compares two projects.|n
Compares two projects, match annotations by distance.|n
|n
Examples:|n
- Compare two projects, consider bboxes matching if their IoU > 0.7,|n
- Compare two projects, match boxes if IoU > 0.7,|n
|s|s|s|sprint results to Tensorboard:
|s|sdiff path/to/other/project -o diff/ -f tensorboard --iou-thresh 0.7
|s|sdiff path/to/other/project -o diff/ -v tensorboard --iou-thresh 0.7
""",
formatter_class=MultilineFormatter)

parser.add_argument('other_project_dir',
help="Directory of the second project to be compared")
parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
help="Directory to save comparison results (default: do not save)")
parser.add_argument('-f', '--format',
parser.add_argument('-v', '--visualizer',
default=DiffVisualizer.DEFAULT_FORMAT,
choices=[f.name for f in DiffVisualizer.Format],
help="Output format (default: %(default)s)")
Expand All @@ -536,9 +537,7 @@ def diff_command(args):
first_project = load_project(args.project_dir)
second_project = load_project(args.other_project_dir)

comparator = Comparator(
iou_threshold=args.iou_thresh,
conf_threshold=args.conf_thresh)
comparator = DistanceComparator(iou_threshold=args.iou_thresh)

dst_dir = args.dst_dir
if dst_dir:
Expand All @@ -556,7 +555,7 @@ def diff_command(args):
dst_dir_existed = osp.exists(dst_dir)
try:
visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator,
output_format=args.format)
output_format=args.visualizer)
visualizer.save_dataset_diff(
first_project.make_dataset(),
second_project.make_dataset())
Expand All @@ -567,6 +566,73 @@ def diff_command(args):

return 0

def build_ediff_parser(parser_ctor=argparse.ArgumentParser):
    """Create the command-line parser for the 'ediff' subcommand.

    The parser is instantiated through ``parser_ctor``, receives one
    positional argument (the second project directory) plus the options
    listed below, and is bound to ``ediff_command`` through
    ``set_defaults(command=...)``.

    Returns the configured parser object.
    """
    # NOTE(review): a reviewer asked whether this command should be called
    # "compare" rather than "ediff"; the name is left unchanged here.
    description_text = """
        Compares two projects for equality.|n
        |n
        Examples:|n
        - Compare two projects, exclude annotation group |n
        |s|s|sand the 'is_crowd' attribute from comparison:|n
        |s|sediff other/project/ -if group -ia is_crowd
    """
    parser = parser_ctor(
        help="Compare projects for equality",
        description=description_text,
        formatter_class=MultilineFormatter,
    )

    # (name or flags, add_argument keyword options), registered in this
    # exact order so that the generated help keeps its layout.
    argument_table = (
        (('other_project_dir',), dict(
            help="Directory of the second project to be compared")),
        (('-iia', '--ignore-item-attr'), dict(
            action='append',
            help="Ignore item attribute (repeatable)")),
        (('-ia', '--ignore-attr'), dict(
            action='append',
            help="Ignore annotation attribute (repeatable)")),
        (('-if', '--ignore-field'), dict(
            action='append', default=['id', 'group'],
            help="Ignore annotation field (repeatable, default: %(default)s)")),
        (('--match-images',), dict(
            action='store_true',
            help='Match dataset items by images instead of ids')),
        (('--all',), dict(
            action='store_true',
            help="Include matches in the output")),
        (('-p', '--project'), dict(
            dest='project_dir', default='.',
            help="Directory of the first project to be compared (default: current dir)")),
    )
    for names, options in argument_table:
        parser.add_argument(*names, **options)

    parser.set_defaults(command=ediff_command)

    return parser

def ediff_command(args):
    """Run the 'ediff' subcommand: compare two projects for equality.

    Loads the projects found at ``args.project_dir`` and
    ``args.other_project_dir``, compares their datasets with an
    ``ExactComparator`` configured from the ``ignore_*`` and
    ``match_images`` arguments, writes the result as JSON to a file whose
    name comes from ``generate_next_file_name('diff', ext='.json')``, and
    prints a short summary.

    Matches are written to the JSON report only when ``args.all`` is set.

    Returns 0.
    """
    project_a = load_project(args.project_dir)
    project_b = load_project(args.other_project_dir)

    exact_comparator = ExactComparator(
        match_images=args.match_images,
        ignored_fields=args.ignore_field,
        ignored_attrs=args.ignore_attr,
        ignored_item_attrs=args.ignore_item_attr)

    dataset_a = project_a.make_dataset()
    dataset_b = project_b.make_dataset()
    matches, mismatches, a_extra, b_extra, errors = \
        exact_comparator.compare_datasets(dataset_a, dataset_b)

    # The extra-item collections are sorted before being written out.
    report = dict(
        mismatches=mismatches,
        a_extra_items=sorted(a_extra),
        b_extra_items=sorted(b_extra),
        errors=errors,
    )
    if args.all:
        report["matches"] = matches

    output_file = generate_next_file_name('diff', ext='.json')
    with open(output_file, 'w') as report_stream:
        json.dump(report, report_stream, indent=4, sort_keys=True)

    # Console summary: one line per result group, reporting its size.
    summary_lines = (
        ("The first project has %s unmatched items", a_extra),
        ("The second project has %s unmatched items", b_extra),
        ("%s item conflicts", errors),
        ("%s matching annotations", matches),
        ("%s mismatching annotations", mismatches),
    )
    print("Found:")
    for template, group in summary_lines:
        print(template % len(group))

    log.info("Output has been saved to '%s'" % output_file)

    return 0

def build_transform_parser(parser_ctor=argparse.ArgumentParser):
builtins = sorted(Environment().transforms.items)

Expand Down Expand Up @@ -753,6 +819,7 @@ def build_parser(parser_ctor=argparse.ArgumentParser):
add_subparser(subparsers, 'extract', build_extract_parser)
add_subparser(subparsers, 'merge', build_merge_parser)
add_subparser(subparsers, 'diff', build_diff_parser)
add_subparser(subparsers, 'ediff', build_ediff_parser)
add_subparser(subparsers, 'transform', build_transform_parser)
add_subparser(subparsers, 'info', build_info_parser)
add_subparser(subparsers, 'stats', build_stats_parser)
Expand Down
2 changes: 1 addition & 1 deletion datumaro/datumaro/cli/contexts/project/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def save_item_bbox_diff(self, item_a, item_b, diff):
_, mispred, a_unmatched, b_unmatched = diff

if 0 < len(a_unmatched) + len(b_unmatched) + len(mispred):
img_a = item_a.image.copy()
img_a = item_a.image.data.copy()
img_b = img_a.copy()
for a_bbox, b_bbox in mispred:
self.draw_bbox(img_a, a_bbox, (0, 255, 0))
Expand Down
113 changes: 0 additions & 113 deletions datumaro/datumaro/components/comparator.py

This file was deleted.

6 changes: 5 additions & 1 deletion datumaro/datumaro/components/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def wrap(item, **kwargs):
@attrs
class Categories:
attributes = attrib(factory=set, validator=default_if_none(set),
kw_only=True)
kw_only=True, eq=False)

@attrs
class LabelCategories(Categories):
Expand Down Expand Up @@ -137,6 +137,8 @@ def inverse_colormap(self):
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, __class__):
return False
for label_id, my_color in self.colormap.items():
other_color = other.colormap.get(label_id)
if not np.array_equal(my_color, other_color):
Expand Down Expand Up @@ -179,6 +181,8 @@ def paint(self, colormap):
def __eq__(self, other):
if not super().__eq__(other):
return False
if not isinstance(other, __class__):
return False
return \
(self.label == other.label) and \
(self.z_order == other.z_order) and \
Expand Down
Loading