From 2d9c954ac9dd09a81fd70f4976aa788a61e8d0c5 Mon Sep 17 00:00:00 2001
From: "Yi, Jihyeon"
Date: Mon, 29 Mar 2021 19:02:57 +0900
Subject: [PATCH] validator threshold adjustment + style correction

---
 datumaro/components/validator.py | 116 ++++++++------
 tests/test_validator.py         | 250 +++++++++++++++++++++----------
 2 files changed, 242 insertions(+), 124 deletions(-)

diff --git a/datumaro/components/validator.py b/datumaro/components/validator.py
index 84fba8d35c..7fd384c59c 100644
--- a/datumaro/components/validator.py
+++ b/datumaro/components/validator.py
@@ -27,6 +27,8 @@ class _Validator:
+    DEFAULT_FEW_SAMPLES = 1
+    DEFAULT_IMBALANCE_RATIO = 50
     """
     A base class for task-specific validators.
@@ -57,7 +59,10 @@ def __init__(self, task_type=None, ann_type=None, far_from_mean_thr=None):
         self.task_type = task_type
         self.ann_type = ann_type
+        self.far_from_mean_thr = far_from_mean_thr
+        self.imbalance_ratio_thr = self.DEFAULT_IMBALANCE_RATIO
+        self.few_samples_thr = self.DEFAULT_FEW_SAMPLES

     def compute_statistics(self, dataset):
         """
@@ -300,7 +305,7 @@ def _update_props_far_from_mean(item, ann):
             defined_label_dist[category.name] = 0

         for item in dataset:
-            ann_count = [ann.type == self.ann_type \
+            ann_count = [ann.type == self.ann_type
                 for ann in item.annotations].count(True)

             if self.task_type == TaskType.classification:
@@ -371,7 +376,7 @@ def _update_props_far_from_mean(item, ann):
                     attr_dets = defined_attr_stats[attr]

                 if self.task_type == TaskType.detection and \
-                    ann.type == self.ann_type:
+                        ann.type == self.ann_type:
                     bbox_attr_label = bbox_dist_by_attr.setdefault(
                         label_name, {})
                     bbox_attr_stats = bbox_attr_label.setdefault(
@@ -441,8 +446,8 @@ def _check_undefined_attribute(self, label_name, attr_name, attr_dets):
     def _check_label_defined_but_not_found(self, stats):
         validation_reports = []
         count_by_defined_labels = stats['label_distribution']['defined_labels']
-        labels_not_found = [label_name \
-            for label_name, count in count_by_defined_labels.items() \
+        labels_not_found = [label_name
+            for label_name, count in count_by_defined_labels.items()
             if count == 0]

         for label_name in labels_not_found:
@@ -453,8 +458,8 @@ def _check_attribute_defined_but_not_found(self, label_name, attr_stats):
         validation_reports = []
-        attrs_not_found = [attr_name \
-            for attr_name, attr_dets in attr_stats.items() \
+        attrs_not_found = [attr_name
+            for attr_name, attr_dets in attr_stats.items()
             if len(attr_dets['distribution']) == 0]

         for attr_name in attrs_not_found:
@@ -467,8 +472,8 @@ def _check_only_one_label(self, stats):
         validation_reports = []
         count_by_defined_labels = stats['label_distribution']['defined_labels']
-        labels_found = [label_name \
-            for label_name, count in count_by_defined_labels.items() \
+        labels_found = [label_name
+            for label_name, count in count_by_defined_labels.items()
             if count > 0]

         if len(labels_found) == 1:
@@ -488,12 +493,14 @@ def _check_only_one_attribute_value(self, label_name, attr_name, attr_dets):

         return validation_reports

-    def _check_few_samples_in_label(self, stats, thr):
+    def _check_few_samples_in_label(self, stats):
         validation_reports = []
+        thr = self.few_samples_thr
+
         defined_label_dist = stats['label_distribution']['defined_labels']
-        labels_with_few_samples = [(label_name, count) \
-            for label_name, count in defined_label_dist.items() \
-            if 0 < count < thr]
+        labels_with_few_samples = [(label_name, count)
+            for label_name, count in defined_label_dist.items()
+            if 0 < count <= thr]

         for label_name, count in labels_with_few_samples:
             validation_reports += self._generate_validation_report(
@@ -502,11 +509,13 @@ def _check_few_samples_in_label(self, stats):
         return validation_reports

     def _check_few_samples_in_attribute(self, label_name,
-            attr_name, attr_dets, thr):
+            attr_name, attr_dets):
         validation_reports = []
-        attr_values_with_few_samples = [(attr_value, count) \
-            for attr_value, count in attr_dets['distribution'].items() \
-            if count < thr]
+        thr = self.few_samples_thr
+
+        attr_values_with_few_samples = [(attr_value, count)
+            for attr_value, count in attr_dets['distribution'].items()
+            if count <= thr]

         for attr_value, count in attr_values_with_few_samples:
             details = (label_name, attr_name, attr_value, count)
@@ -515,11 +524,12 @@ def _check_few_samples_in_attribute(self, label_name,

         return validation_reports

-    def _check_imbalanced_labels(self, stats, thr):
+    def _check_imbalanced_labels(self, stats):
         validation_reports = []
+        thr = self.imbalance_ratio_thr

         defined_label_dist = stats['label_distribution']['defined_labels']
-        count_by_defined_labels = [count \
+        count_by_defined_labels = [count
             for label, count in defined_label_dist.items()]

         if len(count_by_defined_labels) == 0:
@@ -528,15 +538,15 @@
         count_max = np.max(count_by_defined_labels)
         count_min = np.min(count_by_defined_labels)
         balance = count_max / count_min if count_min > 0 else float('inf')
-        if balance > thr:
+        if balance >= thr:
             validation_reports += self._generate_validation_report(
                 ImbalancedLabels, Severity.warning)

         return validation_reports

-    def _check_imbalanced_attribute(self, label_name, attr_name,
-            attr_dets, thr):
+    def _check_imbalanced_attribute(self, label_name, attr_name, attr_dets):
         validation_reports = []
+        thr = self.imbalance_ratio_thr

         count_by_defined_attr = list(attr_dets['distribution'].values())
         if len(count_by_defined_attr) == 0:
@@ -545,7 +555,7 @@ def _check_imbalanced_attribute(self, label_name, attr_name,
         count_max = np.max(count_by_defined_attr)
         count_min = np.min(count_by_defined_attr)
         balance = count_max / count_min if count_min > 0 else float('inf')
-        if balance > thr:
+        if balance >= thr:
             validation_reports += self._generate_validation_report(
                 ImbalancedAttribute, Severity.warning,
                 label_name, attr_name)
@@ -607,8 +617,8 @@ def generate_reports(self, stats):
         reports += self._check_multi_label_annotations(stats)
         reports += self._check_label_defined_but_not_found(stats)
         reports += self._check_only_one_label(stats)
-        reports += self._check_few_samples_in_label(stats, 2)
-        reports += self._check_imbalanced_labels(stats, 5)
+        reports += self._check_few_samples_in_label(stats)
+        reports += self._check_imbalanced_labels(stats)

         label_dist = stats['label_distribution']
         attr_dist = stats['attribute_distribution']
@@ -625,9 +635,9 @@ def generate_reports(self, stats):
             for attr_name, attr_dets in attr_stats.items():
                 reports += self._check_few_samples_in_attribute(
-                    label_name, attr_name, attr_dets, 2)
+                    label_name, attr_name, attr_dets)
                 reports += self._check_imbalanced_attribute(
-                    label_name, attr_name, attr_dets, 5)
+                    label_name, attr_name, attr_dets)
                 reports += self._check_only_one_attribute_value(
                     label_name, attr_name, attr_dets)
                 reports += self._check_missing_attribute(
@@ -649,25 +659,33 @@ class DetectionValidator(_Validator):
     A validator class for detection tasks.
     """
-    DEFAULT_FAR_FROM_MEAN = 2.0
+    DEFAULT_FAR_FROM_MEAN = 5.0
+    DEFAULT_BBOX_IMBALANCE = 0.8
+    DEFAULT_BBOX_TOPK_BINS = 0.1

     def __init__(self):
         super().__init__(TaskType.detection, AnnotationType.bbox,
             far_from_mean_thr=self.DEFAULT_FAR_FROM_MEAN)
+        self.bbox_imbalance_thr = self.DEFAULT_BBOX_IMBALANCE
+        self.bbox_topk_bins_ratio = self.DEFAULT_BBOX_TOPK_BINS

-    def _check_imbalanced_bbox_dist_in_label(self, label_name, bbox_label_stats,
-            thr, topk_ratio):
+    def _check_imbalanced_bbox_dist_in_label(self, label_name,
+            bbox_label_stats):
         validation_reports = []
+        thr = self.bbox_imbalance_thr
+        topk_ratio = self.bbox_topk_bins_ratio

         for prop, prop_stats in bbox_label_stats.items():
             value_counts = prop_stats['histogram']['counts']
             n_bucket = len(value_counts)
-            topk = int(np.around(n_bucket * topk_ratio))
+            if n_bucket < 2:
+                continue
+            topk = max(1, int(np.around(n_bucket * topk_ratio)))
             if topk > 0:
                 topk_values = np.sort(value_counts)[-topk:]
                 ratio = np.sum(topk_values) / np.sum(value_counts)
-                if ratio > thr:
+                if ratio >= thr:
                     details = (label_name, prop)
                     validation_reports += self._generate_validation_report(
                         ImbalancedBboxDistInLabel, Severity.warning, *details)
@@ -675,19 +693,23 @@
         return validation_reports

     def _check_imbalanced_bbox_dist_in_attr(self, label_name, attr_name,
-            bbox_attr_stats, thr, topk_ratio):
+            bbox_attr_stats):
         validation_reports = []
+        thr = self.bbox_imbalance_thr
+        topk_ratio = self.bbox_topk_bins_ratio

         for attr_value, value_stats in bbox_attr_stats.items():
             for prop, prop_stats in value_stats.items():
                 value_counts = prop_stats['histogram']['counts']
                 n_bucket = len(value_counts)
-                topk = int(np.around(n_bucket * topk_ratio))
+                if n_bucket < 2:
+                    continue
+                topk = max(1, int(np.around(n_bucket * topk_ratio)))
                 if topk > 0:
                     topk_values = np.sort(value_counts)[-topk:]
                     ratio = np.sum(topk_values) / np.sum(value_counts)
-                    if ratio > thr:
+                    if ratio >= thr:
                         details = (label_name, attr_name, attr_value, prop)
                         validation_reports += self._generate_validation_report(
                             ImbalancedBboxDistInAttribute,
@@ -744,9 +766,9 @@ def _check_far_from_label_mean(self, label_name, bbox_label_stats):
             if prop_stats['mean'] is not None:
                 mean = round(prop_stats['mean'], 2)

-            for item_dets, anns_far_from_mean in items_far_from_mean.items():
+            for item_dets, anns_far in items_far_from_mean.items():
                 item_id, item_subset = item_dets
-                for ann_id, val in anns_far_from_mean.items():
+                for ann_id, val in anns_far.items():
                     val = round(val, 2)
                     details = (item_subset, label_name, ann_id, prop, mean, val)
                     validation_reports += self._generate_validation_report(
@@ -763,9 +785,9 @@ def _check_far_from_attr_mean(self, label_name, attr_name, bbox_attr_stats):
                 if prop_stats['mean'] is not None:
                     mean = round(prop_stats['mean'], 2)

-                for item_dets, anns_far_from_mean in items_far_from_mean.items():
+                for item_dets, anns_far in items_far_from_mean.items():
                     item_id, item_subset = item_dets
-                    for ann_id, val in anns_far_from_mean.items():
+                    for ann_id, val in anns_far.items():
                         val = round(val, 2)
                         details = (item_subset, label_name, ann_id,
                             attr_name, attr_value, prop, mean, val)
                         validation_reports += self._generate_validation_report(
@@ -798,8 +820,8 @@ def generate_reports(self, stats):
         reports += self._check_missing_bbox_annotation(stats)
         reports += self._check_label_defined_but_not_found(stats)
         reports += self._check_only_one_label(stats)
-        reports += self._check_few_samples_in_label(stats, 2)
-        reports += self._check_imbalanced_labels(stats, 5)
+        reports += self._check_few_samples_in_label(stats)
+        reports += self._check_imbalanced_labels(stats)
         reports += self._check_negative_length(stats)
         reports += self._check_invalid_value(stats)
@@ -821,9 +843,9 @@ def generate_reports(self, stats):
             for attr_name, attr_dets in attr_stats.items():
                 reports += self._check_few_samples_in_attribute(
-                    label_name, attr_name, attr_dets, 2)
+                    label_name, attr_name, attr_dets)
                 reports += self._check_imbalanced_attribute(
-                    label_name, attr_name, attr_dets, 5)
+                    label_name, attr_name, attr_dets)
                 reports += self._check_only_one_attribute_value(
                     label_name, attr_name, attr_dets)
                 reports += self._check_missing_attribute(
@@ -835,13 +857,13 @@ def generate_reports(self, stats):
             reports += self._check_far_from_label_mean(
                 label_name, bbox_label_stats)
             reports += self._check_imbalanced_bbox_dist_in_label(
-                label_name, bbox_label_stats, 1, 0.25)
+                label_name, bbox_label_stats)

             for attr_name, bbox_attr_stats in bbox_attr_label.items():
                 reports += self._check_far_from_attr_mean(
                     label_name, attr_name, bbox_attr_stats)
                 reports += self._check_imbalanced_bbox_dist_in_attr(
-                    label_name, attr_name, bbox_attr_stats, 1, 0.25)
+                    label_name, attr_name, bbox_attr_stats)

         for label_name, label_stats in undefined_label_dist.items():
             reports += self._check_undefined_label(label_name, label_stats)
@@ -889,11 +911,11 @@ def validate_annotations(dataset: IDataset, task_type: Union[str, TaskType]):
     # generate validation reports and summary
     reports = validator.generate_reports(stats)
-    reports = list(map(lambda r : r.to_dict(), reports))
+    reports = list(map(lambda r: r.to_dict(), reports))

     summary = {
-        'errors': sum(map(lambda r : r['severity'] == 'error', reports)),
-        'warnings': sum(map(lambda r : r['severity'] == 'warning', reports))
+        'errors': sum(map(lambda r: r['severity'] == 'error', reports)),
+        'warnings': sum(map(lambda r: r['severity'] == 'warning', reports))
    }

     validation_results['validation_reports'] = reports
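The relocated thresholds above also switch to inclusive comparisons, so a value sitting exactly on a default now fires a report. A minimal sketch of the boundary behaviour, not part of the patch; it assumes the report classes are importable from datumaro.components.errors (the test module's imports are outside this diff):

    from datumaro.components.errors import (FewSamplesInLabel,
        ImbalancedLabels)  # assumed import location
    from datumaro.components.validator import ClassificationValidator

    validator = ClassificationValidator()

    # few_samples_thr defaults to DEFAULT_FEW_SAMPLES == 1; the new
    # `0 < count <= thr` check reports a label with exactly one sample.
    stats = {'label_distribution': {'defined_labels': {'cat': 1, 'dog': 100}}}
    assert any(isinstance(r, FewSamplesInLabel)
        for r in validator._check_few_samples_in_label(stats))

    # imbalance_ratio_thr defaults to DEFAULT_IMBALANCE_RATIO == 50;
    # max/min == 100/2 == 50 now fires because `>` became `>=`.
    stats = {'label_distribution': {'defined_labels': {'cat': 2, 'dog': 100}}}
    assert any(isinstance(r, ImbalancedLabels)
        for r in validator._check_imbalanced_labels(stats))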
diff --git a/tests/test_validator.py b/tests/test_validator.py
index 5c348103df..d6c8700d80 100644
--- a/tests/test_validator.py
+++ b/tests/test_validator.py
@@ -19,26 +19,27 @@
 from datumaro.components.validator import (ClassificationValidator,
     DetectionValidator, TaskType, validate_annotations, _Validator)

+
 class TestValidatorTemplate(TestCase):
     @classmethod
     def setUpClass(cls):
         cls.dataset = Dataset.from_iterable([
             DatasetItem(id=1, image=np.ones((5, 5, 3)), annotations=[
-                Label(1, id=0, attributes={ 'a': 1, 'b': 7, }),
+                Label(1, id=0, attributes={'a': 1, 'b': 7, }),
                 Bbox(1, 2, 3, 4, id=1, label=0, attributes={
                     'a': 1, 'b': 2,
                 }),
             ]),
             DatasetItem(id=2, image=np.ones((2, 4, 3)), annotations=[
-                Label(2, id=0, attributes={ 'a': 2, 'b': 2, }),
+                Label(2, id=0, attributes={'a': 2, 'b': 2, }),
                 Bbox(2, 3, 1, 4, id=1, label=0, attributes={
                     'a': 1, 'b': 1,
                 }),
             ]),
             DatasetItem(id=3),
             DatasetItem(id=4, image=np.ones((2, 4, 3)), annotations=[
-                Label(0, id=0, attributes={ 'b': 4, }),
-                Label(1, id=1, attributes={ 'a': 11, 'b': 7, }),
+                Label(0, id=0, attributes={'b': 4, }),
+                Label(1, id=1, attributes={'a': 11, 'b': 7, }),
                 Bbox(1, 3, 2, 4, id=2, label=0, attributes={
                     'a': 2, 'b': 1,
                 }),
@@ -47,30 +48,30 @@ def setUpClass(cls):
                 }),
             ]),
             DatasetItem(id=5, image=np.ones((2, 4, 3)), annotations=[
-                Label(0, id=0, attributes={ 'a': 20, 'b': 10 }),
+                Label(0, id=0, attributes={'a': 20, 'b': 10, }),
                 Bbox(1, 2, 3, 4, id=1, label=1, attributes={
                     'a': 1, 'b': 1,
                 }),
             ]),
             DatasetItem(id=6, image=np.ones((2, 4, 3)), annotations=[
-                Label(1, id=0, attributes={ 'a': 11, 'b': 2, 'c': 3}),
+                Label(1, id=0, attributes={'a': 11, 'b': 2, 'c': 3, }),
                 Bbox(2, 3, 4, 1, id=1, label=1, attributes={
                     'a': 2, 'b': 2,
                 }),
             ]),
             DatasetItem(id=7, image=np.ones((2, 4, 3)), annotations=[
-                Label(1, id=0, attributes={ 'a': 1, 'b': 2, 'c': 5 }),
+                Label(1, id=0, attributes={'a': 1, 'b': 2, 'c': 5, }),
                 Bbox(1, 2, 3, 4, id=1, label=2, attributes={
                     'a': 1, 'b': 2,
                 }),
             ]),
             DatasetItem(id=8, image=np.ones((2, 4, 3)), annotations=[
-                Label(2, id=0, attributes={ 'a': 7, 'b': 9, 'c': 5 }),
+                Label(2, id=0, attributes={'a': 7, 'b': 9, 'c': 5, }),
                 Bbox(2, 1, 3, 4, id=1, label=2, attributes={
                     'a': 2, 'b': 1,
                 }),
             ]),
-        ], categories=[[f'label_{i}', None, { 'a', 'b' }] \
+        ], categories=[[f'label_{i}', None, {'a', 'b', }]
             for i in range(2)])
@@ -124,7 +125,7 @@ def test_check_undefined_attribute(self):
         label_name = 'unit'
         attr_name = 'test'
         attr_dets = {
-            'items_with_undefined_attr':[(1, 'unittest')]
+            'items_with_undefined_attr': [(1, 'unittest')]
         }

         actual_reports = self.validator._check_undefined_attribute(
@@ -193,64 +194,122 @@ def test_check_only_one_attribute_value(self):
         self.assertIsInstance(actual_reports[0], OnlyOneAttributeValue)

     def test_check_few_samples_in_label(self):
-        stats = {
-            'label_distribution': {
-                'defined_labels': {
-                    'unit': 1
+        with self.subTest('Few Samples'):
+            stats = {
+                'label_distribution': {
+                    'defined_labels': {
+                        'unit': self.validator.DEFAULT_FEW_SAMPLES
+                    }
                 }
             }
-        }

-        actual_reports = self.validator._check_few_samples_in_label(stats, 2)
+            actual_reports = self.validator._check_few_samples_in_label(stats)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], FewSamplesInLabel)
+            self.assertTrue(len(actual_reports) == 1)
+            self.assertIsInstance(actual_reports[0], FewSamplesInLabel)
+
+        with self.subTest('No Few Samples Warning'):
+            stats = {
+                'label_distribution': {
+                    'defined_labels': {
+                        'unit': self.validator.DEFAULT_FEW_SAMPLES + 1
+                    }
+                }
+            }
+
+            actual_reports = self.validator._check_few_samples_in_label(stats)
+
+            self.assertTrue(len(actual_reports) == 0)

     def test_check_few_samples_in_attribute(self):
         label_name = 'unit'
         attr_name = 'test'
-        attr_dets = {
-            'distribution': {
-                'mock': 1
+
+        with self.subTest('Few Samples'):
+            attr_dets = {
+                'distribution': {
+                    'mock': self.validator.DEFAULT_FEW_SAMPLES
+                }
             }
-        }

-        actual_reports = self.validator._check_few_samples_in_attribute(
-            label_name, attr_name, attr_dets, 2)
+            actual_reports = self.validator._check_few_samples_in_attribute(
+                label_name, attr_name, attr_dets)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], FewSamplesInAttribute)
+            self.assertTrue(len(actual_reports) == 1)
+            self.assertIsInstance(actual_reports[0], FewSamplesInAttribute)
+
+        with self.subTest('No Few Samples Warning'):
+            attr_dets = {
+                'distribution': {
+                    'mock': self.validator.DEFAULT_FEW_SAMPLES + 1
+                }
+            }
+
+            actual_reports = self.validator._check_few_samples_in_attribute(
+                label_name, attr_name, attr_dets)
+
+            self.assertTrue(len(actual_reports) == 0)

     def test_check_imbalanced_labels(self):
-        stats = {
-            'label_distribution': {
-                'defined_labels': {
-                    'unit': 5,
-                    'test': 1
+        with self.subTest('Imbalance'):
+            stats = {
+                'label_distribution': {
+                    'defined_labels': {
+                        'unit': self.validator.DEFAULT_IMBALANCE_RATIO,
+                        'test': 1
+                    }
                 }
             }
-        }

-        actual_reports = self.validator._check_imbalanced_labels(stats, 2)
+            actual_reports = self.validator._check_imbalanced_labels(stats)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], ImbalancedLabels)
+            self.assertTrue(len(actual_reports) == 1)
+            self.assertIsInstance(actual_reports[0], ImbalancedLabels)
+
+        with self.subTest('No Imbalance Warning'):
+            stats = {
+                'label_distribution': {
+                    'defined_labels': {
+                        'unit': self.validator.DEFAULT_IMBALANCE_RATIO - 1,
+                        'test': 1
+                    }
+                }
+            }
+
+            actual_reports = self.validator._check_imbalanced_labels(stats)
+
+            self.assertTrue(len(actual_reports) == 0)

     def test_check_imbalanced_attribute(self):
         label_name = 'unit'
         attr_name = 'test'
-        attr_dets = {
-            'distribution': {
-                'mock': 5,
-                'mock_1': 1
+
+        with self.subTest('Imbalance'):
+            attr_dets = {
+                'distribution': {
+                    'mock': self.validator.DEFAULT_IMBALANCE_RATIO,
+                    'mock_1': 1
+                }
             }
-        }

-        actual_reports = self.validator._check_imbalanced_attribute(
-            label_name, attr_name, attr_dets, 2)
+            actual_reports = self.validator._check_imbalanced_attribute(
+                label_name, attr_name, attr_dets)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], ImbalancedAttribute)
+            self.assertTrue(len(actual_reports) == 1)
+            self.assertIsInstance(actual_reports[0], ImbalancedAttribute)
+
+        with self.subTest('No Imbalance Warning'):
+            attr_dets = {
+                'distribution': {
+                    'mock': self.validator.DEFAULT_IMBALANCE_RATIO - 1,
+                    'mock_1': 1
+                }
+            }
+
+            actual_reports = self.validator._check_imbalanced_attribute(
+                label_name, attr_name, attr_dets)
+
+            self.assertTrue(len(actual_reports) == 0)


 class TestClassificationValidator(TestValidatorTemplate):
@@ -286,38 +345,74 @@ def setUpClass(cls):

     def test_check_imbalanced_bbox_dist_in_label(self):
         label_name = 'unittest'
-        bbox_label_stats = {
-            'x': {
-                'histogram': {
-                    'counts': [1]
+        most = int(self.validator.DEFAULT_BBOX_IMBALANCE * 100)
+        rest = 100 - most
+
+        with self.subTest('Imbalanced'):
+            bbox_label_stats = {
+                'x': {
+                    'histogram': {
+                        'counts': [most, rest]
+                    }
                 }
             }
-        }
+            reports = self.validator._check_imbalanced_bbox_dist_in_label(
+                label_name, bbox_label_stats)

-        actual_reports = self.validator._check_imbalanced_bbox_dist_in_label(
-            label_name, bbox_label_stats, 0.9, 1)
+            self.assertTrue(len(reports) == 1)
+            self.assertIsInstance(reports[0], ImbalancedBboxDistInLabel)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], ImbalancedBboxDistInLabel)
+        with self.subTest('No Imbalanced Warning'):
+            bbox_label_stats = {
+                'x': {
+                    'histogram': {
+                        'counts': [most - 1, rest]
+                    }
+                }
+            }
+            reports = self.validator._check_imbalanced_bbox_dist_in_label(
+                label_name, bbox_label_stats)
+
+            self.assertTrue(len(reports) == 0)

     def test_check_imbalanced_bbox_dist_in_attr(self):
         label_name = 'unit'
         attr_name = 'test'
-        bbox_attr_stats = {
-            'mock': {
-                'x': {
-                    'histogram': {
-                        'counts': [1]
+        most = int(self.validator.DEFAULT_BBOX_IMBALANCE * 100)
+        rest = 100 - most
+
+        with self.subTest('Imbalanced'):
+            bbox_attr_stats = {
+                'mock': {
+                    'x': {
+                        'histogram': {
+                            'counts': [most, rest]
+                        }
                     }
                 }
             }
-        }

-        actual_reports = self.validator._check_imbalanced_bbox_dist_in_attr(
-            label_name, attr_name, bbox_attr_stats, 0.9, 1)
+            reports = self.validator._check_imbalanced_bbox_dist_in_attr(
+                label_name, attr_name, bbox_attr_stats)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], ImbalancedBboxDistInAttribute)
+            self.assertTrue(len(reports) == 1)
+            self.assertIsInstance(reports[0], ImbalancedBboxDistInAttribute)
+
+        with self.subTest('No Imbalanced Warning'):
+            bbox_attr_stats = {
+                'mock': {
+                    'x': {
+                        'histogram': {
+                            'counts': [most - 1, rest]
+                        }
+                    }
+                }
+            }
+
+            reports = self.validator._check_imbalanced_bbox_dist_in_attr(
+                label_name, attr_name, bbox_attr_stats)
+
+            self.assertTrue(len(reports) == 0)

     def test_check_missing_bbox_annotation(self):
         stats = {
@@ -359,7 +454,6 @@ def test_check_invalid_value(self):
         self.assertTrue(len(actual_reports) == 1)
         self.assertIsInstance(actual_reports[0], InvalidValue)

-
     def test_check_far_from_label_mean(self):
         label_name = 'unittest'
         bbox_label_stats = {
@@ -406,7 +500,7 @@ class TestValidateAnnotations(TestValidatorTemplate):
     def test_validate_annotations_classification(self):
         actual_results = validate_annotations(self.dataset, 'classification')

-        with self.subTest('Test of statistics', i = 0):
+        with self.subTest('Test of statistics', i=0):
             actual_stats = actual_results['statistics']
             self.assertEqual(actual_stats['total_label_count'], 8)
             self.assertEqual(len(actual_stats['items_missing_label']), 1)
@@ -435,7 +529,7 @@ def test_validate_annotations_classification(self):
                 len(undefined_attr_dets['items_with_undefined_attr']), 1)
             self.assertEqual(undefined_attr_dets['distribution'], {'5': 1})

-        with self.subTest('Test of validation reports', i = 1):
+        with self.subTest('Test of validation reports', i=1):
             actual_reports = actual_results['validation_reports']
             report_types = [r['anomaly_type'] for r in actual_reports]
             report_count_by_type = Counter(report_types)
@@ -449,7 +543,7 @@ def test_validate_annotations_classification(self):
             self.assertEqual(report_count_by_type['OnlyOneAttributeValue'], 1)
             self.assertEqual(report_count_by_type['MissingAttribute'], 1)

-        with self.subTest('Test of summary', i = 2):
+        with self.subTest('Test of summary', i=2):
             actual_summary = actual_results['summary']
             expected_summary = {
                 'errors': 10,
@@ -461,7 +555,7 @@ def test_validate_annotations_detection(self):
         actual_results = validate_annotations(self.dataset, 'detection')

-        with self.subTest('Test of statistics', i = 0):
+        with self.subTest('Test of statistics', i=0):
             actual_stats = actual_results['statistics']
             self.assertEqual(actual_stats['total_bbox_count'], 8)
             self.assertEqual(len(actual_stats['items_missing_bbox']), 1)
@@ -489,22 +583,24 @@ def test_validate_annotations_detection(self):
             bbox_dist_item = actual_stats['bbox_distribution_in_dataset_item']
             self.assertEqual(sum(bbox_dist_item.values()), 8)

-        with self.subTest('Test of validation reports', i = 1):
+        with self.subTest('Test of validation reports', i=1):
             actual_reports = actual_results['validation_reports']
             report_types = [r['anomaly_type'] for r in actual_reports]
-            report_count_by_type = Counter(report_types)
+            count_by_type = Counter(report_types)

-            self.assertEqual(len(actual_reports), 11)
-            self.assertEqual(report_count_by_type['FewSamplesInAttribute'], 4)
-            self.assertEqual(report_count_by_type['UndefinedAttribute'], 4)
-            self.assertEqual(report_count_by_type['UndefinedLabel'], 2)
-            self.assertEqual(report_count_by_type['MissingBboxAnnotation'], 1)
+            self.assertEqual(len(actual_reports), 45)
+            self.assertEqual(count_by_type['ImbalancedBboxDistInAttribute'], 32)
+            self.assertEqual(count_by_type['FewSamplesInAttribute'], 4)
+            self.assertEqual(count_by_type['UndefinedAttribute'], 4)
+            self.assertEqual(count_by_type['ImbalancedBboxDistInLabel'], 2)
+            self.assertEqual(count_by_type['UndefinedLabel'], 2)
+            self.assertEqual(count_by_type['MissingBboxAnnotation'], 1)

-        with self.subTest('Test of summary', i = 2):
+        with self.subTest('Test of summary', i=2):
             actual_summary = actual_results['summary']
             expected_summary = {
                 'errors': 6,
-                'warnings': 5
+                'warnings': 39
             }

             self.assertEqual(actual_summary, expected_summary)
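For reference, the bucket-imbalance rule behind the new ImbalancedBboxDistInLabel and ImbalancedBboxDistInAttribute expectations reduces to a few lines of numpy. A standalone sketch of the logic as it lands in this patch (the helper name is hypothetical; the real code lives in DetectionValidator above):

    import numpy as np

    def bbox_dist_is_imbalanced(value_counts, thr=0.8, topk_ratio=0.1):
        # defaults mirror DEFAULT_BBOX_IMBALANCE and DEFAULT_BBOX_TOPK_BINS
        n_bucket = len(value_counts)
        if n_bucket < 2:      # single-bucket histograms are now skipped
            return False
        topk = max(1, int(np.around(n_bucket * topk_ratio)))
        topk_values = np.sort(value_counts)[-topk:]
        ratio = np.sum(topk_values) / np.sum(value_counts)
        return ratio >= thr   # inclusive, like the other thresholds

    # the `most`/`rest` values in the detection tests sit exactly on the edge:
    assert bbox_dist_is_imbalanced([80, 20])      # 80/100 == 0.8, warns
    assert not bbox_dist_is_imbalanced([79, 20])  # 79/99 < 0.8, silent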