From 2d9c954ac9dd09a81fd70f4976aa788a61e8d0c5 Mon Sep 17 00:00:00 2001
From: "Yi, Jihyeon"
Date: Mon, 29 Mar 2021 19:02:57 +0900
Subject: [PATCH] validator threshold adjustment + style correction

---
 datumaro/components/validator.py | 116 ++++++++------
 tests/test_validator.py         | 250 +++++++++++++++++++++----------
 2 files changed, 242 insertions(+), 124 deletions(-)

diff --git a/datumaro/components/validator.py b/datumaro/components/validator.py
index 84fba8d35c..7fd384c59c 100644
--- a/datumaro/components/validator.py
+++ b/datumaro/components/validator.py
@@ -27,6 +27,8 @@ class _Validator:
+    DEFAULT_FEW_SAMPLES = 1
+    DEFAULT_IMBALANCE_RATIO = 50
     """
     A base class for task-specific validators.
@@ -57,7 +59,10 @@ def __init__(self, task_type=None, ann_type=None, far_from_mean_thr=None):
         self.task_type = task_type
         self.ann_type = ann_type
+        self.far_from_mean_thr = far_from_mean_thr
+        self.imbalance_ratio_thr = self.DEFAULT_IMBALANCE_RATIO
+        self.few_samples_thr = self.DEFAULT_FEW_SAMPLES

     def compute_statistics(self, dataset):
         """
@@ -300,7 +305,7 @@ def _update_props_far_from_mean(item, ann):
             defined_label_dist[category.name] = 0

         for item in dataset:
-            ann_count = [ann.type == self.ann_type \
+            ann_count = [ann.type == self.ann_type
                 for ann in item.annotations].count(True)

             if self.task_type == TaskType.classification:
@@ -371,7 +376,7 @@ def _update_props_far_from_mean(item, ann):
                     attr_dets = defined_attr_stats[attr]

                 if self.task_type == TaskType.detection and \
-                    ann.type == self.ann_type:
+                        ann.type == self.ann_type:
                     bbox_attr_label = bbox_dist_by_attr.setdefault(
                         label_name, {})
                     bbox_attr_stats = bbox_attr_label.setdefault(
@@ -441,8 +446,8 @@ def _check_undefined_attribute(self, label_name, attr_name, attr_dets):
     def _check_label_defined_but_not_found(self, stats):
         validation_reports = []
         count_by_defined_labels = stats['label_distribution']['defined_labels']
-        labels_not_found = [label_name \
-            for label_name, count in count_by_defined_labels.items() \
+        labels_not_found = [label_name
+            for label_name, count in count_by_defined_labels.items()
             if count == 0]

         for label_name in labels_not_found:
@@ -453,8 +458,8 @@ def _check_attribute_defined_but_not_found(self, label_name, attr_stats):
         validation_reports = []
-        attrs_not_found = [attr_name \
-            for attr_name, attr_dets in attr_stats.items() \
+        attrs_not_found = [attr_name
+            for attr_name, attr_dets in attr_stats.items()
             if len(attr_dets['distribution']) == 0]

         for attr_name in attrs_not_found:
@@ -467,8 +472,8 @@ def _check_only_one_label(self, stats):
         validation_reports = []
         count_by_defined_labels = stats['label_distribution']['defined_labels']
-        labels_found = [label_name \
-            for label_name, count in count_by_defined_labels.items() \
+        labels_found = [label_name
+            for label_name, count in count_by_defined_labels.items()
             if count > 0]

         if len(labels_found) == 1:
@@ -488,12 +493,14 @@ def _check_only_one_attribute_value(self, label_name, attr_name, attr_dets):

         return validation_reports

-    def _check_few_samples_in_label(self, stats, thr):
+    def _check_few_samples_in_label(self, stats):
         validation_reports = []
+        thr = self.few_samples_thr
+
         defined_label_dist = stats['label_distribution']['defined_labels']
-        labels_with_few_samples = [(label_name, count) \
-            for label_name, count in defined_label_dist.items() \
-            if 0 < count < thr]
+        labels_with_few_samples = [(label_name, count)
+            for label_name, count in defined_label_dist.items()
+            if 0 < count <= thr]

         for label_name, count in labels_with_few_samples:
             validation_reports += self._generate_validation_report(
@@ -502,11 +509,13 @@ def _check_few_samples_in_label(self, stats):
         return validation_reports

     def _check_few_samples_in_attribute(self, label_name,
-            attr_name, attr_dets, thr):
+            attr_name, attr_dets):
         validation_reports = []
-        attr_values_with_few_samples = [(attr_value, count) \
-            for attr_value, count in attr_dets['distribution'].items() \
-            if count < thr]
+        thr = self.few_samples_thr
+
+        attr_values_with_few_samples = [(attr_value, count)
+            for attr_value, count in attr_dets['distribution'].items()
+            if count <= thr]

         for attr_value, count in attr_values_with_few_samples:
             details = (label_name, attr_name, attr_value, count)
@@ -515,11 +524,12 @@ def _check_few_samples_in_attribute(self, label_name,

         return validation_reports

-    def _check_imbalanced_labels(self, stats, thr):
+    def _check_imbalanced_labels(self, stats):
         validation_reports = []
+        thr = self.imbalance_ratio_thr

         defined_label_dist = stats['label_distribution']['defined_labels']
-        count_by_defined_labels = [count \
+        count_by_defined_labels = [count
             for label, count in defined_label_dist.items()]

         if len(count_by_defined_labels) == 0:
@@ -528,15 +538,15 @@
         count_max = np.max(count_by_defined_labels)
         count_min = np.min(count_by_defined_labels)
         balance = count_max / count_min if count_min > 0 else float('inf')
-        if balance > thr:
+        if balance >= thr:
             validation_reports += self._generate_validation_report(
                 ImbalancedLabels, Severity.warning)

         return validation_reports

-    def _check_imbalanced_attribute(self, label_name, attr_name,
-            attr_dets, thr):
+    def _check_imbalanced_attribute(self, label_name, attr_name, attr_dets):
         validation_reports = []
+        thr = self.imbalance_ratio_thr

         count_by_defined_attr = list(attr_dets['distribution'].values())
         if len(count_by_defined_attr) == 0:
@@ -545,7 +555,7 @@ def _check_imbalanced_attribute(self, label_name, attr_name,
         count_max = np.max(count_by_defined_attr)
         count_min = np.min(count_by_defined_attr)
         balance = count_max / count_min if count_min > 0 else float('inf')
-        if balance > thr:
+        if balance >= thr:
             validation_reports += self._generate_validation_report(
                 ImbalancedAttribute, Severity.warning,
                 label_name, attr_name)
@@ -607,8 +617,8 @@ def generate_reports(self, stats):
         reports += self._check_multi_label_annotations(stats)
         reports += self._check_label_defined_but_not_found(stats)
         reports += self._check_only_one_label(stats)
-        reports += self._check_few_samples_in_label(stats, 2)
-        reports += self._check_imbalanced_labels(stats, 5)
+        reports += self._check_few_samples_in_label(stats)
+        reports += self._check_imbalanced_labels(stats)

         label_dist = stats['label_distribution']
         attr_dist = stats['attribute_distribution']
@@ -625,9 +635,9 @@ def generate_reports(self, stats):
             for attr_name, attr_dets in attr_stats.items():
                 reports += self._check_few_samples_in_attribute(
-                    label_name, attr_name, attr_dets, 2)
+                    label_name, attr_name, attr_dets)
                 reports += self._check_imbalanced_attribute(
-                    label_name, attr_name, attr_dets, 5)
+                    label_name, attr_name, attr_dets)
                 reports += self._check_only_one_attribute_value(
                     label_name, attr_name, attr_dets)
                 reports += self._check_missing_attribute(
@@ -649,25 +659,33 @@ class DetectionValidator(_Validator):
     A validator class for detection tasks.
     """
-    DEFAULT_FAR_FROM_MEAN = 2.0
+    DEFAULT_FAR_FROM_MEAN = 5.0
+    DEFAULT_BBOX_IMBALANCE = 0.8
+    DEFAULT_BBOX_TOPK_BINS = 0.1

     def __init__(self):
         super().__init__(TaskType.detection, AnnotationType.bbox,
             far_from_mean_thr=self.DEFAULT_FAR_FROM_MEAN)
+        self.bbox_imbalance_thr = self.DEFAULT_BBOX_IMBALANCE
+        self.bbox_topk_bins_ratio = self.DEFAULT_BBOX_TOPK_BINS

-    def _check_imbalanced_bbox_dist_in_label(self, label_name, bbox_label_stats,
-            thr, topk_ratio):
+    def _check_imbalanced_bbox_dist_in_label(self, label_name,
+            bbox_label_stats):
         validation_reports = []
+        thr = self.bbox_imbalance_thr
+        topk_ratio = self.bbox_topk_bins_ratio

         for prop, prop_stats in bbox_label_stats.items():
             value_counts = prop_stats['histogram']['counts']
             n_bucket = len(value_counts)
-            topk = int(np.around(n_bucket * topk_ratio))
+            if n_bucket < 2:
+                continue
+            topk = max(1, int(np.around(n_bucket * topk_ratio)))
             if topk > 0:
                 topk_values = np.sort(value_counts)[-topk:]
                 ratio = np.sum(topk_values) / np.sum(value_counts)
-                if ratio > thr:
+                if ratio >= thr:
                     details = (label_name, prop)
                     validation_reports += self._generate_validation_report(
                         ImbalancedBboxDistInLabel, Severity.warning, *details)
@@ -675,19 +693,23 @@
         return validation_reports

     def _check_imbalanced_bbox_dist_in_attr(self, label_name, attr_name,
-            bbox_attr_stats, thr, topk_ratio):
+            bbox_attr_stats):
         validation_reports = []
+        thr = self.bbox_imbalance_thr
+        topk_ratio = self.bbox_topk_bins_ratio

         for attr_value, value_stats in bbox_attr_stats.items():
             for prop, prop_stats in value_stats.items():
                 value_counts = prop_stats['histogram']['counts']
                 n_bucket = len(value_counts)
-                topk = int(np.around(n_bucket * topk_ratio))
+                if n_bucket < 2:
+                    continue
+                topk = max(1, int(np.around(n_bucket * topk_ratio)))
                 if topk > 0:
                     topk_values = np.sort(value_counts)[-topk:]
                     ratio = np.sum(topk_values) / np.sum(value_counts)
-                    if ratio > thr:
+                    if ratio >= thr:
                         details = (label_name, attr_name, attr_value, prop)
                         validation_reports += self._generate_validation_report(
                             ImbalancedBboxDistInAttribute,
@@ -744,9 +766,9 @@ def _check_far_from_label_mean(self, label_name, bbox_label_stats):
             if prop_stats['mean'] is not None:
                 mean = round(prop_stats['mean'], 2)

-            for item_dets, anns_far_from_mean in items_far_from_mean.items():
+            for item_dets, anns_far in items_far_from_mean.items():
                 item_id, item_subset = item_dets
-                for ann_id, val in anns_far_from_mean.items():
+                for ann_id, val in anns_far.items():
                     val = round(val, 2)
                     details = (item_subset, label_name, ann_id, prop, mean, val)
                     validation_reports += self._generate_validation_report(
@@ -763,9 +785,9 @@ def _check_far_from_attr_mean(self, label_name, attr_name, bbox_attr_stats):
                 if prop_stats['mean'] is not None:
                     mean = round(prop_stats['mean'], 2)

-                for item_dets, anns_far_from_mean in items_far_from_mean.items():
+                for item_dets, anns_far in items_far_from_mean.items():
                     item_id, item_subset = item_dets
-                    for ann_id, val in anns_far_from_mean.items():
+                    for ann_id, val in anns_far.items():
                         val = round(val, 2)
                         details = (item_subset, label_name, ann_id,
                             attr_name, attr_value, prop, mean, val)
                         validation_reports += self._generate_validation_report(
@@ -798,8 +820,8 @@ def generate_reports(self, stats):
         reports += self._check_missing_bbox_annotation(stats)
         reports += self._check_label_defined_but_not_found(stats)
         reports += self._check_only_one_label(stats)
-        reports += self._check_few_samples_in_label(stats, 2)
-        reports += self._check_imbalanced_labels(stats, 5)
+        reports += self._check_few_samples_in_label(stats)
+        reports += self._check_imbalanced_labels(stats)
         reports += self._check_negative_length(stats)
         reports += self._check_invalid_value(stats)
@@ -821,9 +843,9 @@ def generate_reports(self, stats):
             for attr_name, attr_dets in attr_stats.items():
                 reports += self._check_few_samples_in_attribute(
-                    label_name, attr_name, attr_dets, 2)
+                    label_name, attr_name, attr_dets)
                 reports += self._check_imbalanced_attribute(
-                    label_name, attr_name, attr_dets, 5)
+                    label_name, attr_name, attr_dets)
                 reports += self._check_only_one_attribute_value(
                     label_name, attr_name, attr_dets)
                 reports += self._check_missing_attribute(
@@ -835,13 +857,13 @@ def generate_reports(self, stats):
             reports += self._check_far_from_label_mean(
                 label_name, bbox_label_stats)
             reports += self._check_imbalanced_bbox_dist_in_label(
-                label_name, bbox_label_stats, 1, 0.25)
+                label_name, bbox_label_stats)

             for attr_name, bbox_attr_stats in bbox_attr_label.items():
                 reports += self._check_far_from_attr_mean(
                     label_name, attr_name, bbox_attr_stats)
                 reports += self._check_imbalanced_bbox_dist_in_attr(
-                    label_name, attr_name, bbox_attr_stats, 1, 0.25)
+                    label_name, attr_name, bbox_attr_stats)

         for label_name, label_stats in undefined_label_dist.items():
             reports += self._check_undefined_label(label_name, label_stats)
@@ -889,11 +911,11 @@ def validate_annotations(dataset: IDataset, task_type: Union[str, TaskType]):
     # generate validation reports and summary
     reports = validator.generate_reports(stats)
-    reports = list(map(lambda r : r.to_dict(), reports))
+    reports = list(map(lambda r: r.to_dict(), reports))

     summary = {
-        'errors': sum(map(lambda r : r['severity'] == 'error', reports)),
-        'warnings': sum(map(lambda r : r['severity'] == 'warning', reports))
+        'errors': sum(map(lambda r: r['severity'] == 'error', reports)),
+        'warnings': sum(map(lambda r: r['severity'] == 'warning', reports))
    }

     validation_results['validation_reports'] = reports
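The relocated thresholds above also switch to inclusive comparisons, so a value sitting exactly on a default now fires a report. A minimal sketch of the boundary behaviour, not part of the patch; it assumes the report classes are importable from datumaro.components.errors (the test module's imports are outside this diff):

    from datumaro.components.errors import (FewSamplesInLabel,
        ImbalancedLabels)  # assumed import location
    from datumaro.components.validator import ClassificationValidator

    validator = ClassificationValidator()

    # few_samples_thr defaults to DEFAULT_FEW_SAMPLES == 1; the new
    # `0 < count <= thr` check reports a label with exactly one sample.
    stats = {'label_distribution': {'defined_labels': {'cat': 1, 'dog': 100}}}
    assert any(isinstance(r, FewSamplesInLabel)
        for r in validator._check_few_samples_in_label(stats))

    # imbalance_ratio_thr defaults to DEFAULT_IMBALANCE_RATIO == 50;
    # max/min == 100/2 == 50 now fires because `>` became `>=`.
    stats = {'label_distribution': {'defined_labels': {'cat': 2, 'dog': 100}}}
    assert any(isinstance(r, ImbalancedLabels)
        for r in validator._check_imbalanced_labels(stats))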
diff --git a/tests/test_validator.py b/tests/test_validator.py
index 5c348103df..d6c8700d80 100644
--- a/tests/test_validator.py
+++ b/tests/test_validator.py
@@ -19,26 +19,27 @@
 from datumaro.components.validator import (ClassificationValidator,
     DetectionValidator, TaskType, validate_annotations, _Validator)

+
 class TestValidatorTemplate(TestCase):
     @classmethod
     def setUpClass(cls):
         cls.dataset = Dataset.from_iterable([
             DatasetItem(id=1, image=np.ones((5, 5, 3)), annotations=[
-                Label(1, id=0, attributes={ 'a': 1, 'b': 7, }),
+                Label(1, id=0, attributes={'a': 1, 'b': 7, }),
                 Bbox(1, 2, 3, 4, id=1, label=0, attributes={
                     'a': 1, 'b': 2,
                 }),
             ]),
             DatasetItem(id=2, image=np.ones((2, 4, 3)), annotations=[
-                Label(2, id=0, attributes={ 'a': 2, 'b': 2, }),
+                Label(2, id=0, attributes={'a': 2, 'b': 2, }),
                 Bbox(2, 3, 1, 4, id=1, label=0, attributes={
                     'a': 1, 'b': 1,
                 }),
             ]),
             DatasetItem(id=3),
             DatasetItem(id=4, image=np.ones((2, 4, 3)), annotations=[
-                Label(0, id=0, attributes={ 'b': 4, }),
-                Label(1, id=1, attributes={ 'a': 11, 'b': 7, }),
+                Label(0, id=0, attributes={'b': 4, }),
+                Label(1, id=1, attributes={'a': 11, 'b': 7, }),
                 Bbox(1, 3, 2, 4, id=2, label=0, attributes={
                     'a': 2, 'b': 1,
                 }),
@@ -47,30 +48,30 @@ def setUpClass(cls):
                 }),
             ]),
             DatasetItem(id=5, image=np.ones((2, 4, 3)), annotations=[
-                Label(0, id=0, attributes={ 'a': 20, 'b': 10 }),
+                Label(0, id=0, attributes={'a': 20, 'b': 10, }),
                 Bbox(1, 2, 3, 4, id=1, label=1, attributes={
                     'a': 1, 'b': 1,
                 }),
             ]),
             DatasetItem(id=6, image=np.ones((2, 4, 3)), annotations=[
-                Label(1, id=0, attributes={ 'a': 11, 'b': 2, 'c': 3}),
+                Label(1, id=0, attributes={'a': 11, 'b': 2, 'c': 3, }),
                 Bbox(2, 3, 4, 1, id=1, label=1, attributes={
                     'a': 2, 'b': 2,
                 }),
             ]),
             DatasetItem(id=7, image=np.ones((2, 4, 3)), annotations=[
-                Label(1, id=0, attributes={ 'a': 1, 'b': 2, 'c': 5 }),
+                Label(1, id=0, attributes={'a': 1, 'b': 2, 'c': 5, }),
                 Bbox(1, 2, 3, 4, id=1, label=2, attributes={
                     'a': 1, 'b': 2,
                 }),
             ]),
             DatasetItem(id=8, image=np.ones((2, 4, 3)), annotations=[
-                Label(2, id=0, attributes={ 'a': 7, 'b': 9, 'c': 5 }),
+                Label(2, id=0, attributes={'a': 7, 'b': 9, 'c': 5, }),
                 Bbox(2, 1, 3, 4, id=1, label=2, attributes={
                     'a': 2, 'b': 1,
                 }),
             ]),
-        ], categories=[[f'label_{i}', None, { 'a', 'b' }] \
+        ], categories=[[f'label_{i}', None, {'a', 'b', }]
             for i in range(2)])
@@ -124,7 +125,7 @@ def test_check_undefined_attribute(self):
         label_name = 'unit'
         attr_name = 'test'
         attr_dets = {
-            'items_with_undefined_attr':[(1, 'unittest')]
+            'items_with_undefined_attr': [(1, 'unittest')]
         }

         actual_reports = self.validator._check_undefined_attribute(
@@ -193,64 +194,122 @@ def test_check_only_one_attribute_value(self):
         self.assertIsInstance(actual_reports[0], OnlyOneAttributeValue)

     def test_check_few_samples_in_label(self):
-        stats = {
-            'label_distribution': {
-                'defined_labels': {
-                    'unit': 1
+        with self.subTest('Few Samples'):
+            stats = {
+                'label_distribution': {
+                    'defined_labels': {
+                        'unit': self.validator.DEFAULT_FEW_SAMPLES
+                    }
                 }
             }
-        }

-        actual_reports = self.validator._check_few_samples_in_label(stats, 2)
+            actual_reports = self.validator._check_few_samples_in_label(stats)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], FewSamplesInLabel)
+            self.assertTrue(len(actual_reports) == 1)
+            self.assertIsInstance(actual_reports[0], FewSamplesInLabel)
+
+        with self.subTest('No Few Samples Warning'):
+            stats = {
+                'label_distribution': {
+                    'defined_labels': {
+                        'unit': self.validator.DEFAULT_FEW_SAMPLES + 1
+                    }
+                }
+            }
+
+            actual_reports = self.validator._check_few_samples_in_label(stats)
+
+            self.assertTrue(len(actual_reports) == 0)

     def test_check_few_samples_in_attribute(self):
         label_name = 'unit'
         attr_name = 'test'
-        attr_dets = {
-            'distribution': {
-                'mock': 1
+
+        with self.subTest('Few Samples'):
+            attr_dets = {
+                'distribution': {
+                    'mock': self.validator.DEFAULT_FEW_SAMPLES
+                }
             }
-        }

-        actual_reports = self.validator._check_few_samples_in_attribute(
-            label_name, attr_name, attr_dets, 2)
+            actual_reports = self.validator._check_few_samples_in_attribute(
+                label_name, attr_name, attr_dets)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], FewSamplesInAttribute)
+            self.assertTrue(len(actual_reports) == 1)
+            self.assertIsInstance(actual_reports[0], FewSamplesInAttribute)
+
+        with self.subTest('No Few Samples Warning'):
+            attr_dets = {
+                'distribution': {
+                    'mock': self.validator.DEFAULT_FEW_SAMPLES + 1
+                }
+            }
+
+            actual_reports = self.validator._check_few_samples_in_attribute(
+                label_name, attr_name, attr_dets)
+
+            self.assertTrue(len(actual_reports) == 0)

     def test_check_imbalanced_labels(self):
-        stats = {
-            'label_distribution': {
-                'defined_labels': {
-                    'unit': 5,
-                    'test': 1
+        with self.subTest('Imbalance'):
+            stats = {
+                'label_distribution': {
+                    'defined_labels': {
+                        'unit': self.validator.DEFAULT_IMBALANCE_RATIO,
+                        'test': 1
+                    }
                 }
             }
-        }

-        actual_reports = self.validator._check_imbalanced_labels(stats, 2)
+            actual_reports = self.validator._check_imbalanced_labels(stats)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], ImbalancedLabels)
+            self.assertTrue(len(actual_reports) == 1)
+            self.assertIsInstance(actual_reports[0], ImbalancedLabels)
+
+        with self.subTest('No Imbalance Warning'):
+            stats = {
+                'label_distribution': {
+                    'defined_labels': {
+                        'unit': self.validator.DEFAULT_IMBALANCE_RATIO - 1,
+                        'test': 1
+                    }
+                }
+            }
+
+            actual_reports = self.validator._check_imbalanced_labels(stats)
+
+            self.assertTrue(len(actual_reports) == 0)

     def test_check_imbalanced_attribute(self):
         label_name = 'unit'
         attr_name = 'test'
-        attr_dets = {
-            'distribution': {
-                'mock': 5,
-                'mock_1': 1
+
+        with self.subTest('Imbalance'):
+            attr_dets = {
+                'distribution': {
+                    'mock': self.validator.DEFAULT_IMBALANCE_RATIO,
+                    'mock_1': 1
+                }
             }
-        }

-        actual_reports = self.validator._check_imbalanced_attribute(
-            label_name, attr_name, attr_dets, 2)
+            actual_reports = self.validator._check_imbalanced_attribute(
+                label_name, attr_name, attr_dets)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], ImbalancedAttribute)
+            self.assertTrue(len(actual_reports) == 1)
+            self.assertIsInstance(actual_reports[0], ImbalancedAttribute)
+
+        with self.subTest('No Imbalance Warning'):
+            attr_dets = {
+                'distribution': {
+                    'mock': self.validator.DEFAULT_IMBALANCE_RATIO - 1,
+                    'mock_1': 1
+                }
+            }
+
+            actual_reports = self.validator._check_imbalanced_attribute(
+                label_name, attr_name, attr_dets)
+
+            self.assertTrue(len(actual_reports) == 0)


 class TestClassificationValidator(TestValidatorTemplate):
@@ -286,38 +345,74 @@ def setUpClass(cls):

     def test_check_imbalanced_bbox_dist_in_label(self):
         label_name = 'unittest'
-        bbox_label_stats = {
-            'x': {
-                'histogram': {
-                    'counts': [1]
+        most = int(self.validator.DEFAULT_BBOX_IMBALANCE * 100)
+        rest = 100 - most
+
+        with self.subTest('Imbalanced'):
+            bbox_label_stats = {
+                'x': {
+                    'histogram': {
+                        'counts': [most, rest]
+                    }
                 }
             }
-        }
+            reports = self.validator._check_imbalanced_bbox_dist_in_label(
+                label_name, bbox_label_stats)

-        actual_reports = self.validator._check_imbalanced_bbox_dist_in_label(
-            label_name, bbox_label_stats, 0.9, 1)
+            self.assertTrue(len(reports) == 1)
+            self.assertIsInstance(reports[0], ImbalancedBboxDistInLabel)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], ImbalancedBboxDistInLabel)
+        with self.subTest('No Imbalanced Warning'):
+            bbox_label_stats = {
+                'x': {
+                    'histogram': {
+                        'counts': [most - 1, rest]
+                    }
+                }
+            }
+            reports = self.validator._check_imbalanced_bbox_dist_in_label(
+                label_name, bbox_label_stats)
+
+            self.assertTrue(len(reports) == 0)

     def test_check_imbalanced_bbox_dist_in_attr(self):
         label_name = 'unit'
         attr_name = 'test'
-        bbox_attr_stats = {
-            'mock': {
-                'x': {
-                    'histogram': {
-                        'counts': [1]
+        most = int(self.validator.DEFAULT_BBOX_IMBALANCE * 100)
+        rest = 100 - most
+
+        with self.subTest('Imbalanced'):
+            bbox_attr_stats = {
+                'mock': {
+                    'x': {
+                        'histogram': {
+                            'counts': [most, rest]
+                        }
                     }
                 }
             }
-        }

-        actual_reports = self.validator._check_imbalanced_bbox_dist_in_attr(
-            label_name, attr_name, bbox_attr_stats, 0.9, 1)
+            reports = self.validator._check_imbalanced_bbox_dist_in_attr(
+                label_name, attr_name, bbox_attr_stats)

-        self.assertTrue(len(actual_reports) == 1)
-        self.assertIsInstance(actual_reports[0], ImbalancedBboxDistInAttribute)
+            self.assertTrue(len(reports) == 1)
+            self.assertIsInstance(reports[0], ImbalancedBboxDistInAttribute)
+
+        with self.subTest('No Imbalanced Warning'):
+            bbox_attr_stats = {
+                'mock': {
+                    'x': {
+                        'histogram': {
+                            'counts': [most - 1, rest]
+                        }
+                    }
+                }
+            }
+
+            reports = self.validator._check_imbalanced_bbox_dist_in_attr(
+                label_name, attr_name, bbox_attr_stats)
+
+            self.assertTrue(len(reports) == 0)

     def test_check_missing_bbox_annotation(self):
         stats = {
@@ -359,7 +454,6 @@ def test_check_invalid_value(self):
         self.assertTrue(len(actual_reports) == 1)
         self.assertIsInstance(actual_reports[0], InvalidValue)

-
     def test_check_far_from_label_mean(self):
         label_name = 'unittest'
         bbox_label_stats = {
@@ -406,7 +500,7 @@ class TestValidateAnnotations(TestValidatorTemplate):
     def test_validate_annotations_classification(self):
         actual_results = validate_annotations(self.dataset, 'classification')

-        with self.subTest('Test of statistics', i = 0):
+        with self.subTest('Test of statistics', i=0):
             actual_stats = actual_results['statistics']
             self.assertEqual(actual_stats['total_label_count'], 8)
             self.assertEqual(len(actual_stats['items_missing_label']), 1)
@@ -435,7 +529,7 @@ def test_validate_annotations_classification(self):
                 len(undefined_attr_dets['items_with_undefined_attr']), 1)
             self.assertEqual(undefined_attr_dets['distribution'], {'5': 1})

-        with self.subTest('Test of validation reports', i = 1):
+        with self.subTest('Test of validation reports', i=1):
             actual_reports = actual_results['validation_reports']
             report_types = [r['anomaly_type'] for r in actual_reports]
             report_count_by_type = Counter(report_types)
@@ -449,7 +543,7 @@ def test_validate_annotations_classification(self):
             self.assertEqual(report_count_by_type['OnlyOneAttributeValue'], 1)
             self.assertEqual(report_count_by_type['MissingAttribute'], 1)

-        with self.subTest('Test of summary', i = 2):
+        with self.subTest('Test of summary', i=2):
             actual_summary = actual_results['summary']
             expected_summary = {
                 'errors': 10,
@@ -461,7 +555,7 @@ def test_validate_annotations_detection(self):
         actual_results = validate_annotations(self.dataset, 'detection')

-        with self.subTest('Test of statistics', i = 0):
+        with self.subTest('Test of statistics', i=0):
             actual_stats = actual_results['statistics']
             self.assertEqual(actual_stats['total_bbox_count'], 8)
             self.assertEqual(len(actual_stats['items_missing_bbox']), 1)
@@ -489,22 +583,24 @@ def test_validate_annotations_detection(self):
             bbox_dist_item = actual_stats['bbox_distribution_in_dataset_item']
             self.assertEqual(sum(bbox_dist_item.values()), 8)

-        with self.subTest('Test of validation reports', i = 1):
+        with self.subTest('Test of validation reports', i=1):
             actual_reports = actual_results['validation_reports']
             report_types = [r['anomaly_type'] for r in actual_reports]
-            report_count_by_type = Counter(report_types)
+            count_by_type = Counter(report_types)

-            self.assertEqual(len(actual_reports), 11)
-            self.assertEqual(report_count_by_type['FewSamplesInAttribute'], 4)
-            self.assertEqual(report_count_by_type['UndefinedAttribute'], 4)
-            self.assertEqual(report_count_by_type['UndefinedLabel'], 2)
-            self.assertEqual(report_count_by_type['MissingBboxAnnotation'], 1)
+            self.assertEqual(len(actual_reports), 45)
+            self.assertEqual(count_by_type['ImbalancedBboxDistInAttribute'], 32)
+            self.assertEqual(count_by_type['FewSamplesInAttribute'], 4)
+            self.assertEqual(count_by_type['UndefinedAttribute'], 4)
+            self.assertEqual(count_by_type['ImbalancedBboxDistInLabel'], 2)
+            self.assertEqual(count_by_type['UndefinedLabel'], 2)
+            self.assertEqual(count_by_type['MissingBboxAnnotation'], 1)

-        with self.subTest('Test of summary', i = 2):
+        with self.subTest('Test of summary', i=2):
             actual_summary = actual_results['summary']
             expected_summary = {
                 'errors': 6,
-                'warnings': 5
+                'warnings': 39
             }

             self.assertEqual(actual_summary, expected_summary)
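For reference, the bucket-imbalance rule behind the new ImbalancedBboxDistInLabel and ImbalancedBboxDistInAttribute expectations reduces to a few lines of numpy. A standalone sketch of the logic as it lands in this patch (the helper name is hypothetical; the real code lives in DetectionValidator above):

    import numpy as np

    def bbox_dist_is_imbalanced(value_counts, thr=0.8, topk_ratio=0.1):
        # defaults mirror DEFAULT_BBOX_IMBALANCE and DEFAULT_BBOX_TOPK_BINS
        n_bucket = len(value_counts)
        if n_bucket < 2:      # single-bucket histograms are now skipped
            return False
        topk = max(1, int(np.around(n_bucket * topk_ratio)))
        topk_values = np.sort(value_counts)[-topk:]
        ratio = np.sum(topk_values) / np.sum(value_counts)
        return ratio >= thr   # inclusive, like the other thresholds

    # the `most`/`rest` values in the detection tests sit exactly on the edge:
    assert bbox_dist_is_imbalanced([80, 20])      # 80/100 == 0.8, warns
    assert not bbox_dist_is_imbalanced([79, 20])  # 79/99 < 0.8, silent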