From b7e6c40c078660638d96ebf017e16cadded50bea Mon Sep 17 00:00:00 2001 From: woos Date: Mon, 24 May 2021 12:52:37 +0900 Subject: [PATCH 1/5] configurable cli support --- datumaro/cli/contexts/project/__init__.py | 10 +- datumaro/components/validator.py | 109 +++++++++++++++++----- tests/test_validator.py | 57 ++++++----- 3 files changed, 130 insertions(+), 46 deletions(-) diff --git a/datumaro/cli/contexts/project/__init__.py b/datumaro/cli/contexts/project/__init__.py index 963f52158e..528d643914 100644 --- a/datumaro/cli/contexts/project/__init__.py +++ b/datumaro/cli/contexts/project/__init__.py @@ -677,7 +677,7 @@ def transform_command(args): extra_args = {} if hasattr(transform, 'parse_cmdline'): extra_args = transform.parse_cmdline(args.extra_args) - + log.info("Loading the project...") dataset = project.make_dataset() @@ -808,6 +808,8 @@ def build_validate_parser(parser_ctor=argparse.ArgumentParser): help="Subset to validate (default: None)") parser.add_argument('-p', '--project', dest='project_dir', default='.', help="Directory of the project to validate (default: current dir)") + parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None, + help="Optional arguments for validator (pass '-- -h' for help)") parser.set_defaults(command=validate_command) return parser @@ -822,7 +824,11 @@ def validate_command(args): if subset_name is not None: dataset = dataset.get_subset(subset_name) dst_file_name += f'-{subset_name}' - validation_results = validate_annotations(dataset, task_type) + + extra_args = {} + from datumaro.components.validator import _Validator + extra_args = _Validator.parse_cmdline(args.extra_args) + validation_results = validate_annotations(dataset, task_type, **extra_args) def numpy_encoder(obj): if isinstance(obj, np.generic): diff --git a/datumaro/components/validator.py b/datumaro/components/validator.py index 2d03363475..9b319dbe16 100644 --- a/datumaro/components/validator.py +++ b/datumaro/components/validator.py @@ -18,6 +18,7 @@ NegativeLength, InvalidValue, FarFromLabelMean, FarFromAttrMean, OnlyOneAttributeValue) from datumaro.components.extractor import AnnotationType, LabelCategories +from datumaro.components.cli_plugin import CliPlugin from datumaro.util import parse_str_enum_value @@ -26,13 +27,7 @@ TaskType = Enum('TaskType', ['classification', 'detection', 'segmentation']) -class _Validator: - DEFAULT_FEW_SAMPLES = 1 - DEFAULT_IMBALANCE_RATIO = 50 - DEFAULT_FAR_FROM_MEAN = 5.0 - DEFAULT_DOMINANCE_RATIO = 0.8 - DEFAULT_DOMINANCE_TOPK_BINS = 0.1 - +class _Validator(CliPlugin): # statistics templates numerical_stat_template = { 'items_far_from_mean': {}, @@ -64,7 +59,48 @@ class _Validator: Abstract method that must be implemented in a subclass. 
""" - def __init__(self, task_type=None): + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-fs', '--few_samples_thr', default=1, type=int, + help="Threshold for giving a warning for minimum number of samples per class") + parser.add_argument('-ir', '--imbalance_ratio_thr', default=50, type=int, + help="Threshold for giving data imbalance warning; IR(imbalance ratio) = majority/minority") + parser.add_argument('-m', '--far_from_mean_thr', default=5.0, type=float, + help="Threshold for giving a warning that data is far from mean;" + "A constant used to define mean +/- k * standard deviation;") + parser.add_argument('-dr', '--dominance_ratio_thr', default=0.8, type=float, + help="Threshold for giving a warning for bounding box imbalance;" + "Dominace_ratio = ratio of Top-k bin to total in histogram;") + parser.add_argument('-k', '--topk_bins', default=0.1, type=float, + help="Ratio of bins with the highest number of data to total bins in the histogram;" + "[0, 1]; 0.1 = 10%;") + return parser + + def __init__(self, task_type, few_samples_thr=None, + imbalance_ratio_thr=None, far_from_mean_thr=None, + dominance_ratio_thr=None, topk_bins=None, ): + """ + Validator + + Parameters + --------------- + few_samples_thr: int + minimum number of samples per class + warn user when samples per class is less than threshold + imbalance_ratio_thr: int + ratio of majority attribute to minority attribute + warn user when annotations are unevenly distributed + far_from_mean_thr: float + constant used to define mean +/- m * stddev + warn user when there are too big or small values + dominance_ratio_thr: float + ratio of Top-k bin to total + warn user when dominance ratio is over threshold + topk_bins: float + ratio of selected bins with most item number to total bins + warn user when values are not evenly distributed + """ self.task_type = parse_str_enum_value(task_type, TaskType, default=TaskType.classification) @@ -78,11 +114,11 @@ def __init__(self, task_type=None): self.ann_types = {AnnotationType.mask, AnnotationType.polygon} self.str_ann_type = "mask or polygon" - self.far_from_mean_thr = self.DEFAULT_FAR_FROM_MEAN - self.imbalance_ratio_thr = self.DEFAULT_IMBALANCE_RATIO - self.few_samples_thr = self.DEFAULT_FEW_SAMPLES - self.dominance_thr = self.DEFAULT_DOMINANCE_RATIO - self.topk_bins_ratio = self.DEFAULT_DOMINANCE_TOPK_BINS + self.few_samples_thr = few_samples_thr + self.imbalance_ratio_thr = imbalance_ratio_thr + self.far_from_mean_thr = far_from_mean_thr + self.dominance_thr = dominance_ratio_thr + self.topk_bins_ratio = topk_bins def _compute_common_statistics(self, dataset): defined_attr_template = { @@ -537,8 +573,12 @@ class ClassificationValidator(_Validator): A validator class for classification tasks. """ - def __init__(self): - super().__init__(TaskType.classification) + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.classification, few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) def _check_multi_label_annotations(self, stats): validation_reports = [] @@ -636,8 +676,12 @@ class DetectionValidator(_Validator): """ A validator class for detection tasks. 
""" - def __init__(self): - super().__init__(TaskType.detection) + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.detection, few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) def _check_negative_length(self, stats): validation_reports = [] @@ -917,8 +961,12 @@ class SegmentationValidator(_Validator): A validator class for (instance) segmentation tasks. """ - def __init__(self): - super().__init__(TaskType.segmentation) + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.segmentation, few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) def compute_statistics(self, dataset): """ @@ -1149,7 +1197,7 @@ def generate_reports(self, stats): return reports -def validate_annotations(dataset: IDataset, task_type: Union[str, TaskType]): +def validate_annotations(dataset: IDataset, task_type: Union[str, TaskType], **extra_args): """ Returns the validation results of a dataset based on task type. @@ -1167,15 +1215,30 @@ def validate_annotations(dataset: IDataset, task_type: Union[str, TaskType]): """ + few_samples_thr = extra_args['few_samples_thr'] + imbalance_ratio_thr = extra_args['imbalance_ratio_thr'] + far_from_mean_thr = extra_args['far_from_mean_thr'] + dominance_ratio_thr = extra_args['dominance_ratio_thr'] + topk_bins = extra_args['topk_bins'] + validation_results = {} task_type = parse_str_enum_value(task_type, TaskType) if task_type == TaskType.classification: - validator = ClassificationValidator() + validator = ClassificationValidator(few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) elif task_type == TaskType.detection: - validator = DetectionValidator() + validator = DetectionValidator(few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) elif task_type == TaskType.segmentation: - validator = SegmentationValidator() + validator = SegmentationValidator(few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) if not isinstance(dataset, IDataset): raise TypeError("Invalid dataset type '%s'" % type(dataset)) diff --git a/tests/test_validator.py b/tests/test_validator.py index 7229efe741..a2eb2559fc 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -113,7 +113,9 @@ def setUpClass(cls): class TestBaseValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = _Validator(TaskType.classification) + cls.validator = _Validator(task_type=TaskType.classification, few_samples_thr=1, + imbalance_ratio_thr=50, far_from_mean_thr=5.0, + dominance_ratio_thr=0.8, topk_bins=0.1) def test_generate_reports(self): with self.assertRaises(NotImplementedError): @@ -233,7 +235,7 @@ def test_check_few_samples_in_label(self): stats = { 'label_distribution': { 'defined_labels': { - 'unit': self.validator.DEFAULT_FEW_SAMPLES + 'unit': 
self.validator.few_samples_thr } } } @@ -247,7 +249,7 @@ def test_check_few_samples_in_label(self): stats = { 'label_distribution': { 'defined_labels': { - 'unit': self.validator.DEFAULT_FEW_SAMPLES + 1 + 'unit': self.validator.few_samples_thr + 1 } } } @@ -263,7 +265,7 @@ def test_check_few_samples_in_attribute(self): with self.subTest('Few Samples'): attr_dets = { 'distribution': { - 'mock': self.validator.DEFAULT_FEW_SAMPLES + 'mock': self.validator.few_samples_thr } } @@ -276,7 +278,7 @@ def test_check_few_samples_in_attribute(self): with self.subTest('No Few Samples Warning'): attr_dets = { 'distribution': { - 'mock': self.validator.DEFAULT_FEW_SAMPLES + 1 + 'mock': self.validator.few_samples_thr + 1 } } @@ -290,7 +292,7 @@ def test_check_imbalanced_labels(self): stats = { 'label_distribution': { 'defined_labels': { - 'unit': self.validator.DEFAULT_IMBALANCE_RATIO, + 'unit': self.validator.imbalance_ratio_thr, 'test': 1 } } @@ -305,7 +307,7 @@ def test_check_imbalanced_labels(self): stats = { 'label_distribution': { 'defined_labels': { - 'unit': self.validator.DEFAULT_IMBALANCE_RATIO - 1, + 'unit': self.validator.imbalance_ratio_thr - 1, 'test': 1 } } @@ -322,7 +324,7 @@ def test_check_imbalanced_attribute(self): with self.subTest('Imbalance'): attr_dets = { 'distribution': { - 'mock': self.validator.DEFAULT_IMBALANCE_RATIO, + 'mock': self.validator.imbalance_ratio_thr, 'mock_1': 1 } } @@ -336,7 +338,7 @@ def test_check_imbalanced_attribute(self): with self.subTest('No Imbalance Warning'): attr_dets = { 'distribution': { - 'mock': self.validator.DEFAULT_IMBALANCE_RATIO - 1, + 'mock': self.validator.imbalance_ratio_thr - 1, 'mock_1': 1 } } @@ -350,7 +352,9 @@ def test_check_imbalanced_attribute(self): class TestClassificationValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = ClassificationValidator() + cls.validator = ClassificationValidator(few_samples_thr=1, imbalance_ratio_thr=50, + far_from_mean_thr=5.0, dominance_ratio_thr=0.8, + topk_bins=0.1) def test_check_missing_label_annotation(self): stats = { @@ -376,11 +380,13 @@ def test_check_multi_label_annotations(self): class TestDetectionValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = DetectionValidator() + cls.validator = DetectionValidator(few_samples_thr=1, imbalance_ratio_thr=50, + far_from_mean_thr=5.0, dominance_ratio_thr=0.8, + topk_bins=0.1) def test_check_imbalanced_dist_in_label(self): label_name = 'unittest' - most = int(self.validator.DEFAULT_DOMINANCE_RATIO * 100) + most = int(self.validator.dominance_thr * 100) rest = 100 - most with self.subTest('Imbalanced'): @@ -413,7 +419,7 @@ def test_check_imbalanced_dist_in_label(self): def test_check_imbalanced_dist_in_attr(self): label_name = 'unit' attr_name = 'test' - most = int(self.validator.DEFAULT_DOMINANCE_RATIO * 100) + most = int(self.validator.dominance_thr * 100) rest = 100 - most with self.subTest('Imbalanced'): @@ -534,11 +540,13 @@ def test_check_far_from_attr_mean(self): class TestSegmentationValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = SegmentationValidator() + cls.validator = SegmentationValidator(few_samples_thr=1, imbalance_ratio_thr=50, + far_from_mean_thr=5.0, dominance_ratio_thr=0.8, + topk_bins=0.1) def test_check_imbalanced_dist_in_label(self): label_name = 'unittest' - most = int(self.validator.DEFAULT_DOMINANCE_RATIO * 100) + most = int(self.validator.dominance_thr * 100) rest = 100 - most with self.subTest('Imbalanced'): @@ -571,7 +579,7 
@@ def test_check_imbalanced_dist_in_label(self): def test_check_imbalanced_dist_in_attr(self): label_name = 'unit' attr_name = 'test' - most = int(self.validator.DEFAULT_DOMINANCE_RATIO * 100) + most = int(self.validator.dominance_thr * 100) rest = 100 - most with self.subTest('Imbalanced'): @@ -674,8 +682,15 @@ def test_check_far_from_attr_mean(self): class TestValidateAnnotations(TestValidatorTemplate): + extra_args = { + 'few_samples_thr': 1, + 'imbalance_ratio_thr': 50, + 'far_from_mean_thr': 5.0, + 'dominance_ratio_thr': 0.8, + 'topk_bins': 0.1, + } def test_validate_annotations_classification(self): - actual_results = validate_annotations(self.dataset, 'classification') + actual_results = validate_annotations(self.dataset, 'classification', **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -730,7 +745,7 @@ def test_validate_annotations_classification(self): self.assertEqual(actual_summary, expected_summary) def test_validate_annotations_detection(self): - actual_results = validate_annotations(self.dataset, 'detection') + actual_results = validate_annotations(self.dataset, 'detection', **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -783,7 +798,7 @@ def test_validate_annotations_detection(self): self.assertEqual(actual_summary, expected_summary) def test_validate_annotations_segmentation(self): - actual_results = validate_annotations(self.dataset, 'segmentation') + actual_results = validate_annotations(self.dataset, 'segmentation', **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -838,8 +853,8 @@ def test_validate_annotations_segmentation(self): def test_validate_annotations_invalid_task_type(self): with self.assertRaises(ValueError): - validate_annotations(self.dataset, 'INVALID') + validate_annotations(self.dataset, 'INVALID', **self.extra_args) def test_validate_annotations_invalid_dataset_type(self): with self.assertRaises(TypeError): - validate_annotations(object(), 'classification') + validate_annotations(object(), 'classification', **self.extra_args) From 3d7f2d6d49251ce2b2de49e053f41654b8f8b63f Mon Sep 17 00:00:00 2001 From: woos Date: Mon, 24 May 2021 13:44:18 +0900 Subject: [PATCH 2/5] Add Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a256c4877c..c4ef0af264 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support for MNIST and MNIST in CSV dataset formats () - Documentation file for COCO format () - Documentation file and integration tests for YOLO format () +- Support for Validator configurable threshold() ### Changed - LabelMe format saves dataset items with their relative paths by subsets without changing names () From 45b3aaad8e4e824d96c29d67bcde2bc2d2c17422 Mon Sep 17 00:00:00 2001 From: woos Date: Thu, 27 May 2021 01:10:40 +0900 Subject: [PATCH 3/5] validator-threshold-support --- datumaro/cli/contexts/project/__init__.py | 2 +- datumaro/components/validator.py | 80 +++++++++++++---------- tests/test_validator.py | 31 +++++---- 3 files changed, 65 insertions(+), 48 deletions(-) diff --git a/datumaro/cli/contexts/project/__init__.py b/datumaro/cli/contexts/project/__init__.py index 528d643914..2260edcbc2 100644 --- a/datumaro/cli/contexts/project/__init__.py +++ b/datumaro/cli/contexts/project/__init__.py @@ -677,7 +677,7 @@ 
def transform_command(args): extra_args = {} if hasattr(transform, 'parse_cmdline'): extra_args = transform.parse_cmdline(args.extra_args) - + log.info("Loading the project...") dataset = project.make_dataset() diff --git a/datumaro/components/validator.py b/datumaro/components/validator.py index 9b319dbe16..1b2d78fe84 100644 --- a/datumaro/components/validator.py +++ b/datumaro/components/validator.py @@ -63,23 +63,25 @@ class _Validator(CliPlugin): def build_cmdline_parser(cls, **kwargs): parser = super().build_cmdline_parser(**kwargs) parser.add_argument('-fs', '--few_samples_thr', default=1, type=int, - help="Threshold for giving a warning for minimum number of samples per class") + help="Threshold for giving a warning for minimum number of" + "samples per class") parser.add_argument('-ir', '--imbalance_ratio_thr', default=50, type=int, - help="Threshold for giving data imbalance warning; IR(imbalance ratio) = majority/minority") + help="Threshold for giving data imbalance warning;" + "IR(imbalance ratio) = majority/minority") parser.add_argument('-m', '--far_from_mean_thr', default=5.0, type=float, - help="Threshold for giving a warning that data is far from mean;" + help="Threshold for giving a warning that data is far from mean;" "A constant used to define mean +/- k * standard deviation;") parser.add_argument('-dr', '--dominance_ratio_thr', default=0.8, type=float, - help="Threshold for giving a warning for bounding box imbalance;" + help="Threshold for giving a warning for bounding box imbalance;" "Dominace_ratio = ratio of Top-k bin to total in histogram;") parser.add_argument('-k', '--topk_bins', default=0.1, type=float, - help="Ratio of bins with the highest number of data to total bins in the histogram;" - "[0, 1]; 0.1 = 10%;") + help="Ratio of bins with the highest number of data" + "to total bins in the histogram; [0, 1]; 0.1 = 10%;") return parser - def __init__(self, task_type, few_samples_thr=None, - imbalance_ratio_thr=None, far_from_mean_thr=None, - dominance_ratio_thr=None, topk_bins=None, ): + def __init__(self, task_type, few_samples_thr=None, + imbalance_ratio_thr=None, far_from_mean_thr=None, + dominance_ratio_thr=None, topk_bins=None): """ Validator @@ -573,12 +575,13 @@ class ClassificationValidator(_Validator): A validator class for classification tasks. """ - def __init__(self, few_samples_thr, imbalance_ratio_thr, - far_from_mean_thr, dominance_ratio_thr, topk_bins): - super().__init__(task_type=TaskType.classification, few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.classification, + few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) def _check_multi_label_annotations(self, stats): validation_reports = [] @@ -676,12 +679,13 @@ class DetectionValidator(_Validator): """ A validator class for detection tasks. 
""" - def __init__(self, few_samples_thr, imbalance_ratio_thr, - far_from_mean_thr, dominance_ratio_thr, topk_bins): - super().__init__(task_type=TaskType.detection, few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.detection, + few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) def _check_negative_length(self, stats): validation_reports = [] @@ -961,12 +965,13 @@ class SegmentationValidator(_Validator): A validator class for (instance) segmentation tasks. """ - def __init__(self, few_samples_thr, imbalance_ratio_thr, - far_from_mean_thr, dominance_ratio_thr, topk_bins): - super().__init__(task_type=TaskType.segmentation, few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + def __init__(self, few_samples_thr, imbalance_ratio_thr, + far_from_mean_thr, dominance_ratio_thr, topk_bins): + super().__init__(task_type=TaskType.segmentation, + few_samples_thr=few_samples_thr, + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) def compute_statistics(self, dataset): """ @@ -1225,20 +1230,23 @@ def validate_annotations(dataset: IDataset, task_type: Union[str, TaskType], **e task_type = parse_str_enum_value(task_type, TaskType) if task_type == TaskType.classification: - validator = ClassificationValidator(few_samples_thr=few_samples_thr, + validator = ClassificationValidator(few_samples_thr=few_samples_thr, imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, + topk_bins=topk_bins) elif task_type == TaskType.detection: - validator = DetectionValidator(few_samples_thr=few_samples_thr, + validator = DetectionValidator(few_samples_thr=few_samples_thr, imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, + topk_bins=topk_bins) elif task_type == TaskType.segmentation: - validator = SegmentationValidator(few_samples_thr=few_samples_thr, + validator = SegmentationValidator(few_samples_thr=few_samples_thr, imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, topk_bins=topk_bins) + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, + topk_bins=topk_bins) if not isinstance(dataset, IDataset): raise TypeError("Invalid dataset type '%s'" % type(dataset)) diff --git a/tests/test_validator.py b/tests/test_validator.py index a2eb2559fc..8ed953c85a 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -113,8 +113,8 @@ def setUpClass(cls): class TestBaseValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = _Validator(task_type=TaskType.classification, few_samples_thr=1, - imbalance_ratio_thr=50, 
far_from_mean_thr=5.0, + cls.validator = _Validator(task_type=TaskType.classification, few_samples_thr=1, + imbalance_ratio_thr=50, far_from_mean_thr=5.0, dominance_ratio_thr=0.8, topk_bins=0.1) def test_generate_reports(self): @@ -352,8 +352,10 @@ def test_check_imbalanced_attribute(self): class TestClassificationValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = ClassificationValidator(few_samples_thr=1, imbalance_ratio_thr=50, - far_from_mean_thr=5.0, dominance_ratio_thr=0.8, + cls.validator = ClassificationValidator(few_samples_thr=1, + imbalance_ratio_thr=50, + far_from_mean_thr=5.0, + dominance_ratio_thr=0.8, topk_bins=0.1) def test_check_missing_label_annotation(self): @@ -380,8 +382,10 @@ def test_check_multi_label_annotations(self): class TestDetectionValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = DetectionValidator(few_samples_thr=1, imbalance_ratio_thr=50, - far_from_mean_thr=5.0, dominance_ratio_thr=0.8, + cls.validator = DetectionValidator(few_samples_thr=1, + imbalance_ratio_thr=50, + far_from_mean_thr=5.0, + dominance_ratio_thr=0.8, topk_bins=0.1) def test_check_imbalanced_dist_in_label(self): @@ -540,8 +544,10 @@ def test_check_far_from_attr_mean(self): class TestSegmentationValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = SegmentationValidator(few_samples_thr=1, imbalance_ratio_thr=50, - far_from_mean_thr=5.0, dominance_ratio_thr=0.8, + cls.validator = SegmentationValidator(few_samples_thr=1, + imbalance_ratio_thr=50, + far_from_mean_thr=5.0, + dominance_ratio_thr=0.8, topk_bins=0.1) def test_check_imbalanced_dist_in_label(self): @@ -690,7 +696,8 @@ class TestValidateAnnotations(TestValidatorTemplate): 'topk_bins': 0.1, } def test_validate_annotations_classification(self): - actual_results = validate_annotations(self.dataset, 'classification', **self.extra_args) + actual_results = validate_annotations(self.dataset, 'classification', + **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -745,7 +752,8 @@ def test_validate_annotations_classification(self): self.assertEqual(actual_summary, expected_summary) def test_validate_annotations_detection(self): - actual_results = validate_annotations(self.dataset, 'detection', **self.extra_args) + actual_results = validate_annotations(self.dataset, 'detection', + **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -798,7 +806,8 @@ def test_validate_annotations_detection(self): self.assertEqual(actual_summary, expected_summary) def test_validate_annotations_segmentation(self): - actual_results = validate_annotations(self.dataset, 'segmentation', **self.extra_args) + actual_results = validate_annotations(self.dataset, 'segmentation', + **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] From 0b56e9169ad152bf4cb9dc318368c451558e5886 Mon Sep 17 00:00:00 2001 From: woos Date: Thu, 27 May 2021 09:29:28 +0900 Subject: [PATCH 4/5] validator-thr-support --- datumaro/cli/contexts/project/__init__.py | 2 +- tests/test_validator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datumaro/cli/contexts/project/__init__.py b/datumaro/cli/contexts/project/__init__.py index 2260edcbc2..ff4dfb10bd 100644 --- a/datumaro/cli/contexts/project/__init__.py +++ b/datumaro/cli/contexts/project/__init__.py @@ -824,7 +824,7 @@ def validate_command(args): if 
subset_name is not None: dataset = dataset.get_subset(subset_name) dst_file_name += f'-{subset_name}' - + extra_args = {} from datumaro.components.validator import _Validator extra_args = _Validator.parse_cmdline(args.extra_args) diff --git a/tests/test_validator.py b/tests/test_validator.py index 8ed953c85a..084c2bc8aa 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -352,7 +352,7 @@ def test_check_imbalanced_attribute(self): class TestClassificationValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = ClassificationValidator(few_samples_thr=1, + cls.validator = ClassificationValidator(few_samples_thr=1, imbalance_ratio_thr=50, far_from_mean_thr=5.0, dominance_ratio_thr=0.8, From 2b7b621aedf36ec45a34eab737ce41bd705785ff Mon Sep 17 00:00:00 2001 From: Maxim Zhiltsov Date: Thu, 27 May 2021 13:56:42 +0300 Subject: [PATCH 5/5] fix formatting --- datumaro/components/validator.py | 24 ++++++------- tests/test_validator.py | 58 ++++++++++++++------------------ 2 files changed, 38 insertions(+), 44 deletions(-) diff --git a/datumaro/components/validator.py b/datumaro/components/validator.py index 1b2d78fe84..1e910029f8 100644 --- a/datumaro/components/validator.py +++ b/datumaro/components/validator.py @@ -1231,22 +1231,22 @@ def validate_annotations(dataset: IDataset, task_type: Union[str, TaskType], **e task_type = parse_str_enum_value(task_type, TaskType) if task_type == TaskType.classification: validator = ClassificationValidator(few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, - topk_bins=topk_bins) + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, + topk_bins=topk_bins) elif task_type == TaskType.detection: validator = DetectionValidator(few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, - topk_bins=topk_bins) + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, + topk_bins=topk_bins) elif task_type == TaskType.segmentation: validator = SegmentationValidator(few_samples_thr=few_samples_thr, - imbalance_ratio_thr=imbalance_ratio_thr, - far_from_mean_thr=far_from_mean_thr, - dominance_ratio_thr=dominance_ratio_thr, - topk_bins=topk_bins) + imbalance_ratio_thr=imbalance_ratio_thr, + far_from_mean_thr=far_from_mean_thr, + dominance_ratio_thr=dominance_ratio_thr, + topk_bins=topk_bins) if not isinstance(dataset, IDataset): raise TypeError("Invalid dataset type '%s'" % type(dataset)) diff --git a/tests/test_validator.py b/tests/test_validator.py index 084c2bc8aa..041e7e66b9 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -31,11 +31,11 @@ def setUpClass(cls): 'a': 1, 'b': 2, }), Mask(id=2, label=0, attributes={'a': 1, 'b': 2}, - image=np.array([[0, 0, 0, 0, 0], - [0, 0, 1, 1, 1], - [0, 0, 1, 1, 1], - [0, 0, 1, 1, 1], - [0, 0, 1, 1, 1], + image=np.array([[0, 0, 0, 0, 0], + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], ])), ]), DatasetItem(id=2, image=np.ones((2, 4, 3)), annotations=[ @@ -79,10 +79,10 @@ def setUpClass(cls): 'a': 2, 'b': 2, }), Mask(id=2, label=1, attributes={'a': 2, 'b': 2}, - image=np.array([[1, 0, 0], - [1, 0, 0], - [1, 0, 0], - [1, 0, 0], + image=np.array([[1, 0, 0], + [1, 0, 0], + [1, 0, 0], + [1, 0, 0], ])), ]), 
DatasetItem(id=7, image=np.ones((2, 4, 3)), annotations=[ @@ -91,7 +91,7 @@ def setUpClass(cls): 'a': 1, 'b': 2, }), Polygon([1, 2, 1, 5, 5, 5, 5, 2], label=2, id=2, - attributes={'a': 1, 'b': 2, + attributes={'a': 1, 'b': 2, }), ]), DatasetItem(id=8, image=np.ones((2, 4, 3)), annotations=[ @@ -100,10 +100,10 @@ def setUpClass(cls): 'a': 2, 'b': 1, }), Mask(id=2, label=2, attributes={'a': 2, 'b': 1}, - image=np.array([[1, 1, 1], - [1, 1, 1], - [1, 1, 1], - [1, 1, 1], + image=np.array([[1, 1, 1], + [1, 1, 1], + [1, 1, 1], + [1, 1, 1], ])), ]), ], categories=[[f'label_{i}', None, {'a', 'b', }] @@ -113,9 +113,9 @@ def setUpClass(cls): class TestBaseValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): - cls.validator = _Validator(task_type=TaskType.classification, few_samples_thr=1, - imbalance_ratio_thr=50, far_from_mean_thr=5.0, - dominance_ratio_thr=0.8, topk_bins=0.1) + cls.validator = _Validator(task_type=TaskType.classification, + few_samples_thr=1, imbalance_ratio_thr=50, far_from_mean_thr=5.0, + dominance_ratio_thr=0.8, topk_bins=0.1) def test_generate_reports(self): with self.assertRaises(NotImplementedError): @@ -353,10 +353,8 @@ class TestClassificationValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): cls.validator = ClassificationValidator(few_samples_thr=1, - imbalance_ratio_thr=50, - far_from_mean_thr=5.0, - dominance_ratio_thr=0.8, - topk_bins=0.1) + imbalance_ratio_thr=50, far_from_mean_thr=5.0, + dominance_ratio_thr=0.8, topk_bins=0.1) def test_check_missing_label_annotation(self): stats = { @@ -383,10 +381,8 @@ class TestDetectionValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): cls.validator = DetectionValidator(few_samples_thr=1, - imbalance_ratio_thr=50, - far_from_mean_thr=5.0, - dominance_ratio_thr=0.8, - topk_bins=0.1) + imbalance_ratio_thr=50, far_from_mean_thr=5.0, + dominance_ratio_thr=0.8, topk_bins=0.1) def test_check_imbalanced_dist_in_label(self): label_name = 'unittest' @@ -545,10 +541,8 @@ class TestSegmentationValidator(TestValidatorTemplate): @classmethod def setUpClass(cls): cls.validator = SegmentationValidator(few_samples_thr=1, - imbalance_ratio_thr=50, - far_from_mean_thr=5.0, - dominance_ratio_thr=0.8, - topk_bins=0.1) + imbalance_ratio_thr=50, far_from_mean_thr=5.0, + dominance_ratio_thr=0.8, topk_bins=0.1) def test_check_imbalanced_dist_in_label(self): label_name = 'unittest' @@ -697,7 +691,7 @@ class TestValidateAnnotations(TestValidatorTemplate): } def test_validate_annotations_classification(self): actual_results = validate_annotations(self.dataset, 'classification', - **self.extra_args) + **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -753,7 +747,7 @@ def test_validate_annotations_classification(self): def test_validate_annotations_detection(self): actual_results = validate_annotations(self.dataset, 'detection', - **self.extra_args) + **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics'] @@ -807,7 +801,7 @@ def test_validate_annotations_detection(self): def test_validate_annotations_segmentation(self): actual_results = validate_annotations(self.dataset, 'segmentation', - **self.extra_args) + **self.extra_args) with self.subTest('Test of statistics', i=0): actual_stats = actual_results['statistics']
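A quick sketch of how the thresholds introduced by this series can be used from Python. The five keyword names (few_samples_thr, imbalance_ratio_thr, far_from_mean_thr, dominance_ratio_thr, topk_bins) come directly from the patch; the dataset path, format, and concrete threshold values below are placeholders, not recommendations. Note that validate_annotations() as written here reads each key from extra_args directly, so all five must be supplied when calling it programmatically.

# Minimal sketch, assuming a dataset importable by Datumaro; the path and
# 'coco' format are placeholders, only the keyword names come from this patch.
from datumaro.components.dataset import Dataset
from datumaro.components.validator import validate_annotations

dataset = Dataset.import_from('path/to/dataset', 'coco')  # placeholder source

results = validate_annotations(
    dataset, 'detection',
    few_samples_thr=5,         # warn when a label/attribute has at most 5 samples
    imbalance_ratio_thr=20,    # warn when the majority/minority count ratio reaches 20
    far_from_mean_thr=3.0,     # warn when a value falls outside mean +/- 3 * stddev
    dominance_ratio_thr=0.9,   # warn when the top-k bins hold 90% or more of the values
    topk_bins=0.1,             # "top-k" means the top 10% of histogram bins
)
print(results['statistics'])

On the command line, the same thresholds are passed after the '--' separator and forwarded to _Validator.parse_cmdline() through the new extra_args REMAINDER argument, e.g. "-- -fs 5 -ir 20 -m 3.0 -dr 0.9 -k 0.1"; the rest of the validate invocation (project directory, task type, subset) is outside the scope of this patch.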