From d3bb81cfda2b99658bc288f099e0c73a8336b91e Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 14 Jun 2022 16:15:02 +0200 Subject: [PATCH] fix: use the same set of default stats --- popmon/analysis/comparison/hist_comparer.py | 16 ++++--- popmon/config.py | 44 +++++++------------ .../visualization/alert_section_generator.py | 2 +- popmon/visualization/overview_section.py | 2 +- popmon/visualization/section_generator.py | 2 +- .../traffic_light_section_generator.py | 2 +- tests/popmon/test_utils.py | 8 ++-- 7 files changed, 33 insertions(+), 43 deletions(-) diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index f0312c88..3c9474ee 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -78,11 +78,13 @@ def hist_compare(row, hist_name1="", hist_name2=""): entries_list = get_consistent_numpy_entries([hist1, hist2]) if is_numeric(hist1): htype = "num" + args = entries_list else: htype = "cat" + args = [hist1, hist2] for key, func in Comparisons.get_comparisons(dim=1, htype=htype).items(): - results = func(*entries_list) + results = func(*args) if len(key) == 1: results = (results,) for k, v in zip(key, results): @@ -98,12 +100,12 @@ def hist_compare(row, hist_name1="", hist_name2=""): numpy_ndgrids = get_consistent_numpy_ndgrids([hist1, hist2], dim=hist1.n_dim) entries_list = [entry.flatten() for entry in numpy_ndgrids] - for key, func in Comparisons.get_comparisons(dim=(2,)).items(): - results = func(*entries_list) - if len(key) == 1: - results = (results,) - for k, v in zip(key, results): - x[k] = v + for key, func in Comparisons.get_comparisons(dim=(2,)).items(): + results = func(*entries_list) + if len(key) == 1: + results = (results,) + for k, v in zip(key, results): + x[k] = v for key, func in Comparisons.get_comparisons(dim=-1).items(): results = func(*entries_list) diff --git a/popmon/config.py b/popmon/config.py index 91880756..741bca43 100644 --- a/popmon/config.py +++ b/popmon/config.py @@ -20,7 +20,6 @@ from typing import Optional, Union from pydantic import BaseModel, BaseSettings -from pydantic.fields import Field from typing_extensions import Literal # Global configuration for the joblib parallelization. Could be used to change the number of jobs, and/or change @@ -103,12 +102,18 @@ class Section(BaseModel): traffic_lights: TrafficLightsSection = TrafficLightsSection() -def get_stats(): - from popmon.analysis.comparison.comparison_registry import Comparisons - - comparisons = Comparisons.get_descriptions() +class Report(BaseModel): + """Report-specific configuration""" - stats = [ + skip_empty_plots: bool = True + last_n: int = 0 + skip_first_n: int = 0 + skip_last_n: int = 0 + report_filepath: Optional[Union[str, Path]] = None + # if set to false, then smaller show_stats + extended_report: bool = True + # if limited report is selected, check if stats list is provided, if not, get a default minimal list + show_stats: list[str] = [ "distinct*", "filled*", "nan*", @@ -122,31 +127,12 @@ def get_stats(): "fraction_true*", "phik*", "*unknown_labels*", - "*chi2_norm*", - "*ks*", - "*zscore*", + "*zscore", "n_*", + "*jsd*", + "*psi*", + "*max_prob_diff*", ] - - for key in comparisons.keys(): - stats.append(f"*{key}*") - - return stats - - -class Report(BaseModel): - """Report-specific configuration""" - - skip_empty_plots: bool = True - last_n: int = 0 - skip_first_n: int = 0 - skip_last_n: int = 0 - report_filepath: Optional[Union[str, Path]] = None - # if set to false, then smaller show_stats - # if limited report is selected, check if stats list is provided, if not, get a default minimal list - # show_stats = show_stats if not extended_report else None - extended_report: bool = True - show_stats: list[str] = Field(default_factory=get_stats) section: Section = Section() top_n: int = 20 diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py index 40bc782c..94daa6b5 100644 --- a/popmon/visualization/alert_section_generator.py +++ b/popmon/visualization/alert_section_generator.py @@ -86,7 +86,7 @@ def __init__( self.skip_first_n = settings.skip_first_n self.skip_last_n = settings.skip_last_n self.skip_empty_plots = settings.skip_empty_plots - self.show_stats = settings.show_stats + self.show_stats = settings.show_stats if not settings.extended_report else None self.section_name = settings.section.alerts.name self.description = settings.section.alerts.description diff --git a/popmon/visualization/overview_section.py b/popmon/visualization/overview_section.py index a01ec471..46c6e31a 100644 --- a/popmon/visualization/overview_section.py +++ b/popmon/visualization/overview_section.py @@ -89,7 +89,7 @@ def __init__( self.suffices = suffices self.ignore_stat_endswith = ignore_stat_endswith or [] self.skip_empty_plots = settings.skip_empty_plots - self.show_stats = settings.show_stats + self.show_stats = settings.show_stats if not settings.extended_report else None self.section_name = settings.section.overview.name self.description = settings.section.overview.description diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index 6a975d41..4dbd455c 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -127,7 +127,7 @@ def __init__( self.ignore_stat_endswith = ignore_stat_endswith or [] self.skip_empty_plots = settings.skip_empty_plots self.description = description - self.show_stats = settings.show_stats + self.show_stats = settings.show_stats if not settings.extended_report else None def get_description(self): return self.section_name diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 405b2998..187bd7b9 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -92,7 +92,7 @@ def __init__( self.suffices = suffices self.ignore_stat_endswith = ignore_stat_endswith or [] self.skip_empty_plots = settings.skip_empty_plots - self.show_stats = settings.show_stats + self.show_stats = settings.show_stats if not settings.extended_report else None self.section_name = settings.section.traffic_lights.name self.description = settings.section.traffic_lights.description diff --git a/tests/popmon/test_utils.py b/tests/popmon/test_utils.py index 5b99e12f..8e032d19 100644 --- a/tests/popmon/test_utils.py +++ b/tests/popmon/test_utils.py @@ -1,9 +1,10 @@ -from popmon.config import get_stats +from popmon.config import Report from popmon.utils import filter_metrics def test_filter_metrics(): - stats = get_stats() + settings = Report() + metrics = [ "distinct_pull", "filled_pull", @@ -39,5 +40,6 @@ def test_filter_metrics(): "ref_max_prob_diff", ] assert ( - filter_metrics(metrics, ignore_stat_endswith=[], show_stats=stats) == expected + filter_metrics(metrics, ignore_stat_endswith=[], show_stats=settings.show_stats) + == expected )