Skip to content

Commit

Permalink
fix: use the same set of default stats
Browse files (browse the repository at this point in the history)
  • Loading branch information
sbrugman committed Jun 14, 2022
1 parent 997eb3e commit d3bb81c
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 43 deletions.
16 changes: 9 additions & 7 deletions popmon/analysis/comparison/hist_comparer.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,13 @@ def hist_compare(row, hist_name1="", hist_name2=""):
entries_list = get_consistent_numpy_entries([hist1, hist2])
if is_numeric(hist1):
htype = "num"
args = entries_list
else:
htype = "cat"
args = [hist1, hist2]

for key, func in Comparisons.get_comparisons(dim=1, htype=htype).items():
results = func(*entries_list)
results = func(*args)
if len(key) == 1:
results = (results,)
for k, v in zip(key, results):
Expand All @@ -98,12 +100,12 @@ def hist_compare(row, hist_name1="", hist_name2=""):
numpy_ndgrids = get_consistent_numpy_ndgrids([hist1, hist2], dim=hist1.n_dim)
entries_list = [entry.flatten() for entry in numpy_ndgrids]

for key, func in Comparisons.get_comparisons(dim=(2,)).items():
results = func(*entries_list)
if len(key) == 1:
results = (results,)
for k, v in zip(key, results):
x[k] = v
for key, func in Comparisons.get_comparisons(dim=(2,)).items():
results = func(*entries_list)
if len(key) == 1:
results = (results,)
for k, v in zip(key, results):
x[k] = v

for key, func in Comparisons.get_comparisons(dim=-1).items():
results = func(*entries_list)
Expand Down
44 changes: 15 additions & 29 deletions popmon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from typing import Optional, Union

from pydantic import BaseModel, BaseSettings
from pydantic.fields import Field
from typing_extensions import Literal

# Global configuration for the joblib parallelization. Could be used to change the number of jobs, and/or change
Expand Down Expand Up @@ -103,12 +102,18 @@ class Section(BaseModel):
traffic_lights: TrafficLightsSection = TrafficLightsSection()


def get_stats():
from popmon.analysis.comparison.comparison_registry import Comparisons

comparisons = Comparisons.get_descriptions()
class Report(BaseModel):
"""Report-specific configuration"""

stats = [
skip_empty_plots: bool = True
last_n: int = 0
skip_first_n: int = 0
skip_last_n: int = 0
report_filepath: Optional[Union[str, Path]] = None
# if set to false, then smaller show_stats
extended_report: bool = True
# if limited report is selected, check if stats list is provided, if not, get a default minimal list
show_stats: list[str] = [
"distinct*",
"filled*",
"nan*",
Expand All @@ -122,31 +127,12 @@ def get_stats():
"fraction_true*",
"phik*",
"*unknown_labels*",
"*chi2_norm*",
"*ks*",
"*zscore*",
"*zscore",
"n_*",
"*jsd*",
"*psi*",
"*max_prob_diff*",
]

for key in comparisons.keys():
stats.append(f"*{key}*")

return stats


class Report(BaseModel):
"""Report-specific configuration"""

skip_empty_plots: bool = True
last_n: int = 0
skip_first_n: int = 0
skip_last_n: int = 0
report_filepath: Optional[Union[str, Path]] = None
# if set to false, then smaller show_stats
# if limited report is selected, check if stats list is provided, if not, get a default minimal list
# show_stats = show_stats if not extended_report else None
extended_report: bool = True
show_stats: list[str] = Field(default_factory=get_stats)
section: Section = Section()
top_n: int = 20

Expand Down
2 changes: 1 addition & 1 deletion popmon/visualization/alert_section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(
self.skip_first_n = settings.skip_first_n
self.skip_last_n = settings.skip_last_n
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats
self.show_stats = settings.show_stats if not settings.extended_report else None

self.section_name = settings.section.alerts.name
self.description = settings.section.alerts.description
Expand Down
2 changes: 1 addition & 1 deletion popmon/visualization/overview_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def __init__(
self.suffices = suffices
self.ignore_stat_endswith = ignore_stat_endswith or []
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats
self.show_stats = settings.show_stats if not settings.extended_report else None
self.section_name = settings.section.overview.name
self.description = settings.section.overview.description

Expand Down
2 changes: 1 addition & 1 deletion popmon/visualization/section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def __init__(
self.ignore_stat_endswith = ignore_stat_endswith or []
self.skip_empty_plots = settings.skip_empty_plots
self.description = description
self.show_stats = settings.show_stats
self.show_stats = settings.show_stats if not settings.extended_report else None

def get_description(self):
return self.section_name
Expand Down
2 changes: 1 addition & 1 deletion popmon/visualization/traffic_light_section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def __init__(
self.suffices = suffices
self.ignore_stat_endswith = ignore_stat_endswith or []
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats
self.show_stats = settings.show_stats if not settings.extended_report else None

self.section_name = settings.section.traffic_lights.name
self.description = settings.section.traffic_lights.description
Expand Down
8 changes: 5 additions & 3 deletions tests/popmon/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from popmon.config import get_stats
from popmon.config import Report
from popmon.utils import filter_metrics


def test_filter_metrics():
stats = get_stats()
settings = Report()

metrics = [
"distinct_pull",
"filled_pull",
Expand Down Expand Up @@ -39,5 +40,6 @@ def test_filter_metrics():
"ref_max_prob_diff",
]
assert (
filter_metrics(metrics, ignore_stat_endswith=[], show_stats=stats) == expected
filter_metrics(metrics, ignore_stat_endswith=[], show_stats=settings.show_stats)
== expected
)

0 comments on commit d3bb81c

Please sign in to comment.