From 9df178897c63d165e3ee786efb10ae6950a9d69a Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Thu, 2 Jun 2022 15:09:40 +0200 Subject: [PATCH] refactor: various minor cleanups --- examples/flight_delays.py | 6 ++-- popmon/alerting/compute_tl_bounds.py | 4 +-- popmon/pipeline/report.py | 36 +++++++++++-------- popmon/visualization/section_generator.py | 2 +- .../traffic_light_section_generator.py | 1 - popmon/visualization/utils.py | 1 - 6 files changed, 28 insertions(+), 22 deletions(-) diff --git a/examples/flight_delays.py b/examples/flight_delays.py index 103871bb..ea9ef1ba 100644 --- a/examples/flight_delays.py +++ b/examples/flight_delays.py @@ -1,6 +1,6 @@ import pandas as pd -import popmon # noqa +import popmon from popmon import resources # open synthetic data @@ -10,7 +10,9 @@ # generate stability report using automatic binning of all encountered features # (importing popmon automatically adds this functionality to a dataframe) -report = df.pm_stability_report( +report = popmon.df_stability_report( + df, + reference_type="self", time_axis="DATE", time_width="1w", time_offset="2015-07-02", diff --git a/popmon/alerting/compute_tl_bounds.py b/popmon/alerting/compute_tl_bounds.py index 88d7de38..961e1601 100644 --- a/popmon/alerting/compute_tl_bounds.py +++ b/popmon/alerting/compute_tl_bounds.py @@ -287,7 +287,7 @@ def pull_bounds( required = [m + suffix_mean, m + suffix_std] assert all(r in row for r in required) - x = pd.Series() + x = {} for m in cols: x[m + "_red_high"] = np.nan x[m + "_yellow_high"] = np.nan @@ -300,7 +300,7 @@ def pull_bounds( x[m + "_yellow_high"] = row[m + suffix_mean] + row[m + suffix_std] * yellow_high x[m + "_yellow_low"] = row[m + suffix_mean] + row[m + suffix_std] * yellow_low x[m + "_red_low"] = row[m + suffix_mean] + row[m + suffix_std] * red_low - return x + return pd.Series(x) def df_single_op_pull_bounds( diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index 87e22ff0..df259466 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -161,8 +161,25 @@ def stability_report( pipeline = get_report_pipeline_class(reference_type, reference)(**cfg) result = pipeline.transform(datastore) - stability_report = StabilityReport(datastore=result) - return stability_report + stability_report_result = StabilityReport(datastore=result) + return stability_report_result + + +def set_time_axis(df): + time_axes = get_time_axes(df) + num = len(time_axes) + if num == 1: + time_axis = time_axes[0] + logger.info(f'Time-axis automatically set to "{time_axis}"') + elif num == 0: + raise ValueError( + "No obvious time-axes found. Cannot generate stability report." + ) + else: + raise ValueError( + f"Found {num} time-axes: {time_axes}. Set *one* time_axis manually!" + ) + return time_axis def df_stability_report( @@ -298,19 +315,8 @@ def df_stability_report( f'time_axis "{time_axis}" not found in columns of reference dataframe.' ) if isinstance(time_axis, bool): - time_axes = get_time_axes(df) - num = len(time_axes) - if num == 1: - time_axis = time_axes[0] - logger.info(f'Time-axis automatically set to "{time_axis}"') - elif num == 0: - raise ValueError( - "No obvious time-axes found. Cannot generate stability report." - ) - else: - raise ValueError( - f"Found {num} time-axes: {time_axes}. Set *one* time_axis manually!" - ) + time_axis = set_time_axis(df) + if features is not None: # by now time_axis is defined. ensure that all histograms start with it. if not isinstance(features, list): diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index f06fbc10..fc9f9eb6 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -190,7 +190,7 @@ def _plot_metric( ): """Split off plot histogram generation to allow for parallel processing""" # pick up static traffic light boundaries - name = feature + ":" + metric + name = f"{feature}:{metric}" sbounds = static_bounds.get(name, ()) # pick up dynamic traffic light boundaries names = [prefix + metric + suffix for suffix in suffices] diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 5ff1af12..2f7470eb 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -205,7 +205,6 @@ def _plot_metrics( if len(values) > 0: values = np.stack(values) - # make plot. note: slow! if style == "heatmap": plot = plot_traffic_lights_overview( feature, values, metrics=nonempty_metrics, labels=dates diff --git a/popmon/visualization/utils.py b/popmon/visualization/utils.py index 25f5194b..52e3c199 100644 --- a/popmon/visualization/utils.py +++ b/popmon/visualization/utils.py @@ -485,7 +485,6 @@ def xtick(lab): lab = lab[:17] + "..." return lab - # plt.xlim((0.0, float(len(date)))) plt.xticks(tick_pos_x, date, fontsize=20, rotation=90) plt.yticks(tick_pos_y, [xtick(lab) for lab in labels], fontsize=20) im_ratio = values.shape[0] / values.shape[1]