From 21059910f5afc15142ec115edac263ba7a6ac6b8 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Thu, 16 Feb 2023 16:30:38 +0100 Subject: [PATCH] refactor: basic type hints mostly with `auto-typer` --- .../integrations/kibana/elastic_connector.py | 2 +- popmon/alerting/alerts_summary.py | 6 +- popmon/alerting/compute_tl_bounds.py | 58 ++++++++++--------- popmon/analysis/apply_func.py | 10 ++-- popmon/analysis/comparison/comparisons.py | 2 +- popmon/analysis/comparison/hist_comparer.py | 52 +++++++++-------- popmon/analysis/functions.py | 42 +++++++------- popmon/analysis/hist_numpy.py | 37 +++++++----- popmon/analysis/merge_statistics.py | 2 +- popmon/analysis/profiling/hist_profiler.py | 6 +- popmon/analysis/profiling/pull_calculator.py | 44 +++++++------- popmon/base/module.py | 15 ++--- popmon/base/pipeline.py | 8 +-- popmon/base/registry.py | 14 +++-- popmon/config.py | 4 +- popmon/extensions/extension.py | 4 +- popmon/extensions/profile_diptest.py | 2 +- popmon/extensions/utils.py | 0 popmon/hist/hist_splitter.py | 18 +++--- popmon/hist/hist_utils.py | 14 ++--- popmon/io/file_reader.py | 2 +- popmon/io/file_writer.py | 2 +- popmon/io/json_reader.py | 2 +- popmon/pipeline/amazing_pipeline.py | 4 +- popmon/pipeline/metrics.py | 5 +- popmon/pipeline/metrics_pipelines.py | 18 +++--- popmon/pipeline/report.py | 16 ++--- popmon/pipeline/report_pipelines.py | 12 ++-- popmon/pipeline/timing.py | 2 +- popmon/stats/numpy.py | 8 +-- popmon/stitching/hist_stitcher.py | 8 +-- popmon/utils.py | 2 +- .../visualization/alert_section_generator.py | 4 +- popmon/visualization/histogram_section.py | 6 +- popmon/visualization/overview_section.py | 4 +- popmon/visualization/report_generator.py | 4 +- popmon/visualization/section_generator.py | 12 ++-- .../traffic_light_section_generator.py | 6 +- popmon/visualization/utils.py | 17 +++--- tests/popmon/base/test_module.py | 2 +- tests/popmon/base/test_pipeline.py | 8 +-- 41 files changed, 254 insertions(+), 230 deletions(-) create mode 100644 popmon/extensions/utils.py diff --git a/examples/integrations/kibana/elastic_connector.py b/examples/integrations/kibana/elastic_connector.py index 3da1d2ac..a0ca9f5d 100644 --- a/examples/integrations/kibana/elastic_connector.py +++ b/examples/integrations/kibana/elastic_connector.py @@ -2,7 +2,7 @@ class ElasticConnector: - def __init__(self): + def __init__(self) -> None: self.host = "localhost" self.port = 9200 self.es = None diff --git a/popmon/alerting/alerts_summary.py b/popmon/alerting/alerts_summary.py index 126814cd..f82abc35 100644 --- a/popmon/alerting/alerts_summary.py +++ b/popmon/alerting/alerts_summary.py @@ -38,11 +38,11 @@ class AlertsSummary(Module): def __init__( self, read_key, - store_key="", + store_key: str = "", features=None, ignore_features=None, - combined_variable="_AGGREGATE_", - ): + combined_variable: str = "_AGGREGATE_", + ) -> None: """Initialize an instance of AlertsSummary module. :param str read_key: key of input data to read from datastore. diff --git a/popmon/alerting/compute_tl_bounds.py b/popmon/alerting/compute_tl_bounds.py index deea60ee..3b62f4ee 100644 --- a/popmon/alerting/compute_tl_bounds.py +++ b/popmon/alerting/compute_tl_bounds.py @@ -30,7 +30,7 @@ from popmon.base import Module, Pipeline -def traffic_light_summary(row, cols=None, prefix=""): +def traffic_light_summary(row, cols=None, prefix: str = ""): """Make a summary of traffic light alerts present in the dataframe Count number of green, yellow and red traffic lights. @@ -60,7 +60,9 @@ def traffic_light_summary(row, cols=None, prefix=""): return pd.Series(x) -def traffic_light(value, red_high, yellow_high, yellow_low=0, red_low=0): +def traffic_light( + value, red_high, yellow_high, yellow_low: int = 0, red_low: int = 0 +) -> int: """Get the corresponding traffic light given a value and traffic light bounds. :param float value: value to check @@ -120,17 +122,17 @@ def __init__( self, read_key, monitoring_rules=None, - store_key="", + store_key: str = "", features=None, ignore_features=None, - apply_funcs_key="", + apply_funcs_key: str = "", func=None, - metrics_wide=False, - prefix="traffic_light_", - suffix="", - entire=False, + metrics_wide: bool = False, + prefix: str = "traffic_light_", + suffix: str = "", + entire: bool = False, **kwargs, - ): + ) -> None: """Initialize an instance of TrafficLightBounds module. :param str read_key: key of input data to read from datastore @@ -171,7 +173,7 @@ def get_description(self): """Returns the traffic light function as description.""" return self.traffic_light_func.__name__ - def _set_traffic_lights(self, feature, cols, pattern, rule): + def _set_traffic_lights(self, feature, cols, pattern, rule) -> None: process_cols = fnmatch.filter(cols, pattern) for pcol in process_cols: @@ -250,10 +252,10 @@ def pull_bounds( row, red_high, yellow_high, - yellow_low=0, - red_low=0, - suffix_mean="_mean", - suffix_std="_std", + yellow_low: int = 0, + red_low: int = 0, + suffix_mean: str = "_mean", + suffix_std: str = "_std", cols=None, ): """Calculate traffic light pull bounds for list of cols @@ -307,10 +309,10 @@ def df_single_op_pull_bounds( df, red_high, yellow_high, - yellow_low=0, - red_low=0, - suffix_mean="_mean", - suffix_std="_std", + yellow_low: int = 0, + red_low: int = 0, + suffix_mean: str = "_mean", + suffix_std: str = "_std", cols=None, ): """Calculate traffic light pull bounds for list of cols on first row only @@ -341,10 +343,10 @@ def __init__( self, read_key, rules, - store_key="", - suffix_mean="_mean", - suffix_std="_std", - ): + store_key: str = "", + suffix_mean: str = "_mean", + suffix_std: str = "_std", + ) -> None: """Initialize an instance of DynamicTrafficLightBounds. :param str read_key: key of input data to read from data store, only used to extract feature list. @@ -385,10 +387,10 @@ def __init__( self, read_key, rules, - store_key="", - suffix_mean="_mean", - suffix_std="_std", - ): + store_key: str = "", + suffix_mean: str = "_mean", + suffix_std: str = "_std", + ) -> None: """Initialize an instance of StaticBounds. :param str read_key: key of input data to read from data store, only used to extract feature list. @@ -432,7 +434,9 @@ class TrafficLightAlerts(Pipeline): - Apply them to profiled test statistics data """ - def __init__(self, read_key, store_key, rules, expanded_rules_key=""): + def __init__( + self, read_key, store_key, rules, expanded_rules_key: str = "" + ) -> None: """Initialize an instance of TrafficLightBounds. :param str read_key: key of input data to read from data store, only used to extract feature list. diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py index 36aaa5b8..5fbcccff 100644 --- a/popmon/analysis/apply_func.py +++ b/popmon/analysis/apply_func.py @@ -40,14 +40,14 @@ class ApplyFunc(Module): def __init__( self, apply_to_key, - store_key="", - assign_to_key="", - apply_funcs_key="", + store_key: str = "", + assign_to_key: str = "", + apply_funcs_key: str = "", features=None, apply_funcs=None, metrics=None, - msg="", - ): + msg: str = "", + ) -> None: """Initialize an instance of ApplyFunc. :param str apply_to_key: key of the input data to apply funcs to. diff --git a/popmon/analysis/comparison/comparisons.py b/popmon/analysis/comparison/comparisons.py index 29cbb83b..df10d32f 100644 --- a/popmon/analysis/comparison/comparisons.py +++ b/popmon/analysis/comparison/comparisons.py @@ -168,7 +168,7 @@ def ks(p, q, *_): dim=1, htype="cat", ) -def unknown_labels(hist1, hist2): +def unknown_labels(hist1, hist2) -> bool: # check consistency of bin_labels labels1 = hist1.keySet labels2 = hist2.keySet diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index ba069a9f..8adc272d 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -40,7 +40,7 @@ from popmon.hist.hist_utils import COMMON_HIST_TYPES, is_numeric -def hist_compare(row, hist_name1="", hist_name2=""): +def hist_compare(row, hist_name1: str = "", hist_name2: str = ""): """Function to compare two histograms Apply statistical tests to compare two input histograms, such as: @@ -107,11 +107,11 @@ def __init__( read_key, store_key, assign_to_key=None, - hist_col="histogram", - suffix="comp", + hist_col: str = "histogram", + suffix: str = "comp", *args, **kwargs, - ): + ) -> None: """Initialize an instance of RollingHistComparer. :param func_hist_collector: histogram collection function @@ -160,10 +160,10 @@ def __init__( read_key, store_key, window, - shift=1, - hist_col="histogram", - suffix="roll", - ): + shift: int = 1, + hist_col: str = "histogram", + suffix: str = "roll", + ) -> None: """Initialize an instance of RollingHistComparer. :param str read_key: key of input data to read from data store @@ -201,9 +201,9 @@ def __init__( self, read_key, store_key, - hist_col="histogram", - suffix="prev1", - ): + hist_col: str = "histogram", + suffix: str = "prev1", + ) -> None: """Initialize an instance of PreviousHistComparer. :param str read_key: key of input data to read from data store @@ -228,10 +228,10 @@ def __init__( self, read_key, store_key, - shift=1, - hist_col="histogram", - suffix="expanding", - ): + shift: int = 1, + hist_col: str = "histogram", + suffix: str = "expanding", + ) -> None: """Initialize an instance of ExpandingHistComparer. :param str read_key: key of input data to read from data store @@ -267,9 +267,9 @@ def __init__( reference_key, assign_to_key, store_key, - hist_col="histogram", - suffix="ref", - ): + hist_col: str = "histogram", + suffix: str = "ref", + ) -> None: """Initialize an instance of ReferenceHistComparer. :param str reference_key: key of input data to read from data store @@ -306,10 +306,10 @@ def __init__( read_key, store_key, assign_to_key=None, - hist_col="histogram", + hist_col: str = "histogram", *args, **kwargs, - ): + ) -> None: """Initialize an instance of NormHistComparer. :param func_hist_collector: histogram collection function @@ -349,7 +349,9 @@ def __init__( class RollingNormHistComparer(NormHistComparer): """Compare histogram to previous rolling normalized histograms""" - def __init__(self, read_key, store_key, window, shift=1, hist_col="histogram"): + def __init__( + self, read_key, store_key, window, shift: int = 1, hist_col: str = "histogram" + ) -> None: """Initialize an instance of RollingNormHistComparer. :param str read_key: key of input data to read from data store @@ -383,7 +385,9 @@ def transform(self, datastore): class ExpandingNormHistComparer(NormHistComparer): """Compare histogram to previous expanding normalized histograms""" - def __init__(self, read_key, store_key, shift=1, hist_col="histogram"): + def __init__( + self, read_key, store_key, shift: int = 1, hist_col: str = "histogram" + ) -> None: """Initialize an instance of ExpandingNormHistComparer. :param str read_key: key of input data to read from data store @@ -412,7 +416,9 @@ def transform(self, datastore): class ReferenceNormHistComparer(NormHistComparer): """Compare histogram to reference normalized histograms""" - def __init__(self, reference_key, assign_to_key, store_key, hist_col="histogram"): + def __init__( + self, reference_key, assign_to_key, store_key, hist_col: str = "histogram" + ) -> None: """Initialize an instance of ReferenceNormHistComparer. :param str reference_key: key of input data to read from data store diff --git a/popmon/analysis/functions.py b/popmon/analysis/functions.py index e0370b44..6d28f75c 100644 --- a/popmon/analysis/functions.py +++ b/popmon/analysis/functions.py @@ -34,7 +34,7 @@ from popmon.stats.numpy import probability_distribution_mean_covariance -def pull(row, suffix_mean="_mean", suffix_std="_std", cols=None): +def pull(row, suffix_mean: str = "_mean", suffix_std: str = "_std", cols=None): """Calculate normalized residual (pull) for list of cols Function can be used by ApplyFunc module. @@ -74,7 +74,7 @@ def pull(row, suffix_mean="_mean", suffix_std="_std", cols=None): return pd.Series(x) -def expanding_mean(df, shift=1): +def expanding_mean(df, shift: int = 1): """Calculate expanding mean of all numeric columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -86,7 +86,7 @@ def expanding_mean(df, shift=1): return df.shift(shift).expanding().mean() -def expanding_std(df, shift=1): +def expanding_std(df, shift: int = 1): """Calculate expanding std of all numeric columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -98,7 +98,7 @@ def expanding_std(df, shift=1): return df.shift(shift).expanding().std() -def expanding_apply(df, func, shift=1, *args, **kwargs): +def expanding_apply(df, func, shift: int = 1, *args, **kwargs): """Calculate expanding apply() to all columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -113,7 +113,7 @@ def expanding_apply(df, func, shift=1, *args, **kwargs): return df.shift(shift).expanding().apply(func, args=args, **kwargs) -def rolling_std(df, window, shift=1): +def rolling_std(df, window, shift: int = 1): """Calculate rolling std of all numeric columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -126,7 +126,7 @@ def rolling_std(df, window, shift=1): return df.shift(shift).rolling(window).std() -def rolling_mean(df, window, shift=1): +def rolling_mean(df, window, shift: int = 1): """Calculate rolling mean of all numeric columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -139,7 +139,7 @@ def rolling_mean(df, window, shift=1): return df.shift(shift).rolling(window).mean() -def rolling_apply(df, window, func, shift=1, *args, **kwargs): +def rolling_apply(df, window, func, shift: int = 1, *args, **kwargs): """Calculate rolling apply() to all columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -156,7 +156,7 @@ def rolling_apply(df, window, func, shift=1, *args, **kwargs): return df.shift(shift).rolling(window).apply(func, raw=False, args=args, **kwargs) -def rolling_lr(df, window, index=0, shift=0): +def rolling_lr(df, window, index: int = 0, shift: int = 0): """Calculate rolling scipy lin_regress() to all columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -175,7 +175,7 @@ def rolling_lr(df, window, index=0, shift=0): ) -def rolling_lr_zscore(df, window, shift=0): +def rolling_lr_zscore(df, window, shift: int = 0): """Calculate rolling z-score of scipy lin_regress() to all columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -202,7 +202,7 @@ def func(x): return roll(df, window=window, shift=shift).apply(func, axis=1) -def roll(df, window, shift=1): +def roll(df, window, shift: int = 1): """Implementation of rolling window that can handle non-numerical columns such as histograms :param pd.DataFrame df: input dataframe to apply rolling function to. @@ -250,7 +250,7 @@ def reshape(vs, i): return rolled_df -def expand(df, shift=1): +def expand(df, shift: int = 1): """Implementation of expanding window that can handle non-numerical values such as histograms Split up input array into expanding sub-arrays @@ -283,7 +283,7 @@ def reshape(vs, i): return expanded_df -def expanding_hist(df, shift=1, *args, **kwargs): +def expanding_hist(df, shift: int = 1, *args, **kwargs): """Apply expanding histogram sum Function can be used by ApplyFunc module. @@ -297,7 +297,7 @@ def expanding_hist(df, shift=1, *args, **kwargs): return expand(df, shift=shift).apply(hist_sum, axis=1, args=args, **kwargs) -def rolling_hist(df, window, shift=1, *args, **kwargs): +def rolling_hist(df, window, shift: int = 1, *args, **kwargs): """Apply rolling histogram sum Function can be used by ApplyFunc module. @@ -314,7 +314,7 @@ def rolling_hist(df, window, shift=1, *args, **kwargs): ) -def hist_sum(x, hist_name=""): +def hist_sum(x, hist_name: str = ""): """Return sum of histograms Usage: df['hists'].apply(hist_sum) ; series.apply(hist_sum) @@ -351,7 +351,7 @@ def hist_sum(x, hist_name=""): return pd.Series(o) -def roll_norm_hist_mean_cov(df, window, shift=1, *args, **kwargs): +def roll_norm_hist_mean_cov(df, window, shift: int = 1, *args, **kwargs): """Apply rolling normalized_hist_mean_cov function Function can be used by ApplyFunc module. @@ -368,7 +368,7 @@ def roll_norm_hist_mean_cov(df, window, shift=1, *args, **kwargs): ) -def expand_norm_hist_mean_cov(df, shift=1, *args, **kwargs): +def expand_norm_hist_mean_cov(df, shift: int = 1, *args, **kwargs): """Apply expanding normalized_hist_mean_cov function Function can be used by ApplyFunc module. @@ -384,7 +384,7 @@ def expand_norm_hist_mean_cov(df, shift=1, *args, **kwargs): ) -def normalized_hist_mean_cov(x, hist_name=""): +def normalized_hist_mean_cov(x, hist_name: str = ""): """Mean normalized histogram and its covariance of list of input histograms Usage: df['hists'].apply(normalized_hist_mean_cov) ; series.apply(normalized_hist_mean_cov) @@ -446,10 +446,10 @@ def normalized_hist_mean_cov(x, hist_name=""): def relative_chi_squared( row, - hist_name="histogram", - suffix_mean="_mean", - suffix_cov="_cov", - suffix_binning="_binning", + hist_name: str = "histogram", + suffix_mean: str = "_mean", + suffix_cov: str = "_cov", + suffix_binning: str = "_binning", ): """Calculate chi squared of normalized histogram with pre-calculated mean normalized histogram diff --git a/popmon/analysis/hist_numpy.py b/popmon/analysis/hist_numpy.py index abf2c201..bbe3a1c0 100644 --- a/popmon/analysis/hist_numpy.py +++ b/popmon/analysis/hist_numpy.py @@ -19,6 +19,7 @@ import warnings +from typing import List import histogrammar import numpy as np @@ -102,7 +103,7 @@ def set_ndgrid(hist, keys, n_dim): ) return grid - def flatten(histogram, keys, grid, dim=0, prefix=None): + def flatten(histogram, keys, grid, dim: int = 0, prefix=None): if prefix is None: prefix = [] @@ -137,7 +138,7 @@ def set_2dgrid(hist, keys): return set_ndgrid(hist, keys, n_dim=2) -def get_ndgrid(hist, get_bin_labels=False, n_dim=2): +def get_ndgrid(hist, get_bin_labels: bool = False, n_dim: int = 2): """Get filled n-d grid of first n dimensions of input histogram :param hist: input histogrammar histogram @@ -158,7 +159,7 @@ def get_ndgrid(hist, get_bin_labels=False, n_dim=2): return grid -def get_2dgrid(hist, get_bin_labels=False): +def get_2dgrid(hist, get_bin_labels: bool = False): """Get filled x,y grid of first two dimensions of input histogram :param hist: input histogrammar histogram @@ -167,7 +168,9 @@ def get_2dgrid(hist, get_bin_labels=False): return get_ndgrid(hist, get_bin_labels, n_dim=2) -def get_consistent_numpy_ndgrids(hist_list=None, get_bin_labels=False, dim=3): +def get_consistent_numpy_ndgrids( + hist_list=None, get_bin_labels: bool = False, dim: int = 3 +): """Get list of consistent x,y grids of first n dimensions of (sparse) input histograms :param list hist_list: list of input histogrammar histograms @@ -184,22 +187,22 @@ def get_consistent_numpy_ndgrids(hist_list=None, get_bin_labels=False, dim=3): ) assert_similar_hists(hist_list) - keys = [set() for _ in range(dim)] + keys: List[set] = [set() for _ in range(dim)] for hist in hist_list: hist_keys = prepare_ndgrid(hist, n_dim=dim) for i, h_keys in enumerate(hist_keys): keys[i] |= set(h_keys) - keys = [sorted(k) for k in keys] + sorted_keys = [sorted(k) for k in keys] - gridnd_list = [set_ndgrid(hist, keys, n_dim=dim) for hist in hist_list] + gridnd_list = [set_ndgrid(hist, sorted_keys, n_dim=dim) for hist in hist_list] if get_bin_labels: - return gridnd_list, keys + return gridnd_list, sorted_keys return gridnd_list -def get_consistent_numpy_2dgrids(hist_list=None, get_bin_labels=False): +def get_consistent_numpy_2dgrids(hist_list=None, get_bin_labels: bool = False): """Get list of consistent x,y grids of first two dimensions of (sparse) input histograms :param list hist_list: list of input histogrammar histograms @@ -211,7 +214,9 @@ def get_consistent_numpy_2dgrids(hist_list=None, get_bin_labels=False): return get_consistent_numpy_ndgrids(hist_list, get_bin_labels, dim=2) -def get_consistent_numpy_1dhists(hist_list, get_bin_labels=False, crop_range=False): +def get_consistent_numpy_1dhists( + hist_list, get_bin_labels: bool = False, crop_range: bool = False +): """Get list of consistent numpy hists for list of sparse (or bin) input histograms Works for sparse and bin histograms. @@ -294,7 +299,7 @@ def get_consistent_numpy_1dhists(hist_list, get_bin_labels=False, crop_range=Fal return nphist_list -def get_consistent_numpy_entries(hist_list, get_bin_labels=False): +def get_consistent_numpy_entries(hist_list, get_bin_labels: bool = False): """Get list of consistent numpy bin_entries for list of 1d input histograms Works for categorize, sparse and bin histograms. @@ -345,7 +350,7 @@ def get_consistent_numpy_entries(hist_list, get_bin_labels=False): return entries_list -def get_contentType(hist): +def get_contentType(hist) -> str: """Get content type of bins of histogram :param hist: input histogram @@ -362,7 +367,9 @@ def get_contentType(hist): return "Count" -def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types): +def check_similar_hists( + hist_list, check_type: bool = True, assert_type=used_hist_types +) -> bool: """Check consistent list of input histograms Check that type and dimension of all histograms in input list are the same. @@ -478,7 +485,9 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) return True -def assert_similar_hists(hist_list, check_type=True, assert_type=used_hist_types): +def assert_similar_hists( + hist_list, check_type: bool = True, assert_type=used_hist_types +): """Assert consistent list of input histograms Assert that type and dimension of all histograms in input list are the same. diff --git a/popmon/analysis/merge_statistics.py b/popmon/analysis/merge_statistics.py index 0cc95d2a..83a01c82 100644 --- a/popmon/analysis/merge_statistics.py +++ b/popmon/analysis/merge_statistics.py @@ -30,7 +30,7 @@ class MergeStatistics(Module): _input_keys = ("read_keys",) _output_keys = ("store_key",) - def __init__(self, read_keys: list[str], store_key: str): + def __init__(self, read_keys: list[str], store_key: str) -> None: """Initialize an instance of MergeStatistics. :param list read_keys: list of keys of input data to read from the datastore diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index 63f78496..b067bdd8 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -56,10 +56,10 @@ def __init__( features=None, ignore_features=None, var_timestamp=None, - hist_col="histogram", - index_col="date", + hist_col: str = "histogram", + index_col: str = "date", stats_functions=None, - ): + ) -> None: super().__init__() self.read_key = read_key self.store_key = store_key diff --git a/popmon/analysis/profiling/pull_calculator.py b/popmon/analysis/profiling/pull_calculator.py index 6795559e..b1d0e0a6 100644 --- a/popmon/analysis/profiling/pull_calculator.py +++ b/popmon/analysis/profiling/pull_calculator.py @@ -52,13 +52,13 @@ def __init__( apply_to_key, assign_to_key=None, store_key=None, - suffix_mean="_mean", - suffix_std="_std", - suffix_pull="_pull", + suffix_mean: str = "_mean", + suffix_std: str = "_std", + suffix_pull: str = "_pull", features=None, *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str func_mean: applied-function to calculate mean of profiled statistics @@ -109,15 +109,15 @@ def __init__( self, read_key, window, - shift=1, + shift: int = 1, features=None, store_key=None, - suffix_mean="_roll_mean", - suffix_std="_roll_std", - suffix_pull="_roll_pull", + suffix_mean: str = "_roll_mean", + suffix_std: str = "_roll_std", + suffix_pull: str = "_roll_pull", *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str read_key: key of input data to read from data store @@ -161,15 +161,15 @@ class ExpandingPullCalculator(PullCalculator): def __init__( self, read_key, - shift=1, + shift: int = 1, features=None, store_key=None, - suffix_mean="_exp_mean", - suffix_std="_exp_std", - suffix_pull="_exp_pull", + suffix_mean: str = "_exp_mean", + suffix_std: str = "_exp_std", + suffix_pull: str = "_exp_pull", *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str read_key: key of input data to read from data store @@ -214,12 +214,12 @@ def __init__( assign_to_key, store_key=None, features=None, - suffix_mean="_ref_mean", - suffix_std="_ref_std", - suffix_pull="_ref_pull", + suffix_mean: str = "_ref_mean", + suffix_std: str = "_ref_std", + suffix_pull: str = "_ref_pull", *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str reference_key: key of input data to read from data store @@ -264,12 +264,12 @@ def __init__( assign_to_key, store_key=None, features=None, - suffix_mean="_ref_mean", - suffix_std="_ref_std", - suffix_pull="_ref_pull", + suffix_mean: str = "_ref_mean", + suffix_std: str = "_ref_std", + suffix_pull: str = "_ref_pull", *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str reference_key: key of input data to read from data store diff --git a/popmon/base/module.py b/popmon/base/module.py index 3c27d415..97fe39d8 100644 --- a/popmon/base/module.py +++ b/popmon/base/module.py @@ -21,6 +21,7 @@ import logging from abc import ABCMeta from functools import wraps +from typing import Optional, Tuple def datastore_helper(func): @@ -88,10 +89,10 @@ def combine_classes(*args): class Module(metaclass=combine_classes(ABCMeta, ModuleMetaClass)): """Abstract base class used for modules in a pipeline.""" - _input_keys = None - _output_keys = None + _input_keys: Optional[Tuple[str, ...]] = None + _output_keys: Optional[Tuple[str, ...]] = None - def __init__(self): + def __init__(self) -> None: """Module initialization""" self.logger = logging.getLogger() self.features = [] @@ -113,10 +114,10 @@ def get_inputs(self): def get_outputs(self): return self._get_values(self._output_keys) - def get_description(self): + def get_description(self) -> str: return "" - def set_logger(self, logger): + def set_logger(self, logger) -> None: """Set logger of module :param logger: input logger @@ -171,7 +172,7 @@ def get_features(self, all_features: list) -> list: features = [feature for feature in features if feature in all_features] return features - def transform(self, *args): + def transform(self, *args, **kwargs): """Central function of the module. Typically transform() takes something from the datastore, does something to it, and puts the results @@ -183,7 +184,7 @@ def transform(self, *args): """ raise NotImplementedError - def __repr__(self): + def __repr__(self) -> str: """String representation for modules when printing a pipeline/list of modules""" name = self.__class__.__name__ input_keys = [f"{v}='{getattr(self, v)}'" for v in self._input_keys] diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py index f3f753de..b9460933 100644 --- a/popmon/base/pipeline.py +++ b/popmon/base/pipeline.py @@ -24,7 +24,7 @@ class Pipeline: """Base class used for to run modules in a pipeline.""" - def __init__(self, modules, logger: logging.Logger | None = None): + def __init__(self, modules, logger: logging.Logger | None = None) -> None: """Initialization of the pipeline :param list modules: modules of the pipeline. @@ -33,7 +33,7 @@ def __init__(self, modules, logger: logging.Logger | None = None): self.modules = modules self.set_logger(logger) - def set_logger(self, logger: logging.Logger | None): + def set_logger(self, logger: logging.Logger | None) -> None: """Set the logger to be used by each module :param logger: input logger @@ -42,7 +42,7 @@ def set_logger(self, logger: logging.Logger | None): for module in self.modules: module.set_logger(self.logger) - def add_modules(self, modules): + def add_modules(self, modules) -> None: """Add more modules to existing list of modules. :param list modules: list of more modules @@ -67,7 +67,7 @@ def transform(self, datastore): datastore = module.transform(datastore) return datastore - def __repr__(self): + def __repr__(self) -> str: """String representation for pipeline""" name = self.__class__.__name__ ret = f"{name}: [\n" diff --git a/popmon/base/registry.py b/popmon/base/registry.py index b38d0627..96f14e09 100644 --- a/popmon/base/registry.py +++ b/popmon/base/registry.py @@ -19,17 +19,19 @@ from __future__ import annotations from collections import defaultdict -from typing import Callable +from typing import Any, Callable, DefaultDict, Dict, List, Optional, Tuple, Union class Registry: _properties = ("dim", "htype") - def __init__(self): - self._keys: list[str] = [] - self._descriptions: dict[str, str] = {} - self._properties_to_func = defaultdict(lambda: defaultdict(dict)) - self._func_name_to_properties = {} + def __init__(self) -> None: + self._keys: List[str] = [] + self._descriptions: Dict[str, str] = {} + self._properties_to_func: DefaultDict[ + str, DefaultDict[str, Dict[Any, Any]] + ] = defaultdict(lambda: defaultdict(dict)) + self._func_name_to_properties: Dict[Any, Any] = {} def register( self, diff --git a/popmon/config.py b/popmon/config.py index 51169cee..24a921c8 100644 --- a/popmon/config.py +++ b/popmon/config.py @@ -355,7 +355,7 @@ class Settings(ValidatedSettings): """ # Config utilities - def _ensure_features_time_axis(self): + def _ensure_features_time_axis(self) -> None: self.features = [ c if c.startswith(self.time_axis) else f"{self.time_axis}:{c}" for c in self.features @@ -375,7 +375,7 @@ def _set_time_axis_dataframe(self, df): f"Found {num} time-axes: {time_axes}. Set *one* time_axis manually!" ) - def _set_time_axis_hists(self, hists): + def _set_time_axis_hists(self, hists) -> None: # auto guess the time_axis: find the most frequent first column name in the histograms list first_cols = [k.split(":")[0] for k in list(hists.keys())] self.time_axis = max(set(first_cols), key=first_cols.count) diff --git a/popmon/extensions/extension.py b/popmon/extensions/extension.py index ad53218d..6149fbe2 100644 --- a/popmon/extensions/extension.py +++ b/popmon/extensions/extension.py @@ -22,7 +22,7 @@ from typing import Callable -def is_installed(package): +def is_installed(package) -> bool: is_present = importlib.util.find_spec(package) return is_present is not None @@ -33,7 +33,7 @@ class Extension: # should also be added to `pyproject.toml` optional-dependencies requirements: list[str] - def check(self): + def check(self) -> None: if all(is_installed(package) for package in self.requirements): func = self.extension func = func.__func__ diff --git a/popmon/extensions/profile_diptest.py b/popmon/extensions/profile_diptest.py index 92118655..e8200f3e 100644 --- a/popmon/extensions/profile_diptest.py +++ b/popmon/extensions/profile_diptest.py @@ -34,7 +34,7 @@ from popmon.extensions.extension import Extension -def extension(): +def extension() -> None: from diptest import diptest @Profiles.register( diff --git a/popmon/extensions/utils.py b/popmon/extensions/utils.py new file mode 100644 index 00000000..e69de29b diff --git a/popmon/hist/hist_splitter.py b/popmon/hist/hist_splitter.py index 168a729f..684dfa91 100644 --- a/popmon/hist/hist_splitter.py +++ b/popmon/hist/hist_splitter.py @@ -46,15 +46,15 @@ def __init__( store_key, features=None, ignore_features=None, - feature_begins_with="", - project_on_axes=True, - flatten_output=False, - short_keys=True, + feature_begins_with: str = "", + project_on_axes: bool = True, + flatten_output: bool = False, + short_keys: bool = True, var_timestamp=None, - index_col="date", - hist_col="histogram", - filter_empty_split_hists=True, - ): + index_col: str = "date", + hist_col: str = "histogram", + filter_empty_split_hists: bool = True, + ) -> None: """Initialize an instance. :param str read_key: key of input histogram-dict to read from data store @@ -90,7 +90,7 @@ def __init__( "flatten_output requires short_keys attribute to be False." ) - def get_description(self): + def get_description(self) -> str: return "" def update_divided(self, divided, split, yname): diff --git a/popmon/hist/hist_utils.py b/popmon/hist/hist_utils.py index 83967d65..75ffcbba 100644 --- a/popmon/hist/hist_utils.py +++ b/popmon/hist/hist_utils.py @@ -45,7 +45,7 @@ HG_FACTORY = histogrammar.Factory() -def sum_entries(hist, default=True): +def sum_entries(hist, default: bool = True): """Recursively get sum of entries of histogram Sometimes hist.entries gives zero as answer? This function always works though. @@ -164,7 +164,7 @@ def sum_over_x(hist): return h_proj -def project_split2dhist_on_axis(splitdict, axis="x"): +def project_split2dhist_on_axis(splitdict, axis: str = "x"): """Project a split 2d-histogram onto one axis Project a 2d hist that's been split with function split_hist_along_first_dimension @@ -246,11 +246,11 @@ def get_bin_centers(hist): def split_hist_along_first_dimension( hist, - xname="x", - yname="y", - short_keys=True, - convert_time_index=True, - filter_empty_split_hists=True, + xname: str = "x", + yname: str = "y", + short_keys: bool = True, + convert_time_index: bool = True, + filter_empty_split_hists: bool = True, ): """Split (multi-dimensional) hist into sub-hists along x-axis diff --git a/popmon/io/file_reader.py b/popmon/io/file_reader.py index 99175382..f33b1308 100644 --- a/popmon/io/file_reader.py +++ b/popmon/io/file_reader.py @@ -37,7 +37,7 @@ def __init__( file_path: str | Path, apply_func: Callable | None = None, **kwargs, - ): + ) -> None: """Initialize an instance. :param str store_key: key of input data to be stored in the datastore diff --git a/popmon/io/file_writer.py b/popmon/io/file_writer.py index e80db281..2ead8e52 100644 --- a/popmon/io/file_writer.py +++ b/popmon/io/file_writer.py @@ -39,7 +39,7 @@ def __init__( file_path: str | Path | None = None, apply_func: Callable | None = None, **kwargs, - ): + ) -> None: """Initialize an instance. :param str read_key: key of input histogram-dict to read from data store diff --git a/popmon/io/json_reader.py b/popmon/io/json_reader.py index e7f4a3be..811a6317 100644 --- a/popmon/io/json_reader.py +++ b/popmon/io/json_reader.py @@ -28,7 +28,7 @@ class JsonReader(FileReader): """Read json file's contents into the datastore.""" - def __init__(self, file_path: str | Path, store_key: str): + def __init__(self, file_path: str | Path, store_key: str) -> None: """Initialize an instance. :param str store_key: key of input data to be stored in the datastore diff --git a/popmon/pipeline/amazing_pipeline.py b/popmon/pipeline/amazing_pipeline.py index 8c26ab06..52c84715 100644 --- a/popmon/pipeline/amazing_pipeline.py +++ b/popmon/pipeline/amazing_pipeline.py @@ -27,7 +27,7 @@ class AmazingPipeline(Pipeline): - def __init__(self, histogram_path: str, **kwargs): + def __init__(self, histogram_path: str, **kwargs) -> None: modules = [ JsonReader(file_path=histogram_path, store_key=kwargs["hists_key"]), # Or ExternalReference, RollingReference etc. @@ -36,7 +36,7 @@ def __init__(self, histogram_path: str, **kwargs): super().__init__(modules) -def run(): +def run() -> None: """Example that run self-reference pipeline and produces monitoring report""" logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s [%(module)s]: %(message)s" diff --git a/popmon/pipeline/metrics.py b/popmon/pipeline/metrics.py index 19ecb1ca..7e3b0162 100644 --- a/popmon/pipeline/metrics.py +++ b/popmon/pipeline/metrics.py @@ -20,6 +20,7 @@ from __future__ import annotations import logging +from typing import Optional from histogrammar.dfinterface.make_histograms import get_bin_specs, make_histograms @@ -74,9 +75,9 @@ def stability_metrics( def df_stability_metrics( df, - settings: Settings = None, + settings: Optional[Settings] = None, time_width=None, - time_offset=0, + time_offset: int = 0, var_dtype=None, reference=None, **kwargs, diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py index bee1f270..258b3edf 100644 --- a/popmon/pipeline/metrics_pipelines.py +++ b/popmon/pipeline/metrics_pipelines.py @@ -73,7 +73,7 @@ def get_metrics_pipeline_class(reference_type, reference): def create_metrics_pipeline( settings: Settings, reference=None, - hists_key="hists", + hists_key: str = "hists", **kwargs, ): # configuration and datastore for report pipeline @@ -214,7 +214,7 @@ def __init__( self, settings: Settings, hists_key, - ): + ) -> None: """Example metrics pipeline for comparing test data with itself (full test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -269,9 +269,9 @@ class ExternalReferenceMetricsPipeline(Pipeline): def __init__( self, settings: Settings, - hists_key="test_hists", - ref_hists_key="ref_hists", - ): + hists_key: str = "test_hists", + ref_hists_key: str = "ref_hists", + ) -> None: """Example metrics pipeline for comparing test data with other (full) external reference set :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -332,8 +332,8 @@ class RollingReferenceMetricsPipeline(Pipeline): def __init__( self, settings: Settings, - hists_key="test_hists", - ): + hists_key: str = "test_hists", + ) -> None: """Example metrics pipeline for comparing test data with itself (rolling test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -391,8 +391,8 @@ class ExpandingReferenceMetricsPipeline(Pipeline): def __init__( self, settings: Settings, - hists_key="test_hists", - ): + hists_key: str = "test_hists", + ) -> None: """Example metrics pipeline for comparing test data with itself (expanding test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index 865da325..8ce524eb 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -80,9 +80,9 @@ def stability_report( def df_stability_report( df, - settings: Settings = None, + settings: Optional[Settings] = None, time_width=None, - time_offset=0, + time_offset: int = 0, var_dtype=None, reference=None, split=None, @@ -210,7 +210,7 @@ class StabilityReport: as a HTML string, HTML file or Jupyter notebook's cell output. """ - def __init__(self, datastore, read_key="html_report"): + def __init__(self, datastore, read_key: str = "html_report") -> None: """Initialize an instance of StabilityReport. :param str read_key: key of HTML report data to read from data store. default is html_report. @@ -232,11 +232,11 @@ def _repr_html_(self): return display(self.to_notebook_iframe()) - def __repr__(self): + def __repr__(self) -> str: """Override so that Jupyter Notebook does not print the object.""" return "" - def to_html(self, escape=False): + def to_html(self, escape: bool = False): """HTML code representation of the report (represented as a string). :param bool escape: escape characters which could conflict with other HTML code. default: False @@ -249,7 +249,7 @@ def to_html(self, escape=False): return html.escape(self.html_report) return self.html_report - def to_file(self, filename): + def to_file(self, filename) -> None: """Store HTML report in the local file system. :param str filename: filename for the HTML report @@ -257,7 +257,7 @@ def to_file(self, filename): with open(filename, "w+") as file: file.write(self.to_html()) - def to_notebook_iframe(self, width="100%", height="100%"): + def to_notebook_iframe(self, width: str = "100%", height: str = "100%"): """HTML representation of the class (report) embedded in an iframe. :param str width: width of the frame to be shown @@ -280,7 +280,7 @@ def regenerate( self, store_key: str = "html_report", sections_key: str = "report_sections", - settings: Settings = None, + settings: Optional[Settings] = None, ): """Regenerate HTML report with different plot settings :param str sections_key: key to store sections data in the datastore. default is 'report_sections'. diff --git a/popmon/pipeline/report_pipelines.py b/popmon/pipeline/report_pipelines.py index 268507d6..2c8b8c3a 100644 --- a/popmon/pipeline/report_pipelines.py +++ b/popmon/pipeline/report_pipelines.py @@ -66,7 +66,7 @@ def __init__( self, settings: Settings, hists_key: str = "test_hists", - ): + ) -> None: """Example pipeline for comparing test data with itself (full test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -93,7 +93,7 @@ def __init__( settings: Settings, hists_key: str = "test_hists", ref_hists_key: str = "ref_hists", - ): + ) -> None: """Example pipeline for comparing test data with other (full) external reference set :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -122,7 +122,7 @@ def __init__( self, settings: Settings, hists_key: str = "test_hists", - ): + ) -> None: """Example pipeline for comparing test data with itself (rolling test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -148,7 +148,7 @@ def __init__( self, settings: Settings, hists_key: str = "test_hists", - ): + ) -> None: """Example pipeline for comparing test data with itself (expanding test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -177,7 +177,7 @@ def __init__( settings: Settings, sections_key: str = "report_sections", store_key: str = "html_report", - ): + ) -> None: """Initialize an instance of Report. :param Settings settings: the configuration object @@ -244,7 +244,7 @@ def __init__( ] if ( isinstance(settings.report.report_filepath, (str, Path)) - and len(settings.report.report_filepath) > 0 + and len(str(settings.report.report_filepath)) > 0 ): modules.append( FileWriter(store_key, file_path=settings.report.report_filepath) diff --git a/popmon/pipeline/timing.py b/popmon/pipeline/timing.py index 95079214..7e6f37c8 100644 --- a/popmon/pipeline/timing.py +++ b/popmon/pipeline/timing.py @@ -31,7 +31,7 @@ def __init__( self, store_key: str, **kwargs, - ): + ) -> None: """Initialize an instance. :param str store_key: key of input data to be stored in the datastore diff --git a/popmon/stats/numpy.py b/popmon/stats/numpy.py index cc41eea1..cb994572 100644 --- a/popmon/stats/numpy.py +++ b/popmon/stats/numpy.py @@ -22,7 +22,7 @@ import pandas as pd -def mean(a, weights=None, axis=None, dtype=None, keepdims=False, ddof=0): +def mean(a, weights=None, axis=None, dtype=None, keepdims: bool = False, ddof: int = 0): """ Compute the weighted mean along the specified axis. @@ -47,7 +47,7 @@ def mean(a, weights=None, axis=None, dtype=None, keepdims=False, ddof=0): ) -def std(a, weights=None, axis=None, dtype=None, ddof=0, keepdims=False): +def std(a, weights=None, axis=None, dtype=None, ddof: int = 0, keepdims: bool = False): """ Compute the weighted standard deviation along the specified axis. @@ -73,7 +73,7 @@ def std(a, weights=None, axis=None, dtype=None, ddof=0, keepdims=False): return np.sqrt(v) -def median(a, weights=None, axis=None, keepdims=False): +def median(a, weights=None, axis=None, keepdims: bool = False): """ Compute the weighted median along the specified axis. @@ -224,7 +224,7 @@ def covariance_multinomial_probability_distribution(entries): return covariance_matrix -def mad(a, c=0.6745, axis=0): +def mad(a, c=0.6745, axis: int = 0): """Median Absolute Deviation along given axis of an array mad = median(abs(a - median(a)))/c diff --git a/popmon/stitching/hist_stitcher.py b/popmon/stitching/hist_stitcher.py index be511a1b..9b677f41 100644 --- a/popmon/stitching/hist_stitcher.py +++ b/popmon/stitching/hist_stitcher.py @@ -33,13 +33,13 @@ class HistStitcher(Module): def __init__( self, - mode="add", + mode: str = "add", time_axis=None, time_bin_idx=None, read_key=None, delta_key=None, store_key=None, - ): + ) -> None: """Stitching histograms by first axis. :param str mode: options for histogram stitching: "add" or "replace". default is "add". @@ -64,7 +64,7 @@ def __init__( if self.mode not in self.allowed_modes: raise ValueError("mode should be either 'add' or 'replace'") - def get_description(self): + def get_description(self) -> str: return f"{self.mode}" def transform(self, hists_basis: dict, hists_delta: dict) -> dict: @@ -80,7 +80,7 @@ def stitch_histograms( hists_basis=None, hists_delta=None, hists_list=None, - time_axis="", + time_axis: str = "", time_bin_idx=None, ): """Stitching histograms by first axis. diff --git a/popmon/utils.py b/popmon/utils.py index 7364f643..e2242840 100644 --- a/popmon/utils.py +++ b/popmon/utils.py @@ -62,7 +62,7 @@ def filter_metrics( return metrics -def parallel(func: Callable, args_list, mode="args"): +def parallel(func: Callable, args_list, mode: str = "args"): """ Routine for parallel processing """ diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py index 9bd7747b..8afa0e9b 100644 --- a/popmon/visualization/alert_section_generator.py +++ b/popmon/visualization/alert_section_generator.py @@ -47,10 +47,10 @@ def __init__( ignore_features=None, static_bounds=None, dynamic_bounds=None, - prefix="traffic_light_", + prefix: str = "traffic_light_", suffices=None, ignore_stat_endswith=None, - ): + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index d590a2e0..a50b95c9 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -53,8 +53,8 @@ def __init__( features=None, ignore_features=None, hist_names=None, - hist_name_starts_with="histogram", - ): + hist_name_starts_with: str = "histogram", + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting @@ -212,7 +212,7 @@ def transform(self, data_obj: dict, sections: list | None = None): return sections -def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000): +def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins: int = 1000): """Split off plot histogram generation to allow for parallel processing :param str feature: feature diff --git a/popmon/visualization/overview_section.py b/popmon/visualization/overview_section.py index 2b02ebcc..af403177 100644 --- a/popmon/visualization/overview_section.py +++ b/popmon/visualization/overview_section.py @@ -54,10 +54,10 @@ def __init__( ignore_features=None, static_bounds=None, dynamic_bounds=None, - prefix="traffic_light_", + prefix: str = "traffic_light_", suffices=None, ignore_stat_endswith=None, - ): + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting diff --git a/popmon/visualization/report_generator.py b/popmon/visualization/report_generator.py index f9b8c791..9c4ac232 100644 --- a/popmon/visualization/report_generator.py +++ b/popmon/visualization/report_generator.py @@ -34,7 +34,7 @@ class ReportGenerator(Module): _input_keys = ("read_key",) _output_keys = ("store_key",) - def __init__(self, read_key, store_key, settings: Report): + def __init__(self, read_key, store_key, settings: Report) -> None: """Initialize an instance of ReportGenerator. :param str read_key: key of input sections data to read from the datastore @@ -48,7 +48,7 @@ def __init__(self, read_key, store_key, settings: Report): self.online_report = settings.online_report self.tl_colors = settings.tl_colors - def get_description(self): + def get_description(self) -> str: return "HTML Report" def transform(self, sections: list) -> str: diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index 7b58b1c7..d204f826 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -54,7 +54,7 @@ } -def get_stat_description(name: str): +def get_stat_description(name: str) -> str: """Gets the description of a statistic. :param str name: the name of the statistic. @@ -70,8 +70,8 @@ def get_stat_description(name: str): if name in "mean_trend10_zscore": return "Significance of (rolling) trend in means of features" - head, *tail = name.split("_") - tail = "_".join(tail) + head, *tails = name.split("_") + tail = "_".join(tails) if tail in comparisons and head in references: return comparisons[tail] @@ -98,11 +98,11 @@ def __init__( ignore_features=None, static_bounds=None, dynamic_bounds=None, - prefix="traffic_light_", + prefix: str = "traffic_light_", suffices=None, ignore_stat_endswith=None, - description="", - ): + description: str = "", + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 53bd505f..3c073a87 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -50,10 +50,10 @@ def __init__( ignore_features=None, static_bounds=None, dynamic_bounds=None, - prefix="traffic_light_", + prefix: str = "traffic_light_", suffices=None, ignore_stat_endswith=None, - ): + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting @@ -173,7 +173,7 @@ def _plot_metrics( skip_first_n, skip_last_n, tl_colors, - style="heatmap", + style: str = "heatmap", ): # prune dates and values dates = _prune(dates, last_n, skip_first_n, skip_last_n) diff --git a/popmon/visualization/utils.py b/popmon/visualization/utils.py index 38aa82a3..01e4394a 100644 --- a/popmon/visualization/utils.py +++ b/popmon/visualization/utils.py @@ -25,6 +25,7 @@ import math import warnings from collections import defaultdict +from typing import DefaultDict, Dict, List, Union import numpy as np import pandas as pd @@ -324,7 +325,7 @@ def plot_bars( def plot_traffic_lights_overview(feature, data, metrics: list[str], labels: list[str]): - colors = defaultdict(dict) + colors: DefaultDict[str, Dict[str, List[str]]] = defaultdict(dict) color_map = ["g", "y", "r"] for c1, metric in enumerate(metrics): for c2, label in enumerate(labels): @@ -365,7 +366,7 @@ def plot_traffic_lights_alerts_aggregate( yellow = hex_to_rgb(tl_colors["yellow"]) red = hex_to_rgb(tl_colors["red"]) - colors = defaultdict(dict) + colors: DefaultDict[str, Dict[str, List[str]]] = defaultdict(dict) for c1, metric in enumerate(metrics): row_max = np.max(data[c1]) for c2, label in enumerate(labels): @@ -437,11 +438,11 @@ def histogram_basic_checks(plots=None): def plot_histogram_overlay( plots=None, - is_num=True, - is_ts=False, - is_static_reference=True, - top=20, - n_choices=2, + is_num: bool = True, + is_ts: bool = False, + is_static_reference: bool = True, + top: int = 20, + n_choices: int = 2, ): """Create and plot (overlapping/grouped) histogram(s) of column values. @@ -796,7 +797,7 @@ def plot_heatmap( } -def _prune(values, last_n=0, skip_first_n=0, skip_last_n=0): +def _prune(values, last_n: int = 0, skip_first_n: int = 0, skip_last_n: int = 0): """inline function to select first or last items of input list :param values: input list to select from diff --git a/tests/popmon/base/test_module.py b/tests/popmon/base/test_module.py index ac9bfd19..b7f7dfdb 100644 --- a/tests/popmon/base/test_module.py +++ b/tests/popmon/base/test_module.py @@ -8,7 +8,7 @@ class Scaler(Module): _input_keys = ("input_key",) _output_keys = ("output_key",) - def __init__(self, input_key, output_key, mean, std): + def __init__(self, input_key, output_key, mean, std) -> None: super().__init__() self.input_key = input_key self.output_key = output_key diff --git a/tests/popmon/base/test_pipeline.py b/tests/popmon/base/test_pipeline.py index 75453d90..d7e4b38b 100644 --- a/tests/popmon/base/test_pipeline.py +++ b/tests/popmon/base/test_pipeline.py @@ -10,7 +10,7 @@ class LogTransformer(Module): _input_keys = ("input_key",) _output_keys = ("output_key",) - def __init__(self, input_key, output_key): + def __init__(self, input_key, output_key) -> None: super().__init__() self.input_key = input_key self.output_key = output_key @@ -25,7 +25,7 @@ class PowerTransformer(Module): _input_keys = ("input_key",) _output_keys = ("output_key",) - def __init__(self, input_key, output_key, power): + def __init__(self, input_key, output_key, power) -> None: super().__init__() self.input_key = input_key self.output_key = output_key @@ -40,7 +40,7 @@ class SumNormalizer(Module): _input_keys = ("input_key",) _output_keys = ("output_key",) - def __init__(self, input_key, output_key): + def __init__(self, input_key, output_key) -> None: super().__init__() self.input_key = input_key self.output_key = output_key @@ -54,7 +54,7 @@ class WeightedSum(Module): _input_keys = ("input_key", "weight_key") _output_keys = ("output_key",) - def __init__(self, input_key, weight_key, output_key): + def __init__(self, input_key, weight_key, output_key) -> None: super().__init__() self.input_key = input_key self.weight_key = weight_key