diff --git a/.github/workflows/license.yml b/.github/workflows/license.yml deleted file mode 100644 index bd736477..00000000 --- a/.github/workflows/license.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: license - -on: - push: - branches: - - master - - develop - pull_request: - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Set up Golang - uses: actions/setup-go@v2 - with: - go-version: '^1.16' # See https://github.com/google/go-licenses/issues/75 - - name: Install addlicense - run: | - export PATH=${PATH}:`go env GOPATH`/bin - go install github.com/google/addlicense@latest - - name: Check license - run: | - export PATH=${PATH}:`go env GOPATH`/bin - addlicense -check -l mit -c "ING Analytics Wholesale Banking" $(find popmon/ -type f -name '*.py' -not -path popmon/version.py) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index be8a035e..b58ce4bf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,12 +4,12 @@ repos: hooks: - id: black - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.270' + rev: 'v0.0.277' hooks: - id: ruff args: [--fix] - repo: https://github.com/asottile/blacken-docs - rev: 1.13.0 + rev: 1.14.0 hooks: - id: blacken-docs - repo: local @@ -24,5 +24,5 @@ repos: hooks: - id: nbqa-black - id: nbqa-ruff - additional_dependencies: [ruff==v0.0.270] + additional_dependencies: [ruff==v0.0.277] diff --git a/examples/integrations/kibana/elastic_connector.py b/examples/integrations/kibana/elastic_connector.py index 3da1d2ac..a0ca9f5d 100644 --- a/examples/integrations/kibana/elastic_connector.py +++ b/examples/integrations/kibana/elastic_connector.py @@ -2,7 +2,7 @@ class ElasticConnector: - def __init__(self): + def __init__(self) -> None: self.host = "localhost" self.port = 9200 self.es = None diff --git a/popmon/alerting/alerts_summary.py b/popmon/alerting/alerts_summary.py index 126814cd..f82abc35 100644 --- a/popmon/alerting/alerts_summary.py +++ b/popmon/alerting/alerts_summary.py @@ -38,11 +38,11 @@ class AlertsSummary(Module): def __init__( self, read_key, - store_key="", + store_key: str = "", features=None, ignore_features=None, - combined_variable="_AGGREGATE_", - ): + combined_variable: str = "_AGGREGATE_", + ) -> None: """Initialize an instance of AlertsSummary module. :param str read_key: key of input data to read from datastore. diff --git a/popmon/alerting/compute_tl_bounds.py b/popmon/alerting/compute_tl_bounds.py index deea60ee..3b62f4ee 100644 --- a/popmon/alerting/compute_tl_bounds.py +++ b/popmon/alerting/compute_tl_bounds.py @@ -30,7 +30,7 @@ from popmon.base import Module, Pipeline -def traffic_light_summary(row, cols=None, prefix=""): +def traffic_light_summary(row, cols=None, prefix: str = ""): """Make a summary of traffic light alerts present in the dataframe Count number of green, yellow and red traffic lights. @@ -60,7 +60,9 @@ def traffic_light_summary(row, cols=None, prefix=""): return pd.Series(x) -def traffic_light(value, red_high, yellow_high, yellow_low=0, red_low=0): +def traffic_light( + value, red_high, yellow_high, yellow_low: int = 0, red_low: int = 0 +) -> int: """Get the corresponding traffic light given a value and traffic light bounds. :param float value: value to check @@ -120,17 +122,17 @@ def __init__( self, read_key, monitoring_rules=None, - store_key="", + store_key: str = "", features=None, ignore_features=None, - apply_funcs_key="", + apply_funcs_key: str = "", func=None, - metrics_wide=False, - prefix="traffic_light_", - suffix="", - entire=False, + metrics_wide: bool = False, + prefix: str = "traffic_light_", + suffix: str = "", + entire: bool = False, **kwargs, - ): + ) -> None: """Initialize an instance of TrafficLightBounds module. :param str read_key: key of input data to read from datastore @@ -171,7 +173,7 @@ def get_description(self): """Returns the traffic light function as description.""" return self.traffic_light_func.__name__ - def _set_traffic_lights(self, feature, cols, pattern, rule): + def _set_traffic_lights(self, feature, cols, pattern, rule) -> None: process_cols = fnmatch.filter(cols, pattern) for pcol in process_cols: @@ -250,10 +252,10 @@ def pull_bounds( row, red_high, yellow_high, - yellow_low=0, - red_low=0, - suffix_mean="_mean", - suffix_std="_std", + yellow_low: int = 0, + red_low: int = 0, + suffix_mean: str = "_mean", + suffix_std: str = "_std", cols=None, ): """Calculate traffic light pull bounds for list of cols @@ -307,10 +309,10 @@ def df_single_op_pull_bounds( df, red_high, yellow_high, - yellow_low=0, - red_low=0, - suffix_mean="_mean", - suffix_std="_std", + yellow_low: int = 0, + red_low: int = 0, + suffix_mean: str = "_mean", + suffix_std: str = "_std", cols=None, ): """Calculate traffic light pull bounds for list of cols on first row only @@ -341,10 +343,10 @@ def __init__( self, read_key, rules, - store_key="", - suffix_mean="_mean", - suffix_std="_std", - ): + store_key: str = "", + suffix_mean: str = "_mean", + suffix_std: str = "_std", + ) -> None: """Initialize an instance of DynamicTrafficLightBounds. :param str read_key: key of input data to read from data store, only used to extract feature list. @@ -385,10 +387,10 @@ def __init__( self, read_key, rules, - store_key="", - suffix_mean="_mean", - suffix_std="_std", - ): + store_key: str = "", + suffix_mean: str = "_mean", + suffix_std: str = "_std", + ) -> None: """Initialize an instance of StaticBounds. :param str read_key: key of input data to read from data store, only used to extract feature list. @@ -432,7 +434,9 @@ class TrafficLightAlerts(Pipeline): - Apply them to profiled test statistics data """ - def __init__(self, read_key, store_key, rules, expanded_rules_key=""): + def __init__( + self, read_key, store_key, rules, expanded_rules_key: str = "" + ) -> None: """Initialize an instance of TrafficLightBounds. :param str read_key: key of input data to read from data store, only used to extract feature list. diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py index 36aaa5b8..876c0968 100644 --- a/popmon/analysis/apply_func.py +++ b/popmon/analysis/apply_func.py @@ -16,10 +16,9 @@ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - +from __future__ import annotations import warnings -from typing import Optional import numpy as np import pandas as pd @@ -40,14 +39,14 @@ class ApplyFunc(Module): def __init__( self, apply_to_key, - store_key="", - assign_to_key="", - apply_funcs_key="", + store_key: str = "", + assign_to_key: str = "", + apply_funcs_key: str = "", features=None, apply_funcs=None, metrics=None, - msg="", - ): + msg: str = "", + ) -> None: """Initialize an instance of ApplyFunc. :param str apply_to_key: key of the input data to apply funcs to. @@ -147,8 +146,8 @@ def add_apply_func( def transform( self, apply_to_data: dict, - assign_to_data: Optional[dict] = None, - apply_funcs: Optional[list] = None, + assign_to_data: dict | None = None, + apply_funcs: list | None = None, ): """ Apply functions to specified feature and metrics diff --git a/popmon/analysis/comparison/comparisons.py b/popmon/analysis/comparison/comparisons.py index 29cbb83b..df10d32f 100644 --- a/popmon/analysis/comparison/comparisons.py +++ b/popmon/analysis/comparison/comparisons.py @@ -168,7 +168,7 @@ def ks(p, q, *_): dim=1, htype="cat", ) -def unknown_labels(hist1, hist2): +def unknown_labels(hist1, hist2) -> bool: # check consistency of bin_labels labels1 = hist1.keySet labels2 = hist2.keySet diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index ba069a9f..8adc272d 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -40,7 +40,7 @@ from popmon.hist.hist_utils import COMMON_HIST_TYPES, is_numeric -def hist_compare(row, hist_name1="", hist_name2=""): +def hist_compare(row, hist_name1: str = "", hist_name2: str = ""): """Function to compare two histograms Apply statistical tests to compare two input histograms, such as: @@ -107,11 +107,11 @@ def __init__( read_key, store_key, assign_to_key=None, - hist_col="histogram", - suffix="comp", + hist_col: str = "histogram", + suffix: str = "comp", *args, **kwargs, - ): + ) -> None: """Initialize an instance of RollingHistComparer. :param func_hist_collector: histogram collection function @@ -160,10 +160,10 @@ def __init__( read_key, store_key, window, - shift=1, - hist_col="histogram", - suffix="roll", - ): + shift: int = 1, + hist_col: str = "histogram", + suffix: str = "roll", + ) -> None: """Initialize an instance of RollingHistComparer. :param str read_key: key of input data to read from data store @@ -201,9 +201,9 @@ def __init__( self, read_key, store_key, - hist_col="histogram", - suffix="prev1", - ): + hist_col: str = "histogram", + suffix: str = "prev1", + ) -> None: """Initialize an instance of PreviousHistComparer. :param str read_key: key of input data to read from data store @@ -228,10 +228,10 @@ def __init__( self, read_key, store_key, - shift=1, - hist_col="histogram", - suffix="expanding", - ): + shift: int = 1, + hist_col: str = "histogram", + suffix: str = "expanding", + ) -> None: """Initialize an instance of ExpandingHistComparer. :param str read_key: key of input data to read from data store @@ -267,9 +267,9 @@ def __init__( reference_key, assign_to_key, store_key, - hist_col="histogram", - suffix="ref", - ): + hist_col: str = "histogram", + suffix: str = "ref", + ) -> None: """Initialize an instance of ReferenceHistComparer. :param str reference_key: key of input data to read from data store @@ -306,10 +306,10 @@ def __init__( read_key, store_key, assign_to_key=None, - hist_col="histogram", + hist_col: str = "histogram", *args, **kwargs, - ): + ) -> None: """Initialize an instance of NormHistComparer. :param func_hist_collector: histogram collection function @@ -349,7 +349,9 @@ def __init__( class RollingNormHistComparer(NormHistComparer): """Compare histogram to previous rolling normalized histograms""" - def __init__(self, read_key, store_key, window, shift=1, hist_col="histogram"): + def __init__( + self, read_key, store_key, window, shift: int = 1, hist_col: str = "histogram" + ) -> None: """Initialize an instance of RollingNormHistComparer. :param str read_key: key of input data to read from data store @@ -383,7 +385,9 @@ def transform(self, datastore): class ExpandingNormHistComparer(NormHistComparer): """Compare histogram to previous expanding normalized histograms""" - def __init__(self, read_key, store_key, shift=1, hist_col="histogram"): + def __init__( + self, read_key, store_key, shift: int = 1, hist_col: str = "histogram" + ) -> None: """Initialize an instance of ExpandingNormHistComparer. :param str read_key: key of input data to read from data store @@ -412,7 +416,9 @@ def transform(self, datastore): class ReferenceNormHistComparer(NormHistComparer): """Compare histogram to reference normalized histograms""" - def __init__(self, reference_key, assign_to_key, store_key, hist_col="histogram"): + def __init__( + self, reference_key, assign_to_key, store_key, hist_col: str = "histogram" + ) -> None: """Initialize an instance of ReferenceNormHistComparer. :param str reference_key: key of input data to read from data store diff --git a/popmon/analysis/functions.py b/popmon/analysis/functions.py index e0370b44..6d28f75c 100644 --- a/popmon/analysis/functions.py +++ b/popmon/analysis/functions.py @@ -34,7 +34,7 @@ from popmon.stats.numpy import probability_distribution_mean_covariance -def pull(row, suffix_mean="_mean", suffix_std="_std", cols=None): +def pull(row, suffix_mean: str = "_mean", suffix_std: str = "_std", cols=None): """Calculate normalized residual (pull) for list of cols Function can be used by ApplyFunc module. @@ -74,7 +74,7 @@ def pull(row, suffix_mean="_mean", suffix_std="_std", cols=None): return pd.Series(x) -def expanding_mean(df, shift=1): +def expanding_mean(df, shift: int = 1): """Calculate expanding mean of all numeric columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -86,7 +86,7 @@ def expanding_mean(df, shift=1): return df.shift(shift).expanding().mean() -def expanding_std(df, shift=1): +def expanding_std(df, shift: int = 1): """Calculate expanding std of all numeric columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -98,7 +98,7 @@ def expanding_std(df, shift=1): return df.shift(shift).expanding().std() -def expanding_apply(df, func, shift=1, *args, **kwargs): +def expanding_apply(df, func, shift: int = 1, *args, **kwargs): """Calculate expanding apply() to all columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -113,7 +113,7 @@ def expanding_apply(df, func, shift=1, *args, **kwargs): return df.shift(shift).expanding().apply(func, args=args, **kwargs) -def rolling_std(df, window, shift=1): +def rolling_std(df, window, shift: int = 1): """Calculate rolling std of all numeric columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -126,7 +126,7 @@ def rolling_std(df, window, shift=1): return df.shift(shift).rolling(window).std() -def rolling_mean(df, window, shift=1): +def rolling_mean(df, window, shift: int = 1): """Calculate rolling mean of all numeric columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -139,7 +139,7 @@ def rolling_mean(df, window, shift=1): return df.shift(shift).rolling(window).mean() -def rolling_apply(df, window, func, shift=1, *args, **kwargs): +def rolling_apply(df, window, func, shift: int = 1, *args, **kwargs): """Calculate rolling apply() to all columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -156,7 +156,7 @@ def rolling_apply(df, window, func, shift=1, *args, **kwargs): return df.shift(shift).rolling(window).apply(func, raw=False, args=args, **kwargs) -def rolling_lr(df, window, index=0, shift=0): +def rolling_lr(df, window, index: int = 0, shift: int = 0): """Calculate rolling scipy lin_regress() to all columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -175,7 +175,7 @@ def rolling_lr(df, window, index=0, shift=0): ) -def rolling_lr_zscore(df, window, shift=0): +def rolling_lr_zscore(df, window, shift: int = 0): """Calculate rolling z-score of scipy lin_regress() to all columns of a pandas dataframe Function can be used by ApplyFunc module. @@ -202,7 +202,7 @@ def func(x): return roll(df, window=window, shift=shift).apply(func, axis=1) -def roll(df, window, shift=1): +def roll(df, window, shift: int = 1): """Implementation of rolling window that can handle non-numerical columns such as histograms :param pd.DataFrame df: input dataframe to apply rolling function to. @@ -250,7 +250,7 @@ def reshape(vs, i): return rolled_df -def expand(df, shift=1): +def expand(df, shift: int = 1): """Implementation of expanding window that can handle non-numerical values such as histograms Split up input array into expanding sub-arrays @@ -283,7 +283,7 @@ def reshape(vs, i): return expanded_df -def expanding_hist(df, shift=1, *args, **kwargs): +def expanding_hist(df, shift: int = 1, *args, **kwargs): """Apply expanding histogram sum Function can be used by ApplyFunc module. @@ -297,7 +297,7 @@ def expanding_hist(df, shift=1, *args, **kwargs): return expand(df, shift=shift).apply(hist_sum, axis=1, args=args, **kwargs) -def rolling_hist(df, window, shift=1, *args, **kwargs): +def rolling_hist(df, window, shift: int = 1, *args, **kwargs): """Apply rolling histogram sum Function can be used by ApplyFunc module. @@ -314,7 +314,7 @@ def rolling_hist(df, window, shift=1, *args, **kwargs): ) -def hist_sum(x, hist_name=""): +def hist_sum(x, hist_name: str = ""): """Return sum of histograms Usage: df['hists'].apply(hist_sum) ; series.apply(hist_sum) @@ -351,7 +351,7 @@ def hist_sum(x, hist_name=""): return pd.Series(o) -def roll_norm_hist_mean_cov(df, window, shift=1, *args, **kwargs): +def roll_norm_hist_mean_cov(df, window, shift: int = 1, *args, **kwargs): """Apply rolling normalized_hist_mean_cov function Function can be used by ApplyFunc module. @@ -368,7 +368,7 @@ def roll_norm_hist_mean_cov(df, window, shift=1, *args, **kwargs): ) -def expand_norm_hist_mean_cov(df, shift=1, *args, **kwargs): +def expand_norm_hist_mean_cov(df, shift: int = 1, *args, **kwargs): """Apply expanding normalized_hist_mean_cov function Function can be used by ApplyFunc module. @@ -384,7 +384,7 @@ def expand_norm_hist_mean_cov(df, shift=1, *args, **kwargs): ) -def normalized_hist_mean_cov(x, hist_name=""): +def normalized_hist_mean_cov(x, hist_name: str = ""): """Mean normalized histogram and its covariance of list of input histograms Usage: df['hists'].apply(normalized_hist_mean_cov) ; series.apply(normalized_hist_mean_cov) @@ -446,10 +446,10 @@ def normalized_hist_mean_cov(x, hist_name=""): def relative_chi_squared( row, - hist_name="histogram", - suffix_mean="_mean", - suffix_cov="_cov", - suffix_binning="_binning", + hist_name: str = "histogram", + suffix_mean: str = "_mean", + suffix_cov: str = "_cov", + suffix_binning: str = "_binning", ): """Calculate chi squared of normalized histogram with pre-calculated mean normalized histogram diff --git a/popmon/analysis/hist_numpy.py b/popmon/analysis/hist_numpy.py index abf2c201..edcfc6d4 100644 --- a/popmon/analysis/hist_numpy.py +++ b/popmon/analysis/hist_numpy.py @@ -17,6 +17,7 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +from __future__ import annotations import warnings @@ -102,7 +103,7 @@ def set_ndgrid(hist, keys, n_dim): ) return grid - def flatten(histogram, keys, grid, dim=0, prefix=None): + def flatten(histogram, keys, grid, dim: int = 0, prefix=None): if prefix is None: prefix = [] @@ -137,7 +138,7 @@ def set_2dgrid(hist, keys): return set_ndgrid(hist, keys, n_dim=2) -def get_ndgrid(hist, get_bin_labels=False, n_dim=2): +def get_ndgrid(hist, get_bin_labels: bool = False, n_dim: int = 2): """Get filled n-d grid of first n dimensions of input histogram :param hist: input histogrammar histogram @@ -158,7 +159,7 @@ def get_ndgrid(hist, get_bin_labels=False, n_dim=2): return grid -def get_2dgrid(hist, get_bin_labels=False): +def get_2dgrid(hist, get_bin_labels: bool = False): """Get filled x,y grid of first two dimensions of input histogram :param hist: input histogrammar histogram @@ -167,7 +168,9 @@ def get_2dgrid(hist, get_bin_labels=False): return get_ndgrid(hist, get_bin_labels, n_dim=2) -def get_consistent_numpy_ndgrids(hist_list=None, get_bin_labels=False, dim=3): +def get_consistent_numpy_ndgrids( + hist_list=None, get_bin_labels: bool = False, dim: int = 3 +): """Get list of consistent x,y grids of first n dimensions of (sparse) input histograms :param list hist_list: list of input histogrammar histograms @@ -184,22 +187,22 @@ def get_consistent_numpy_ndgrids(hist_list=None, get_bin_labels=False, dim=3): ) assert_similar_hists(hist_list) - keys = [set() for _ in range(dim)] + keys: list[set] = [set() for _ in range(dim)] for hist in hist_list: hist_keys = prepare_ndgrid(hist, n_dim=dim) for i, h_keys in enumerate(hist_keys): keys[i] |= set(h_keys) - keys = [sorted(k) for k in keys] + sorted_keys = [sorted(k) for k in keys] - gridnd_list = [set_ndgrid(hist, keys, n_dim=dim) for hist in hist_list] + gridnd_list = [set_ndgrid(hist, sorted_keys, n_dim=dim) for hist in hist_list] if get_bin_labels: - return gridnd_list, keys + return gridnd_list, sorted_keys return gridnd_list -def get_consistent_numpy_2dgrids(hist_list=None, get_bin_labels=False): +def get_consistent_numpy_2dgrids(hist_list=None, get_bin_labels: bool = False): """Get list of consistent x,y grids of first two dimensions of (sparse) input histograms :param list hist_list: list of input histogrammar histograms @@ -211,7 +214,9 @@ def get_consistent_numpy_2dgrids(hist_list=None, get_bin_labels=False): return get_consistent_numpy_ndgrids(hist_list, get_bin_labels, dim=2) -def get_consistent_numpy_1dhists(hist_list, get_bin_labels=False, crop_range=False): +def get_consistent_numpy_1dhists( + hist_list, get_bin_labels: bool = False, crop_range: bool = False +): """Get list of consistent numpy hists for list of sparse (or bin) input histograms Works for sparse and bin histograms. @@ -294,7 +299,7 @@ def get_consistent_numpy_1dhists(hist_list, get_bin_labels=False, crop_range=Fal return nphist_list -def get_consistent_numpy_entries(hist_list, get_bin_labels=False): +def get_consistent_numpy_entries(hist_list, get_bin_labels: bool = False): """Get list of consistent numpy bin_entries for list of 1d input histograms Works for categorize, sparse and bin histograms. @@ -345,7 +350,7 @@ def get_consistent_numpy_entries(hist_list, get_bin_labels=False): return entries_list -def get_contentType(hist): +def get_contentType(hist) -> str: """Get content type of bins of histogram :param hist: input histogram @@ -362,7 +367,9 @@ def get_contentType(hist): return "Count" -def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types): +def check_similar_hists( + hist_list, check_type: bool = True, assert_type=used_hist_types +) -> bool: """Check consistent list of input histograms Check that type and dimension of all histograms in input list are the same. @@ -478,7 +485,9 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) return True -def assert_similar_hists(hist_list, check_type=True, assert_type=used_hist_types): +def assert_similar_hists( + hist_list, check_type: bool = True, assert_type=used_hist_types +): """Assert consistent list of input histograms Assert that type and dimension of all histograms in input list are the same. diff --git a/popmon/analysis/merge_statistics.py b/popmon/analysis/merge_statistics.py index 0cc95d2a..83a01c82 100644 --- a/popmon/analysis/merge_statistics.py +++ b/popmon/analysis/merge_statistics.py @@ -30,7 +30,7 @@ class MergeStatistics(Module): _input_keys = ("read_keys",) _output_keys = ("store_key",) - def __init__(self, read_keys: list[str], store_key: str): + def __init__(self, read_keys: list[str], store_key: str) -> None: """Initialize an instance of MergeStatistics. :param list read_keys: list of keys of input data to read from the datastore diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index 63f78496..b067bdd8 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -56,10 +56,10 @@ def __init__( features=None, ignore_features=None, var_timestamp=None, - hist_col="histogram", - index_col="date", + hist_col: str = "histogram", + index_col: str = "date", stats_functions=None, - ): + ) -> None: super().__init__() self.read_key = read_key self.store_key = store_key diff --git a/popmon/analysis/profiling/pull_calculator.py b/popmon/analysis/profiling/pull_calculator.py index 6795559e..b1d0e0a6 100644 --- a/popmon/analysis/profiling/pull_calculator.py +++ b/popmon/analysis/profiling/pull_calculator.py @@ -52,13 +52,13 @@ def __init__( apply_to_key, assign_to_key=None, store_key=None, - suffix_mean="_mean", - suffix_std="_std", - suffix_pull="_pull", + suffix_mean: str = "_mean", + suffix_std: str = "_std", + suffix_pull: str = "_pull", features=None, *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str func_mean: applied-function to calculate mean of profiled statistics @@ -109,15 +109,15 @@ def __init__( self, read_key, window, - shift=1, + shift: int = 1, features=None, store_key=None, - suffix_mean="_roll_mean", - suffix_std="_roll_std", - suffix_pull="_roll_pull", + suffix_mean: str = "_roll_mean", + suffix_std: str = "_roll_std", + suffix_pull: str = "_roll_pull", *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str read_key: key of input data to read from data store @@ -161,15 +161,15 @@ class ExpandingPullCalculator(PullCalculator): def __init__( self, read_key, - shift=1, + shift: int = 1, features=None, store_key=None, - suffix_mean="_exp_mean", - suffix_std="_exp_std", - suffix_pull="_exp_pull", + suffix_mean: str = "_exp_mean", + suffix_std: str = "_exp_std", + suffix_pull: str = "_exp_pull", *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str read_key: key of input data to read from data store @@ -214,12 +214,12 @@ def __init__( assign_to_key, store_key=None, features=None, - suffix_mean="_ref_mean", - suffix_std="_ref_std", - suffix_pull="_ref_pull", + suffix_mean: str = "_ref_mean", + suffix_std: str = "_ref_std", + suffix_pull: str = "_ref_pull", *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str reference_key: key of input data to read from data store @@ -264,12 +264,12 @@ def __init__( assign_to_key, store_key=None, features=None, - suffix_mean="_ref_mean", - suffix_std="_ref_std", - suffix_pull="_ref_pull", + suffix_mean: str = "_ref_mean", + suffix_std: str = "_ref_std", + suffix_pull: str = "_ref_pull", *args, **kwargs, - ): + ) -> None: """Initialize an instance of HistComparer. :param str reference_key: key of input data to read from data store diff --git a/popmon/base/module.py b/popmon/base/module.py index 3c27d415..fdd7f45c 100644 --- a/popmon/base/module.py +++ b/popmon/base/module.py @@ -17,6 +17,7 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +from __future__ import annotations import logging from abc import ABCMeta @@ -88,10 +89,10 @@ def combine_classes(*args): class Module(metaclass=combine_classes(ABCMeta, ModuleMetaClass)): """Abstract base class used for modules in a pipeline.""" - _input_keys = None - _output_keys = None + _input_keys: tuple[str, ...] | None = None + _output_keys: tuple[str, ...] | None = None - def __init__(self): + def __init__(self) -> None: """Module initialization""" self.logger = logging.getLogger() self.features = [] @@ -113,10 +114,10 @@ def get_inputs(self): def get_outputs(self): return self._get_values(self._output_keys) - def get_description(self): + def get_description(self) -> str: return "" - def set_logger(self, logger): + def set_logger(self, logger) -> None: """Set logger of module :param logger: input logger @@ -171,7 +172,7 @@ def get_features(self, all_features: list) -> list: features = [feature for feature in features if feature in all_features] return features - def transform(self, *args): + def transform(self, *args, **kwargs): """Central function of the module. Typically transform() takes something from the datastore, does something to it, and puts the results @@ -183,7 +184,7 @@ def transform(self, *args): """ raise NotImplementedError - def __repr__(self): + def __repr__(self) -> str: """String representation for modules when printing a pipeline/list of modules""" name = self.__class__.__name__ input_keys = [f"{v}='{getattr(self, v)}'" for v in self._input_keys] diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py index f3f753de..b9460933 100644 --- a/popmon/base/pipeline.py +++ b/popmon/base/pipeline.py @@ -24,7 +24,7 @@ class Pipeline: """Base class used for to run modules in a pipeline.""" - def __init__(self, modules, logger: logging.Logger | None = None): + def __init__(self, modules, logger: logging.Logger | None = None) -> None: """Initialization of the pipeline :param list modules: modules of the pipeline. @@ -33,7 +33,7 @@ def __init__(self, modules, logger: logging.Logger | None = None): self.modules = modules self.set_logger(logger) - def set_logger(self, logger: logging.Logger | None): + def set_logger(self, logger: logging.Logger | None) -> None: """Set the logger to be used by each module :param logger: input logger @@ -42,7 +42,7 @@ def set_logger(self, logger: logging.Logger | None): for module in self.modules: module.set_logger(self.logger) - def add_modules(self, modules): + def add_modules(self, modules) -> None: """Add more modules to existing list of modules. :param list modules: list of more modules @@ -67,7 +67,7 @@ def transform(self, datastore): datastore = module.transform(datastore) return datastore - def __repr__(self): + def __repr__(self) -> str: """String representation for pipeline""" name = self.__class__.__name__ ret = f"{name}: [\n" diff --git a/popmon/base/registry.py b/popmon/base/registry.py index b38d0627..94caba4d 100644 --- a/popmon/base/registry.py +++ b/popmon/base/registry.py @@ -19,17 +19,19 @@ from __future__ import annotations from collections import defaultdict -from typing import Callable +from typing import Any, Callable class Registry: _properties = ("dim", "htype") - def __init__(self): + def __init__(self) -> None: self._keys: list[str] = [] self._descriptions: dict[str, str] = {} - self._properties_to_func = defaultdict(lambda: defaultdict(dict)) - self._func_name_to_properties = {} + self._properties_to_func: defaultdict[ + str, defaultdict[str, dict[Any, Any]] + ] = defaultdict(lambda: defaultdict(dict)) + self._func_name_to_properties: dict[Any, Any] = {} def register( self, diff --git a/popmon/config.py b/popmon/config.py index a85feba1..24a921c8 100644 --- a/popmon/config.py +++ b/popmon/config.py @@ -21,7 +21,8 @@ import pandas as pd from histogrammar.dfinterface.make_histograms import get_time_axes -from pydantic import BaseModel, BaseSettings +from pydantic import BaseModel +from pydantic_settings import BaseSettings from typing_extensions import Literal # Global configuration for the joblib parallelization. Could be used to change the number of jobs, and/or change @@ -32,7 +33,7 @@ class ValidatedSettings(BaseSettings): class Config: - validate_all = True + validate_default = True validate_assignment = True @@ -354,7 +355,7 @@ class Settings(ValidatedSettings): """ # Config utilities - def _ensure_features_time_axis(self): + def _ensure_features_time_axis(self) -> None: self.features = [ c if c.startswith(self.time_axis) else f"{self.time_axis}:{c}" for c in self.features @@ -374,7 +375,7 @@ def _set_time_axis_dataframe(self, df): f"Found {num} time-axes: {time_axes}. Set *one* time_axis manually!" ) - def _set_time_axis_hists(self, hists): + def _set_time_axis_hists(self, hists) -> None: # auto guess the time_axis: find the most frequent first column name in the histograms list first_cols = [k.split(":")[0] for k in list(hists.keys())] self.time_axis = max(set(first_cols), key=first_cols.count) diff --git a/popmon/extensions/__init__.py b/popmon/extensions/__init__.py index 0dd04665..2cc37d70 100644 --- a/popmon/extensions/__init__.py +++ b/popmon/extensions/__init__.py @@ -21,5 +21,3 @@ from popmon.extensions.profile_diptest import Diptest extensions = [Diptest()] -for extension in extensions: - extension.check() diff --git a/popmon/extensions/extension.py b/popmon/extensions/extension.py index ad53218d..401ccc3d 100644 --- a/popmon/extensions/extension.py +++ b/popmon/extensions/extension.py @@ -18,23 +18,9 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. from __future__ import annotations -import importlib.util from typing import Callable -def is_installed(package): - is_present = importlib.util.find_spec(package) - return is_present is not None - - class Extension: name: str extension: Callable - # should also be added to `pyproject.toml` optional-dependencies - requirements: list[str] - - def check(self): - if all(is_installed(package) for package in self.requirements): - func = self.extension - func = func.__func__ - func() diff --git a/popmon/extensions/profile_diptest.py b/popmon/extensions/profile_diptest.py index 21b597c3..e8200f3e 100644 --- a/popmon/extensions/profile_diptest.py +++ b/popmon/extensions/profile_diptest.py @@ -34,7 +34,7 @@ from popmon.extensions.extension import Extension -def extension(): +def extension() -> None: from diptest import diptest @Profiles.register( @@ -68,4 +68,3 @@ def diptest_profile(bin_centers, bin_values, bin_width, rng=None): class Diptest(Extension): name = "diptest" extension = extension - requirements = ["diptest"] diff --git a/popmon/hist/hist_splitter.py b/popmon/hist/hist_splitter.py index 168a729f..684dfa91 100644 --- a/popmon/hist/hist_splitter.py +++ b/popmon/hist/hist_splitter.py @@ -46,15 +46,15 @@ def __init__( store_key, features=None, ignore_features=None, - feature_begins_with="", - project_on_axes=True, - flatten_output=False, - short_keys=True, + feature_begins_with: str = "", + project_on_axes: bool = True, + flatten_output: bool = False, + short_keys: bool = True, var_timestamp=None, - index_col="date", - hist_col="histogram", - filter_empty_split_hists=True, - ): + index_col: str = "date", + hist_col: str = "histogram", + filter_empty_split_hists: bool = True, + ) -> None: """Initialize an instance. :param str read_key: key of input histogram-dict to read from data store @@ -90,7 +90,7 @@ def __init__( "flatten_output requires short_keys attribute to be False." ) - def get_description(self): + def get_description(self) -> str: return "" def update_divided(self, divided, split, yname): diff --git a/popmon/hist/hist_utils.py b/popmon/hist/hist_utils.py index 83967d65..75ffcbba 100644 --- a/popmon/hist/hist_utils.py +++ b/popmon/hist/hist_utils.py @@ -45,7 +45,7 @@ HG_FACTORY = histogrammar.Factory() -def sum_entries(hist, default=True): +def sum_entries(hist, default: bool = True): """Recursively get sum of entries of histogram Sometimes hist.entries gives zero as answer? This function always works though. @@ -164,7 +164,7 @@ def sum_over_x(hist): return h_proj -def project_split2dhist_on_axis(splitdict, axis="x"): +def project_split2dhist_on_axis(splitdict, axis: str = "x"): """Project a split 2d-histogram onto one axis Project a 2d hist that's been split with function split_hist_along_first_dimension @@ -246,11 +246,11 @@ def get_bin_centers(hist): def split_hist_along_first_dimension( hist, - xname="x", - yname="y", - short_keys=True, - convert_time_index=True, - filter_empty_split_hists=True, + xname: str = "x", + yname: str = "y", + short_keys: bool = True, + convert_time_index: bool = True, + filter_empty_split_hists: bool = True, ): """Split (multi-dimensional) hist into sub-hists along x-axis diff --git a/popmon/io/file_reader.py b/popmon/io/file_reader.py index 99175382..f33b1308 100644 --- a/popmon/io/file_reader.py +++ b/popmon/io/file_reader.py @@ -37,7 +37,7 @@ def __init__( file_path: str | Path, apply_func: Callable | None = None, **kwargs, - ): + ) -> None: """Initialize an instance. :param str store_key: key of input data to be stored in the datastore diff --git a/popmon/io/file_writer.py b/popmon/io/file_writer.py index e80db281..2ead8e52 100644 --- a/popmon/io/file_writer.py +++ b/popmon/io/file_writer.py @@ -39,7 +39,7 @@ def __init__( file_path: str | Path | None = None, apply_func: Callable | None = None, **kwargs, - ): + ) -> None: """Initialize an instance. :param str read_key: key of input histogram-dict to read from data store diff --git a/popmon/io/json_reader.py b/popmon/io/json_reader.py index e7f4a3be..811a6317 100644 --- a/popmon/io/json_reader.py +++ b/popmon/io/json_reader.py @@ -28,7 +28,7 @@ class JsonReader(FileReader): """Read json file's contents into the datastore.""" - def __init__(self, file_path: str | Path, store_key: str): + def __init__(self, file_path: str | Path, store_key: str) -> None: """Initialize an instance. :param str store_key: key of input data to be stored in the datastore diff --git a/popmon/pipeline/amazing_pipeline.py b/popmon/pipeline/amazing_pipeline.py index 8c26ab06..52c84715 100644 --- a/popmon/pipeline/amazing_pipeline.py +++ b/popmon/pipeline/amazing_pipeline.py @@ -27,7 +27,7 @@ class AmazingPipeline(Pipeline): - def __init__(self, histogram_path: str, **kwargs): + def __init__(self, histogram_path: str, **kwargs) -> None: modules = [ JsonReader(file_path=histogram_path, store_key=kwargs["hists_key"]), # Or ExternalReference, RollingReference etc. @@ -36,7 +36,7 @@ def __init__(self, histogram_path: str, **kwargs): super().__init__(modules) -def run(): +def run() -> None: """Example that run self-reference pipeline and produces monitoring report""" logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s [%(module)s]: %(message)s" diff --git a/popmon/pipeline/metrics.py b/popmon/pipeline/metrics.py index 19ecb1ca..33e767be 100644 --- a/popmon/pipeline/metrics.py +++ b/popmon/pipeline/metrics.py @@ -74,9 +74,9 @@ def stability_metrics( def df_stability_metrics( df, - settings: Settings = None, + settings: Settings | None = None, time_width=None, - time_offset=0, + time_offset: int = 0, var_dtype=None, reference=None, **kwargs, diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py index bee1f270..258b3edf 100644 --- a/popmon/pipeline/metrics_pipelines.py +++ b/popmon/pipeline/metrics_pipelines.py @@ -73,7 +73,7 @@ def get_metrics_pipeline_class(reference_type, reference): def create_metrics_pipeline( settings: Settings, reference=None, - hists_key="hists", + hists_key: str = "hists", **kwargs, ): # configuration and datastore for report pipeline @@ -214,7 +214,7 @@ def __init__( self, settings: Settings, hists_key, - ): + ) -> None: """Example metrics pipeline for comparing test data with itself (full test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -269,9 +269,9 @@ class ExternalReferenceMetricsPipeline(Pipeline): def __init__( self, settings: Settings, - hists_key="test_hists", - ref_hists_key="ref_hists", - ): + hists_key: str = "test_hists", + ref_hists_key: str = "ref_hists", + ) -> None: """Example metrics pipeline for comparing test data with other (full) external reference set :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -332,8 +332,8 @@ class RollingReferenceMetricsPipeline(Pipeline): def __init__( self, settings: Settings, - hists_key="test_hists", - ): + hists_key: str = "test_hists", + ) -> None: """Example metrics pipeline for comparing test data with itself (rolling test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -391,8 +391,8 @@ class ExpandingReferenceMetricsPipeline(Pipeline): def __init__( self, settings: Settings, - hists_key="test_hists", - ): + hists_key: str = "test_hists", + ) -> None: """Example metrics pipeline for comparing test data with itself (expanding test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index 865da325..fe29b925 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -80,9 +80,9 @@ def stability_report( def df_stability_report( df, - settings: Settings = None, + settings: Settings | None | None = None, time_width=None, - time_offset=0, + time_offset: int = 0, var_dtype=None, reference=None, split=None, @@ -210,7 +210,7 @@ class StabilityReport: as a HTML string, HTML file or Jupyter notebook's cell output. """ - def __init__(self, datastore, read_key="html_report"): + def __init__(self, datastore, read_key: str = "html_report") -> None: """Initialize an instance of StabilityReport. :param str read_key: key of HTML report data to read from data store. default is html_report. @@ -232,11 +232,11 @@ def _repr_html_(self): return display(self.to_notebook_iframe()) - def __repr__(self): + def __repr__(self) -> str: """Override so that Jupyter Notebook does not print the object.""" return "" - def to_html(self, escape=False): + def to_html(self, escape: bool = False): """HTML code representation of the report (represented as a string). :param bool escape: escape characters which could conflict with other HTML code. default: False @@ -249,7 +249,7 @@ def to_html(self, escape=False): return html.escape(self.html_report) return self.html_report - def to_file(self, filename): + def to_file(self, filename) -> None: """Store HTML report in the local file system. :param str filename: filename for the HTML report @@ -257,7 +257,7 @@ def to_file(self, filename): with open(filename, "w+") as file: file.write(self.to_html()) - def to_notebook_iframe(self, width="100%", height="100%"): + def to_notebook_iframe(self, width: str = "100%", height: str = "100%"): """HTML representation of the class (report) embedded in an iframe. :param str width: width of the frame to be shown @@ -280,7 +280,7 @@ def regenerate( self, store_key: str = "html_report", sections_key: str = "report_sections", - settings: Settings = None, + settings: Settings | None = None, ): """Regenerate HTML report with different plot settings :param str sections_key: key to store sections data in the datastore. default is 'report_sections'. diff --git a/popmon/pipeline/report_pipelines.py b/popmon/pipeline/report_pipelines.py index 268507d6..2c8b8c3a 100644 --- a/popmon/pipeline/report_pipelines.py +++ b/popmon/pipeline/report_pipelines.py @@ -66,7 +66,7 @@ def __init__( self, settings: Settings, hists_key: str = "test_hists", - ): + ) -> None: """Example pipeline for comparing test data with itself (full test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -93,7 +93,7 @@ def __init__( settings: Settings, hists_key: str = "test_hists", ref_hists_key: str = "ref_hists", - ): + ) -> None: """Example pipeline for comparing test data with other (full) external reference set :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -122,7 +122,7 @@ def __init__( self, settings: Settings, hists_key: str = "test_hists", - ): + ) -> None: """Example pipeline for comparing test data with itself (rolling test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -148,7 +148,7 @@ def __init__( self, settings: Settings, hists_key: str = "test_hists", - ): + ) -> None: """Example pipeline for comparing test data with itself (expanding test set) :param str hists_key: key to test histograms in datastore. default is 'test_hists' @@ -177,7 +177,7 @@ def __init__( settings: Settings, sections_key: str = "report_sections", store_key: str = "html_report", - ): + ) -> None: """Initialize an instance of Report. :param Settings settings: the configuration object @@ -244,7 +244,7 @@ def __init__( ] if ( isinstance(settings.report.report_filepath, (str, Path)) - and len(settings.report.report_filepath) > 0 + and len(str(settings.report.report_filepath)) > 0 ): modules.append( FileWriter(store_key, file_path=settings.report.report_filepath) diff --git a/popmon/pipeline/timing.py b/popmon/pipeline/timing.py index 95079214..7e6f37c8 100644 --- a/popmon/pipeline/timing.py +++ b/popmon/pipeline/timing.py @@ -31,7 +31,7 @@ def __init__( self, store_key: str, **kwargs, - ): + ) -> None: """Initialize an instance. :param str store_key: key of input data to be stored in the datastore diff --git a/popmon/stats/numpy.py b/popmon/stats/numpy.py index cc41eea1..cb994572 100644 --- a/popmon/stats/numpy.py +++ b/popmon/stats/numpy.py @@ -22,7 +22,7 @@ import pandas as pd -def mean(a, weights=None, axis=None, dtype=None, keepdims=False, ddof=0): +def mean(a, weights=None, axis=None, dtype=None, keepdims: bool = False, ddof: int = 0): """ Compute the weighted mean along the specified axis. @@ -47,7 +47,7 @@ def mean(a, weights=None, axis=None, dtype=None, keepdims=False, ddof=0): ) -def std(a, weights=None, axis=None, dtype=None, ddof=0, keepdims=False): +def std(a, weights=None, axis=None, dtype=None, ddof: int = 0, keepdims: bool = False): """ Compute the weighted standard deviation along the specified axis. @@ -73,7 +73,7 @@ def std(a, weights=None, axis=None, dtype=None, ddof=0, keepdims=False): return np.sqrt(v) -def median(a, weights=None, axis=None, keepdims=False): +def median(a, weights=None, axis=None, keepdims: bool = False): """ Compute the weighted median along the specified axis. @@ -224,7 +224,7 @@ def covariance_multinomial_probability_distribution(entries): return covariance_matrix -def mad(a, c=0.6745, axis=0): +def mad(a, c=0.6745, axis: int = 0): """Median Absolute Deviation along given axis of an array mad = median(abs(a - median(a)))/c diff --git a/popmon/stitching/hist_stitcher.py b/popmon/stitching/hist_stitcher.py index be511a1b..9b677f41 100644 --- a/popmon/stitching/hist_stitcher.py +++ b/popmon/stitching/hist_stitcher.py @@ -33,13 +33,13 @@ class HistStitcher(Module): def __init__( self, - mode="add", + mode: str = "add", time_axis=None, time_bin_idx=None, read_key=None, delta_key=None, store_key=None, - ): + ) -> None: """Stitching histograms by first axis. :param str mode: options for histogram stitching: "add" or "replace". default is "add". @@ -64,7 +64,7 @@ def __init__( if self.mode not in self.allowed_modes: raise ValueError("mode should be either 'add' or 'replace'") - def get_description(self): + def get_description(self) -> str: return f"{self.mode}" def transform(self, hists_basis: dict, hists_delta: dict) -> dict: @@ -80,7 +80,7 @@ def stitch_histograms( hists_basis=None, hists_delta=None, hists_list=None, - time_axis="", + time_axis: str = "", time_bin_idx=None, ): """Stitching histograms by first axis. diff --git a/popmon/utils.py b/popmon/utils.py index 7364f643..e2242840 100644 --- a/popmon/utils.py +++ b/popmon/utils.py @@ -62,7 +62,7 @@ def filter_metrics( return metrics -def parallel(func: Callable, args_list, mode="args"): +def parallel(func: Callable, args_list, mode: str = "args"): """ Routine for parallel processing """ diff --git a/popmon/version.py b/popmon/version.py index 98da246a..8e21e55a 100644 --- a/popmon/version.py +++ b/popmon/version.py @@ -1 +1,20 @@ +# Copyright (c) 2023 ING Analytics Wholesale Banking +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + version = "1.4.4" diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py index 9bd7747b..8afa0e9b 100644 --- a/popmon/visualization/alert_section_generator.py +++ b/popmon/visualization/alert_section_generator.py @@ -47,10 +47,10 @@ def __init__( ignore_features=None, static_bounds=None, dynamic_bounds=None, - prefix="traffic_light_", + prefix: str = "traffic_light_", suffices=None, ignore_stat_endswith=None, - ): + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index 87c128c7..a50b95c9 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -53,8 +53,8 @@ def __init__( features=None, ignore_features=None, hist_names=None, - hist_name_starts_with="histogram", - ): + hist_name_starts_with: str = "histogram", + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting @@ -187,10 +187,7 @@ def transform(self, data_obj: dict, sections: list | None = None): histogram = [] # filter out potential empty heatmap plots, then prepend them to the sorted histograms - hplots = [] - for h in heatmaps: - if isinstance(h, dict) and len(h["plot"]): - hplots.append(h) + hplots = [h for h in heatmaps if isinstance(h, dict) and len(h["plot"])] if len(hplots) > 0: plot_type_layouts["heatmap"] = hplots[0]["layout"] @@ -215,7 +212,7 @@ def transform(self, data_obj: dict, sections: list | None = None): return sections -def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000): +def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins: int = 1000): """Split off plot histogram generation to allow for parallel processing :param str feature: feature diff --git a/popmon/visualization/overview_section.py b/popmon/visualization/overview_section.py index 2b02ebcc..af403177 100644 --- a/popmon/visualization/overview_section.py +++ b/popmon/visualization/overview_section.py @@ -54,10 +54,10 @@ def __init__( ignore_features=None, static_bounds=None, dynamic_bounds=None, - prefix="traffic_light_", + prefix: str = "traffic_light_", suffices=None, ignore_stat_endswith=None, - ): + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting diff --git a/popmon/visualization/report_generator.py b/popmon/visualization/report_generator.py index f9b8c791..9c4ac232 100644 --- a/popmon/visualization/report_generator.py +++ b/popmon/visualization/report_generator.py @@ -34,7 +34,7 @@ class ReportGenerator(Module): _input_keys = ("read_key",) _output_keys = ("store_key",) - def __init__(self, read_key, store_key, settings: Report): + def __init__(self, read_key, store_key, settings: Report) -> None: """Initialize an instance of ReportGenerator. :param str read_key: key of input sections data to read from the datastore @@ -48,7 +48,7 @@ def __init__(self, read_key, store_key, settings: Report): self.online_report = settings.online_report self.tl_colors = settings.tl_colors - def get_description(self): + def get_description(self) -> str: return "HTML Report" def transform(self, sections: list) -> str: diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index 7b58b1c7..d204f826 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -54,7 +54,7 @@ } -def get_stat_description(name: str): +def get_stat_description(name: str) -> str: """Gets the description of a statistic. :param str name: the name of the statistic. @@ -70,8 +70,8 @@ def get_stat_description(name: str): if name in "mean_trend10_zscore": return "Significance of (rolling) trend in means of features" - head, *tail = name.split("_") - tail = "_".join(tail) + head, *tails = name.split("_") + tail = "_".join(tails) if tail in comparisons and head in references: return comparisons[tail] @@ -98,11 +98,11 @@ def __init__( ignore_features=None, static_bounds=None, dynamic_bounds=None, - prefix="traffic_light_", + prefix: str = "traffic_light_", suffices=None, ignore_stat_endswith=None, - description="", - ): + description: str = "", + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting diff --git a/popmon/visualization/templates/card.html b/popmon/visualization/templates/card.html index 0fbb6e00..b86ba207 100644 --- a/popmon/visualization/templates/card.html +++ b/popmon/visualization/templates/card.html @@ -12,7 +12,7 @@

{{metric.name | fmt_metric}}

{%- if metric.type in ['traffic_light', 'alert'] -%} - {{ metric.plot }} + {{ metric.plot | safe }} {%- else -%}
{%- include 'header.html' -%} - {{ sections }} + {{ sections | safe }} {%- include 'footer.html' -%} \ No newline at end of file diff --git a/popmon/visualization/templates/section.html b/popmon/visualization/templates/section.html index 6c505b2b..207abd14 100644 --- a/popmon/visualization/templates/section.html +++ b/popmon/visualization/templates/section.html @@ -25,7 +25,7 @@

{{ section_title }}

@@ -57,7 +57,7 @@

{{ feature.titles.get(ref, ref) }}

{%- for metric in plots -%} {%- with metric=metric -%} {%- include 'card.html' -%} diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 53bd505f..3c073a87 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -50,10 +50,10 @@ def __init__( ignore_features=None, static_bounds=None, dynamic_bounds=None, - prefix="traffic_light_", + prefix: str = "traffic_light_", suffices=None, ignore_stat_endswith=None, - ): + ) -> None: """Initialize an instance of SectionGenerator. :param str read_key: key of input data to read from the datastore and use for plotting @@ -173,7 +173,7 @@ def _plot_metrics( skip_first_n, skip_last_n, tl_colors, - style="heatmap", + style: str = "heatmap", ): # prune dates and values dates = _prune(dates, last_n, skip_first_n, skip_last_n) diff --git a/popmon/visualization/utils.py b/popmon/visualization/utils.py index 38aa82a3..06b05e1c 100644 --- a/popmon/visualization/utils.py +++ b/popmon/visualization/utils.py @@ -324,7 +324,7 @@ def plot_bars( def plot_traffic_lights_overview(feature, data, metrics: list[str], labels: list[str]): - colors = defaultdict(dict) + colors: defaultdict[str, dict[str, list[str]]] = defaultdict(dict) color_map = ["g", "y", "r"] for c1, metric in enumerate(metrics): for c2, label in enumerate(labels): @@ -365,7 +365,7 @@ def plot_traffic_lights_alerts_aggregate( yellow = hex_to_rgb(tl_colors["yellow"]) red = hex_to_rgb(tl_colors["red"]) - colors = defaultdict(dict) + colors: defaultdict[str, dict[str, list[str]]] = defaultdict(dict) for c1, metric in enumerate(metrics): row_max = np.max(data[c1]) for c2, label in enumerate(labels): @@ -437,11 +437,11 @@ def histogram_basic_checks(plots=None): def plot_histogram_overlay( plots=None, - is_num=True, - is_ts=False, - is_static_reference=True, - top=20, - n_choices=2, + is_num: bool = True, + is_ts: bool = False, + is_static_reference: bool = True, + top: int = 20, + n_choices: int = 2, ): """Create and plot (overlapping/grouped) histogram(s) of column values. @@ -796,7 +796,7 @@ def plot_heatmap( } -def _prune(values, last_n=0, skip_first_n=0, skip_last_n=0): +def _prune(values, last_n: int = 0, skip_first_n: int = 0, skip_last_n: int = 0): """inline function to select first or last items of input list :param values: input list to select from diff --git a/pyproject.toml b/pyproject.toml index 61b0083b..fb9db0e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,8 @@ dependencies = [ "plotly>=5.8.0", "joblib>=0.14.0", "htmlmin", - "pydantic", + "pydantic>=2", + "pydantic-settings", "typing_extensions" ] classifiers = [ @@ -75,7 +76,7 @@ popmon_run = "popmon.pipeline.amazing_pipeline:run" target-version = "py37" line-length = 120 namespace-packages = ["popmon"] -select = ["ALL"] +select = ["ALL", "CPY001"] ignore = [ # COM812 is incompatible with COM819 "COM", @@ -105,6 +106,10 @@ ignore = [ "TRY003", "E501", "DTZ", + "PERF203", # `try`-`except` within a loop incurs performance overhead + + # False positive + "PERF401", # Use a list comprehension to create a transformed list # Prefer autofix "PD011", # .to_numpy() instead of values @@ -139,6 +144,8 @@ ignore = [ "PT011", # Too many statements "PLR0915", + # Copyright + "CPY001", ] # Sphinx config @@ -151,6 +158,13 @@ ignore = [ # implicit namespaces "INP001", + + # Copyright + "CPY001", +] + +"popmon/config.py" = [ + "RUF012", # Mutable class attributes should be annotated with `typing.ClassVar` ] # Notebooks & NBQA @@ -167,10 +181,17 @@ ignore = [ # Found useless expression. Either assign it to a variable or remove it. "B018", + + # Copyright + "CPY001", ] +[tool.ruff.flake8-copyright] +notice-rgx = """(?mis)Copyright \\(c\\) 2023 ING Analytics Wholesale Banking.+""" + [tool.pytest.ini_options] markers = ["spark"] +#filterwarnings = ["error"] [tool.pytest.ini_options.spark_options] "spark.executor.id" = "driver" diff --git a/tests/popmon/analysis/test_hist_numpy.py b/tests/popmon/analysis/test_hist_numpy.py index 03b72ecc..d33da9de 100644 --- a/tests/popmon/analysis/test_hist_numpy.py +++ b/tests/popmon/analysis/test_hist_numpy.py @@ -30,7 +30,7 @@ def get_test_histograms1(): """Get set 1 of test histograms""" # dummy dataset with mixed types # convert timestamp (col D) to nanosec since 1970-1-1 - df = pd.util.testing.makeMixedDataFrame() + df = pd._testing.makeMixedDataFrame() df["date"] = df["D"].apply(to_ns) df["boolT"] = True df["boolF"] = False @@ -56,7 +56,7 @@ def get_test_histograms2(): """Get set 2 of test histograms""" # dummy dataset with mixed types # convert timestamp (col D) to nanosec since 1970-1-1 - df = pd.util.testing.makeMixedDataFrame() + df = pd._testing.makeMixedDataFrame() # building 1d-, 2d-histogram (iteratively) hist1 = hg.Categorize(unit("C")) @@ -351,7 +351,7 @@ def test_check_similar_hists(): """ # dummy dataset with mixed types # convert timestamp (col D) to nanosec since 1970-1-1 - df = pd.util.testing.makeMixedDataFrame() + df = pd._testing.makeMixedDataFrame() df["date"] = df["D"].apply(to_ns) # building 1d-, 2d-, and 3d-histogram (iteratively) @@ -391,7 +391,7 @@ def test_assert_similar_hists(): """ # dummy dataset with mixed types # convert timestamp (col D) to nanosec since 1970-1-1 - df = pd.util.testing.makeMixedDataFrame() + df = pd._testing.makeMixedDataFrame() df["date"] = df["D"].apply(to_ns) # building 1d-, 2d-, and 3d-histogram (iteratively) diff --git a/tests/popmon/base/test_module.py b/tests/popmon/base/test_module.py index ac9bfd19..b7f7dfdb 100644 --- a/tests/popmon/base/test_module.py +++ b/tests/popmon/base/test_module.py @@ -8,7 +8,7 @@ class Scaler(Module): _input_keys = ("input_key",) _output_keys = ("output_key",) - def __init__(self, input_key, output_key, mean, std): + def __init__(self, input_key, output_key, mean, std) -> None: super().__init__() self.input_key = input_key self.output_key = output_key diff --git a/tests/popmon/base/test_pipeline.py b/tests/popmon/base/test_pipeline.py index 75453d90..d7e4b38b 100644 --- a/tests/popmon/base/test_pipeline.py +++ b/tests/popmon/base/test_pipeline.py @@ -10,7 +10,7 @@ class LogTransformer(Module): _input_keys = ("input_key",) _output_keys = ("output_key",) - def __init__(self, input_key, output_key): + def __init__(self, input_key, output_key) -> None: super().__init__() self.input_key = input_key self.output_key = output_key @@ -25,7 +25,7 @@ class PowerTransformer(Module): _input_keys = ("input_key",) _output_keys = ("output_key",) - def __init__(self, input_key, output_key, power): + def __init__(self, input_key, output_key, power) -> None: super().__init__() self.input_key = input_key self.output_key = output_key @@ -40,7 +40,7 @@ class SumNormalizer(Module): _input_keys = ("input_key",) _output_keys = ("output_key",) - def __init__(self, input_key, output_key): + def __init__(self, input_key, output_key) -> None: super().__init__() self.input_key = input_key self.output_key = output_key @@ -54,7 +54,7 @@ class WeightedSum(Module): _input_keys = ("input_key", "weight_key") _output_keys = ("output_key",) - def __init__(self, input_key, weight_key, output_key): + def __init__(self, input_key, weight_key, output_key) -> None: super().__init__() self.input_key = input_key self.weight_key = weight_key diff --git a/tests/popmon/hist/test_histogram.py b/tests/popmon/hist/test_histogram.py index c2a1888a..18d83e17 100644 --- a/tests/popmon/hist/test_histogram.py +++ b/tests/popmon/hist/test_histogram.py @@ -15,7 +15,7 @@ def get_test_data(): - df = pd.util.testing.makeMixedDataFrame() + df = pd._testing.makeMixedDataFrame() df["date"] = df["D"].apply(lambda x: pd.to_datetime(x).value) return df diff --git a/tests/popmon/pipeline/test_metrics.py b/tests/popmon/pipeline/test_metrics.py index 567db9aa..bf747363 100644 --- a/tests/popmon/pipeline/test_metrics.py +++ b/tests/popmon/pipeline/test_metrics.py @@ -42,7 +42,9 @@ def test_df_stability_metrics(): # generate metrics directly from dataframe bin_specs = { "date": { - "bin_width": pd.Timedelta("1y").value, + # Note that 2000 is a leap year (366 days), timedelta cannot account for this + # if this is important for the analysis, use an IDs for each year + "bin_width": pd.Timedelta("365d").value, "bin_offset": pd.Timestamp("2000-1-1").value, }, "latitude": {"bin_width": 5.0, "bin_offset": 0.0}, diff --git a/tests/popmon/pipeline/test_report.py b/tests/popmon/pipeline/test_report.py index 33caa521..42cb6c0c 100644 --- a/tests/popmon/pipeline/test_report.py +++ b/tests/popmon/pipeline/test_report.py @@ -42,7 +42,9 @@ def test_df_stability_report(): features = ["date:isActive", "date:eyeColor", "date:latitude"] bin_specs = { "date": { - "bin_width": pd.Timedelta("1y").value, + # Note that 2000 is a leap year (366 days), timedelta cannot account for this + # if this is important for the analysis, use an IDs for each year + "bin_width": pd.Timedelta("365d").value, "bin_offset": pd.Timestamp("2000-1-1").value, }, "latitude": {"bin_width": 5.0, "bin_offset": 0.0}, @@ -69,7 +71,9 @@ def test_df_stability_report(): def test_df_stability_report_self(): - time_width = "1y" + # Note that 2020 is a leap year (366 days), timedelta cannot account for this + # if this is important for the analysis, use an IDs for each year + time_width = "365d" time_offset = "2020-1-1" # generate report directly from dataframe