diff --git a/MANIFEST.in b/MANIFEST.in index 88bdd08d..6c3f4ebd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include requirements.txt include LICENSE -include NOTICE \ No newline at end of file +include NOTICE +include extras.json \ No newline at end of file diff --git a/extras.json b/extras.json new file mode 100644 index 00000000..35033643 --- /dev/null +++ b/extras.json @@ -0,0 +1 @@ +{"diptest": ["diptest"]} \ No newline at end of file diff --git a/popmon/__init__.py b/popmon/__init__.py index ecd13edb..2e770a6f 100644 --- a/popmon/__init__.py +++ b/popmon/__init__.py @@ -29,6 +29,7 @@ from popmon import decorators from .config import Settings +from .extensions import extensions from .pipeline.metrics import df_stability_metrics, stability_metrics from .pipeline.report import df_stability_report, stability_report from .stitching import stitch_histograms @@ -46,4 +47,5 @@ "stitch_histograms", "__version__", "Settings", + "extensions", ] diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index 53d51714..e2db9148 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -95,9 +95,15 @@ def _profile_1d_histogram(self, name, hist): # calc 1d-histogram statistics profile = {} - args = [bin_labels, bin_counts] + if otype == "cat": + args = [bin_labels, bin_counts] + else: + bin_width = hist.bin_width() + args = [bin_labels, bin_counts, bin_width] profile.update(Profiles.run(args, dim=1, htype=otype)) + + args = [bin_labels, bin_counts] profile.update(Profiles.run(args, dim=1, htype="all")) # difference between htype=None and htype="all" are arguments (bin labels vs hist) diff --git a/popmon/analysis/profiling/profiles.py b/popmon/analysis/profiling/profiles.py index 2f9970e8..1a348b1c 100644 --- a/popmon/analysis/profiling/profiles.py +++ b/popmon/analysis/profiling/profiles.py @@ -45,7 +45,7 @@ dim=1, htype="num", ) -def profile_quantiles(x, w): +def 
profile_quantiles(x, w, bin_width): return tuple( pm_np.quantile( x, q=[0.0, 1.0, 0.01, 0.05, 0.16, 0.50, 0.84, 0.95, 0.99], weights=w @@ -54,12 +54,12 @@ def profile_quantiles(x, w): @Profiles.register(key="mean", description="Mean value", dim=1, htype="num") -def profile_mean(x, w): +def profile_mean(x, w, bin_width): return pm_np.mean(x, w) @Profiles.register(key="std", description="Standard deviation", dim=1, htype="num") -def profile_std(x, w): +def profile_std(x, w, bin_width): return pm_np.std(x, w) diff --git a/popmon/extensions/__init__.py b/popmon/extensions/__init__.py new file mode 100644 index 00000000..cef93965 --- /dev/null +++ b/popmon/extensions/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + +from popmon.extensions.profile_diptest import Diptest + +extensions = [Diptest()] +for extension in extensions: + extension.check() diff --git a/popmon/extensions/extension.py b/popmon/extensions/extension.py new file mode 100644 index 00000000..c7892e91 --- /dev/null +++ b/popmon/extensions/extension.py @@ -0,0 +1,43 @@ +# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + +import importlib.util +from typing import Callable, List + + +def is_installed(package): + is_present = importlib.util.find_spec(package) + return is_present is not None + + +class Extension: + name: str + requirements: List[str] + extension: Callable + + @property + def extras(self): + return {self.name: self.requirements} + + def check(self): + if all(is_installed(package) for package in self.requirements): + func = self.extension + func = func.__func__ + func() diff --git a/popmon/extensions/profile_diptest.py b/popmon/extensions/profile_diptest.py new file mode 100644 index 00000000..22ec8460 --- /dev/null +++ b/popmon/extensions/profile_diptest.py @@ -0,0 +1,71 @@ +# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + +"""Hartigan & Hartigan's dip test for unimodality + +How to enable this extension: + - Install the diptest package: `pip install diptest` or `pip install popmon[diptest]` + - To show the diptest values in your report: + settings.report.show_stats.append("diptest*") + OR + settings.report.extended_report = True + +""" +import numpy as np + +from popmon.analysis import Profiles +from popmon.extensions.extension import Extension + + +def extension(): + from diptest import diptest + + @Profiles.register( + key=["diptest_value", "diptest_pvalue"], + description=[ + "diptest value for Hartigan & Hartigan's test for unimodality", + "p-value for the diptest", + ], + dim=1, + htype="num", + ) + def diptest_profile(bin_centers, bin_values, bin_width, rng=None): + if rng is None: + rng = np.random.default_rng() + + counts = bin_values.astype(int) + n = counts.sum() + hbw = bin_width / 2 + + # unpack histogram into ordered samples + sample = np.repeat(bin_centers, counts) + + # uniform noise + sample_noise = sample + rng.uniform(-hbw, hbw, n) + + # compute diptest + dip, pval = diptest(sample_noise) + return dip, pval + + +class Diptest(Extension): + name = "diptest" + requirements = ["diptest"] + extension = extension diff --git a/popmon/extensions/utils.py b/popmon/extensions/utils.py new file mode 100644 index 00000000..fb5ea54a --- /dev/null +++ b/popmon/extensions/utils.py @@ -0,0 +1,45 @@ +# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be
included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import json +from pathlib import Path + +from popmon.extensions import extensions + + +def get_extras(): + """Obtain extras from extensions""" + extras = {} + for extension in extensions: + extras.update(extension.extras) + + return extras + + +def write_extras(): + """Write extras to extras.json for setup.py""" + extras = get_extras() + file_path = Path(__file__).parent.parent.parent / "extras.json" + + with file_path.open("w") as f: + json.dump(extras, f) + + +if __name__ == "__main__": + write_extras() diff --git a/setup.py b/setup.py index 65a9e2bf..2485d02b 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +import json + from setuptools import find_packages, setup __version__ = "1.0.0" @@ -9,6 +11,10 @@ with open("README.rst", encoding="utf-8") as f: long_description = f.read() +# read dynamically generated extras from json file +with open("extras.json") as f: + EXTRAS = json.loads(f.read()) + def setup_package() -> None: """The main setup method. @@ -28,6 +34,7 @@ def setup_package() -> None: python_requires=">=3.6", packages=find_packages(), install_requires=REQUIREMENTS, + extras_require=EXTRAS, classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License",