Skip to content

Commit

Permalink
feat: extension functionality + diptest implementation @RUrlus
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrugman committed Aug 19, 2022
1 parent 3a29135 commit 8487991
Show file tree
Hide file tree
Showing 10 changed files with 206 additions and 5 deletions.
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
include requirements.txt
include LICENSE
include NOTICE
include NOTICE
include extras.json
1 change: 1 addition & 0 deletions extras.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"diptest": ["diptest"]}
2 changes: 2 additions & 0 deletions popmon/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from popmon import decorators

from .config import Settings
from .extensions import extensions
from .pipeline.metrics import df_stability_metrics, stability_metrics
from .pipeline.report import df_stability_report, stability_report
from .stitching import stitch_histograms
Expand All @@ -46,4 +47,5 @@
"stitch_histograms",
"__version__",
"Settings",
"extensions",
]
8 changes: 7 additions & 1 deletion popmon/analysis/profiling/hist_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,15 @@ def _profile_1d_histogram(self, name, hist):

# calc 1d-histogram statistics
profile = {}
args = [bin_labels, bin_counts]
if otype == "cat":
args = [bin_labels, bin_counts]
else:
bin_width = hist.bin_width()
args = [bin_labels, bin_counts, bin_width]

profile.update(Profiles.run(args, dim=1, htype=otype))

args = [bin_labels, bin_counts]
profile.update(Profiles.run(args, dim=1, htype="all"))

# difference between htype=None and htype="all" are arguments (bin labels vs hist)
Expand Down
6 changes: 3 additions & 3 deletions popmon/analysis/profiling/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
dim=1,
htype="num",
)
def profile_quantiles(x, w):
def profile_quantiles(x, w, bin_width):
return tuple(
pm_np.quantile(
x, q=[0.0, 1.0, 0.01, 0.05, 0.16, 0.50, 0.84, 0.95, 0.99], weights=w
Expand All @@ -54,12 +54,12 @@ def profile_quantiles(x, w):


@Profiles.register(key="mean", description="Mean value", dim=1, htype="num")
def profile_mean(x, w, bin_width):
    """Compute the "mean" profile by delegating to ``pm_np.mean(x, w)``.

    ``bin_width`` is part of the signature but is not used here.
    NOTE(review): presumably ``x`` are bin labels and ``w`` bin counts, as
    passed by the 1d histogram profiler -- confirm against the caller.
    """
    mean_value = pm_np.mean(x, w)
    return mean_value


@Profiles.register(key="std", description="Standard deviation", dim=1, htype="num")
def profile_std(x, w, bin_width):
    """Compute the "std" profile by delegating to ``pm_np.std(x, w)``.

    ``bin_width`` is part of the signature but is not used here.
    NOTE(review): presumably ``x`` are bin labels and ``w`` bin counts, as
    passed by the 1d histogram profiler -- confirm against the caller.
    """
    std_value = pm_np.std(x, w)
    return std_value


Expand Down
25 changes: 25 additions & 0 deletions popmon/extensions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


from popmon.extensions.profile_diptest import Diptest

# All bundled extensions; each one is activated on import when its
# requirements are installed (see ``Extension.check``).
extensions = [Diptest()]

# The loop variable is deliberately NOT called ``extension``: a module-level
# name ``extension`` in this package would overwrite the attribute that the
# import system binds to the ``popmon.extensions.extension`` submodule, so
# ``from popmon.extensions import extension`` would return a Diptest instance
# instead of the module.
for _ext in extensions:
    _ext.check()
43 changes: 43 additions & 0 deletions popmon/extensions/extension.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


import importlib.util
from typing import Callable, List


def is_installed(package):
    """Report whether ``package`` can be located by the import system.

    :param str package: absolute module name, e.g. ``"diptest"`` or ``"pkg.sub"``
    :return: True when a module spec is found, False otherwise
    :rtype: bool
    """
    try:
        spec = importlib.util.find_spec(package)
    except ModuleNotFoundError:
        # For a dotted name, find_spec imports the parent package first and
        # raises when that parent is missing; that simply means "not installed".
        return False
    return spec is not None


class Extension:
    """Base class describing an optional extension.

    Subclasses provide ``name``, ``requirements`` and ``extension``;
    ``check`` runs ``extension`` only when every requirement is importable.
    """

    # Key under which the requirements are exposed in ``extras``.
    name: str
    # Module names that must all be installed before ``extension`` is run.
    requirements: List[str]
    # Zero-argument callable that performs the actual registration.
    extension: Callable

    @property
    def extras(self):
        """Return the ``{name: requirements}`` mapping for this extension."""
        return {self.name: self.requirements}

    def check(self):
        """Run ``extension`` if all ``requirements`` are installed.

        Does nothing when at least one requirement cannot be found.
        """
        if not all(is_installed(package) for package in self.requirements):
            return

        hook = self.extension
        # A plain function stored on the class comes back as a bound method;
        # unwrap it so it is called without ``self``. A staticmethod (or any
        # other callable) has no ``__func__`` and is called as it is.
        hook = getattr(hook, "__func__", hook)
        hook()
71 changes: 71 additions & 0 deletions popmon/extensions/profile_diptest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


"""Hartigan & Hartigan's dip test for unimodality
How to enable this extension:
- Install the diptest package: `pip install diptest` or `pip install popmon[diptest]`
- To show the diptest values in your report:
settings.report.show_stats.append("diptest*")
OR
settings.report.extended_report = True
"""
import numpy as np

from popmon.analysis import Profiles
from popmon.extensions.extension import Extension


def extension():
    """Register the dip test profile on ``Profiles``.

    ``diptest`` is imported inside the function, so importing this module
    does not require the package to be present.
    """
    from diptest import diptest

    @Profiles.register(
        key=["diptest_value", "diptest_pvalue"],
        description=[
            "diptest value for Hartigan & Hartigan's test for unimodality",
            "p-value for the diptest",
        ],
        dim=1,
        htype="num",
    )
    def diptest_profile(bin_centers, bin_values, bin_width, rng=None):
        """Run the dip test on a sample rebuilt from a 1d histogram.

        :param bin_centers: one value per bin, repeated according to its count
        :param bin_values: per-bin counts; cast to int before use
        :param bin_width: bin width; half of it bounds the uniform noise
        :param rng: random generator providing ``uniform``; a fresh
            ``np.random.default_rng()`` is created when None
        :return: the two values returned by ``diptest`` (dip, p-value)
        """
        generator = np.random.default_rng() if rng is None else rng

        # integer number of repetitions for every bin
        repeats = bin_values.astype(int)
        half_width = bin_width / 2

        # expand the histogram: each bin center appears once per counted entry
        expanded = np.repeat(bin_centers, repeats)

        # one uniform offset in [-half_width, half_width) per expanded entry
        jitter = generator.uniform(-half_width, half_width, repeats.sum())

        statistic, p_value = diptest(expanded + jitter)
        return statistic, p_value


class Diptest(Extension):
    """Extension entry for the dip test profile."""

    # module-level registration function defined in this file
    extension = extension
    # packages that have to be installed for the extension to be run
    requirements = ["diptest"]
    # key used for this extension in the extras mapping
    name = "diptest"
45 changes: 45 additions & 0 deletions popmon/extensions/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import json
from pathlib import Path

from popmon.extensions import extensions


def get_extras():
    """Merge the ``extras`` mappings of all extensions into one dict.

    Entries of later extensions overwrite earlier ones with the same key.
    """
    return {
        extra_name: packages
        for ext in extensions
        for extra_name, packages in ext.extras.items()
    }


def write_extras():
    """Serialize the extras mapping as JSON into ``extras.json``.

    The file is placed three directory levels above this module, where
    setup.py reads it.
    """
    module_dir = Path(__file__).parent
    target = module_dir.parent.parent / "extras.json"
    payload = get_extras()

    with target.open("w") as handle:
        json.dump(payload, handle)


if __name__ == "__main__":
write_extras()
7 changes: 7 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

from setuptools import find_packages, setup

__version__ = "1.0.0"
Expand All @@ -9,6 +11,10 @@
with open("README.rst", encoding="utf-8") as f:
long_description = f.read()

# extras.json holds the generated extras mapping; parse it for extras_require
with open("extras.json") as f:
    raw_extras = f.read()
EXTRAS = json.loads(raw_extras)


def setup_package() -> None:
"""The main setup method.
Expand All @@ -28,6 +34,7 @@ def setup_package() -> None:
python_requires=">=3.6",
packages=find_packages(),
install_requires=REQUIREMENTS,
extras_require=EXTRAS,
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
Expand Down

0 comments on commit 8487991

Please sign in to comment.