Skip to content

Commit

Permalink
Merge pull request #254 from ing-bank/develop
Browse files Browse the repository at this point in the history
Release
  • Loading branch information
sbrugman authored Sep 9, 2022
2 parents 1a3fd44 + a82d729 commit f8633aa
Show file tree
Hide file tree
Showing 13 changed files with 38 additions and 90 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
- name: Merge master back to develop
run: |
git fetch --unshallow
git checkout dev
git checkout develop
git pull
git merge --no-ff master -m "chore: auto-merge master back to develop"
git push
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 22.6.0
rev: 22.8.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
Expand Down
2 changes: 2 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ Presentations
+------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+-------------------+-------------------------+
| Title | Host | Date | Speaker |
+------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+-------------------+-------------------------+
| popmon: Analysis Package for Dataset Shift Detection | `SciPy Conference 2022 <https://www.scipy2022.scipy.org/>`_ | July 13, 2022 | Simon Brugman |
+------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+-------------------+-------------------------+
| Popmon - population monitoring made easy | `Big Data Technology Warsaw Summit 2021 <https://bigdatatechwarsaw.eu/>`_ | February 25, 2021 | Simon Brugman |
+------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+-------------------+-------------------------+
| Popmon - population monitoring made easy | `Data Lunch @ Eneco <https://www.eneco.nl/>`_ | October 29, 2020 | Max Baak, Simon Brugman |
Expand Down
5 changes: 3 additions & 2 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
sphinx_rtd_theme
sphinx_rtd_theme>=1.0.0
myst_parser
sphinx_autodoc_typehints
sphinx_autodoc_typehints
docutils<0.17
2 changes: 1 addition & 1 deletion popmon/analysis/comparison/comparisons.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def unknown_labels(hist1, hist2):
labels1 = hist1.keySet
labels2 = hist2.keySet
subset = labels1 <= labels2
return int(not subset)
return not subset


@Comparisons.register(
Expand Down
21 changes: 1 addition & 20 deletions popmon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import warnings
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

import pandas as pd
from histogrammar.dfinterface.make_histograms import get_time_axes
from pydantic import BaseModel, BaseSettings
from pydantic.class_validators import validator
from typing_extensions import Literal

# Global configuration for the joblib parallelization. Could be used to change the number of jobs, and/or change
Expand All @@ -32,12 +30,6 @@
parallel_args = {"n_jobs": 1}


class ValidatedBaseModel(BaseModel):
class Config:
validate_all = True
validate_assignment = True


class ValidatedSettings(BaseSettings):
class Config:
validate_all = True
Expand Down Expand Up @@ -174,23 +166,12 @@ class Section(BaseModel):
"""Configuration related to the traffic lights section"""


class Report(ValidatedBaseModel):
class Report(BaseModel):
"""Report-specific configuration"""

title: str = "POPMON Report"
"""Report title in browser and navbar. May contain HTML."""

skip_empty_plots: bool = False
"""(deprecated) if false, also show empty plots in report with only nans or zeroes (optional)"""

@validator("skip_empty_plots")
def skip_empty_plots_deprecated(cls, v):
if v:
warnings.warn(
"The 'skip_empty_plots' parameter is deprecated and will be removed in the next release."
)
return v

last_n: int = 0
"""plot statistic data for last 'n' periods (optional)"""

Expand Down
3 changes: 1 addition & 2 deletions popmon/notebooks/popmon_tutorial_advanced.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,7 @@
"report_settings.report.last_n = 0\n",
"report_settings.report.skip_first_n = 0\n",
"report_settings.report.skip_last_n = 0\n",
"report_settings.report.section.histograms.plot_hist_n = 2\n",
"report_settings.report.skip_empty_plots = True\n",
"report_settings.report.section.histograms.plot_hist_n = 0\n",
"report_settings.report.report_filepath = None\n",
"\n",
"report.regenerate(\n",
Expand Down
9 changes: 2 additions & 7 deletions popmon/visualization/alert_section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def __init__(
self.last_n = settings.last_n
self.skip_first_n = settings.skip_first_n
self.skip_last_n = settings.skip_last_n
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats if not settings.extended_report else None

self.section_name = settings.section.alerts.name
Expand Down Expand Up @@ -110,9 +109,7 @@ def transform(
features = self.get_features(list(data_obj.keys()))
features_w_metrics = []

self.logger.info(
f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}'
)
self.logger.info(f'Generating section "{self.section_name}"')

for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
Expand Down Expand Up @@ -141,15 +138,13 @@ def transform(
0,
0,
0,
0,
self.tl_colors,
style="alerts",
)
]

# filter out empty plots
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]
plots = [e for e in plots if len(e["plot"])]

features_w_metrics.append(
{
Expand Down
23 changes: 5 additions & 18 deletions popmon/visualization/overview_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ def __init__(
self.last_n = settings.last_n
self.skip_first_n = settings.skip_first_n
self.skip_last_n = settings.skip_last_n
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats if not settings.extended_report else None
self.section_name = settings.section.overview.name
self.description = settings.section.overview.description
Expand All @@ -104,9 +103,7 @@ def transform(

features = self.get_features(list(data_obj.keys()))

self.logger.info(
f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}'
)
self.logger.info(f'Generating section "{self.section_name}"')

values = {}
for feature in tqdm(features, ncols=100):
Expand All @@ -132,15 +129,11 @@ def transform(
self.last_n,
self.skip_first_n,
self.skip_last_n,
self.skip_empty_plots,
)

plots = [_plot_metrics(values)]

# filter out empty plots
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]

plots = [e for e in plots if len(e["plot"])]
plots = sorted(plots, key=lambda plot: plot["name"])

sections.append(
Expand Down Expand Up @@ -190,16 +183,10 @@ def _get_metrics(
last_n,
skip_first_n,
skip_last_n,
skip_empty,
):
values = []
nonempty_metrics = []
for metric in metrics:
value = _prune(df[metric], last_n, skip_first_n, skip_last_n)

if not skip_empty or np.sum(value) > 0:
values.append(value)
nonempty_metrics.append(metric)
values = [
_prune(df[metric], last_n, skip_first_n, skip_last_n) for metric in metrics
]

empty = {0: 0, 1: 0, 2: 0}
if len(values) > 0:
Expand Down
11 changes: 2 additions & 9 deletions popmon/visualization/section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ def __init__(
self.prefix = prefix
self.suffices = suffices
self.ignore_stat_endswith = ignore_stat_endswith or []
self.skip_empty_plots = settings.skip_empty_plots
self.description = description
self.show_stats = settings.show_stats if not settings.extended_report else None
self.primary_color = settings.primary_color
Expand All @@ -158,9 +157,7 @@ def transform(
features = self.get_features(list(data_obj.keys()))
features_w_metrics = []

self.logger.info(
f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}'
)
self.logger.info(f'Generating section "{self.section_name}"')

for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
Expand Down Expand Up @@ -193,7 +190,6 @@ def transform(
self.last_n,
self.skip_first_n,
self.skip_last_n,
self.skip_empty_plots,
self.primary_color,
self.tl_colors,
)
Expand All @@ -202,8 +198,7 @@ def transform(
plots = parallel(_plot_metric, args)

# filter out empty plots
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]
plots = [e for e in plots if len(e["plot"])]

layouts = ""
if len(plots) > 0:
Expand Down Expand Up @@ -262,7 +257,6 @@ def _plot_metric(
last_n,
skip_first_n,
skip_last_n,
skip_empty,
primary_color,
zline_color,
):
Expand Down Expand Up @@ -290,7 +284,6 @@ def _plot_metric(
labels=dates,
ylim=True,
bounds=bounds,
skip_empty=skip_empty,
primary_color=primary_color,
tl_colors=zline_color,
metric=metric,
Expand Down
26 changes: 8 additions & 18 deletions popmon/visualization/traffic_light_section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def __init__(
self.prefix = prefix
self.suffices = suffices
self.ignore_stat_endswith = ignore_stat_endswith or []
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats if not settings.extended_report else None

self.section_name = settings.section.traffic_lights.name
Expand All @@ -109,9 +108,7 @@ def transform(
features = self.get_features(list(data_obj.keys()))
features_w_metrics = []

self.logger.info(
f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}'
)
self.logger.info(f'Generating section "{self.section_name}"')

for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
Expand Down Expand Up @@ -140,14 +137,13 @@ def transform(
self.last_n,
self.skip_first_n,
self.skip_last_n,
self.skip_empty_plots,
tl_colors=self.tl_colors,
)
]

# filter out empty plots
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]
plots = [e for e in plots if len(e["plot"])]

features_w_metrics.append(
{
"name": feature,
Expand All @@ -174,34 +170,28 @@ def _plot_metrics(
last_n,
skip_first_n,
skip_last_n,
skip_empty,
tl_colors,
style="heatmap",
):
# prune dates and values
dates = _prune(dates, last_n, skip_first_n, skip_last_n)

values = []
nonempty_metrics = []
for metric in metrics:
value = _prune(df[metric], last_n, skip_first_n, skip_last_n)

if not skip_empty or np.sum(value) > 0:
values.append(value)
nonempty_metrics.append(metric)
values = [
_prune(df[metric], last_n, skip_first_n, skip_last_n) for metric in metrics
]

if len(values) > 0:
values = np.stack(values)

if style == "heatmap":
plot = plot_traffic_lights_overview(
feature, values, metrics=nonempty_metrics, labels=dates
feature, values, metrics=metrics, labels=dates
)
elif style == "alerts":
plot = plot_traffic_lights_alerts_aggregate(
feature,
values,
metrics=nonempty_metrics,
metrics=metrics,
labels=dates,
tl_colors=tl_colors,
)
Expand Down
20 changes: 10 additions & 10 deletions popmon/visualization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def plot_bars(
labels: List[str],
bounds: tuple,
ylim: bool,
skip_empty: bool,
primary_color: str,
tl_colors: Dict[str, str],
metric: str,
Expand All @@ -62,7 +61,6 @@ def plot_bars(
:param labels: common bin labels for all histograms.
:param bounds: traffic light bounds (y-coordinates).
:param ylim: place y-axis limits for zooming into the data.
:param skip_empty: if false, also plot empty plots with only nans or only zeroes. default is True.
:return: JSON plot image
:rtype: str
"""
Expand All @@ -72,14 +70,12 @@ def plot_bars(
raise ValueError("shape mismatch: x-axis labels do not match the data shape")

# skip plot generation for empty datasets
if skip_empty:
n_data = len(data)
n_zero = n_data - np.count_nonzero(data)
n_nan = pd.isnull(data).sum()
n_inf = np.sum([np.isinf(x) for x in data if isinstance(x, float)])
if n_nan + n_zero + n_inf == n_data:
logger.debug("skipping plot with empty data.")
return ""
n_data = len(data)
n_nan = pd.isnull(data).sum()
n_inf = np.sum([np.isinf(x) for x in data if isinstance(x, float)])
if n_nan + n_inf == n_data:
logger.debug("skipping plot with empty data.")
return ""

# plot bar
fig = go.Figure(
Expand Down Expand Up @@ -121,6 +117,10 @@ def plot_bars(
linecolor="black",
mirror=True,
)
fig.update_traces(
marker_line_color=primary_color,
marker_line_width=1,
)
# plot boundaries
try:
all_nan = (np.isnan(data)).all()
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
numpy>=1.18.0
pandas>=0.25.1
scipy>=1.5.2
histogrammar>=1.0.31
histogrammar>=1.0.32
phik
jinja2
tqdm
Expand Down

0 comments on commit f8633aa

Please sign in to comment.