Skip to content

Commit

Permalink
fix: histogram section
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrugman committed Jun 14, 2022
1 parent d3bb81c commit 6e93f25
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 12 deletions.
1 change: 0 additions & 1 deletion popmon/pipeline/report_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,6 @@ def __init__(
hist_name_starts_with="histogram",
settings=settings.section.histograms,
top_n=settings.top_n,
- last_n=settings.last_n,
),
# section showing all traffic light alerts of monitored statistics
TrafficLightSectionGenerator(
Expand Down
17 changes: 6 additions & 11 deletions popmon/visualization/histogram_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def __init__(
hist_names=None,
hist_name_starts_with="histogram",
top_n=None,
- last_n=None,
):
"""Initialize an instance of SectionGenerator.
Expand All @@ -61,7 +60,6 @@ def __init__(
:param str section_name: key of output data to store in the datastore
:param list features: list of features to pick up from input data (optional)
:param list ignore_features: ignore list of features, if present (optional)
- :param int last_n: plot histogram for last 'n' periods. default is 1 (optional)
:param int top_n: plot heatmap for top 'n' categories. default is 20 (optional)
:param list hist_names: list of histogram names to plot
:param str hist_name_starts_with: find histograms in case hist_names is empty. default is histogram.
Expand All @@ -77,7 +75,6 @@ def __init__(
self.hist_name_starts_with = hist_name_starts_with

self.top_n = top_n
- self.last_n = last_n

# section specific
self.section_name = settings.name
Expand Down Expand Up @@ -106,9 +103,7 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
last_n = (
- len(df.index)
- if len(df.index) < self.last_n or self.last_n == 0
- else self.last_n
+ len(df.index) if len(df.index) < self.plot_hist_n else self.plot_hist_n
)
hist_names = [hn for hn in self.hist_names if hn in df.columns]
if len(hist_names) == 0 and len(self.hist_name_starts_with) > 0:
Expand Down Expand Up @@ -148,8 +143,8 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
]

args = [
- (feature, dates[-i], hists[-i], hist_names, self.top_n)
- for i in range(self.plot_hist_n)
+ (feature, dates[i], hists[i], hist_names, self.top_n)
+ for i in range(last_n)
]
plots = parallel(_plot_histograms, args)

Expand Down Expand Up @@ -195,14 +190,14 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):
hist_names = [hn for i, hn in enumerate(hist_names) if i not in none_hists]
# more basic checks
if len(hc_list) == 0:
- return {"name": date, "description": get_stat_description(date), "plot": ""}
+ return {"name": date, "description": "", "plot": ""}
assert_similar_hists(hc_list)

# make plot. note: slow!
if hc_list[0].n_dim == 1:
if all(h.size == 0 for h in hc_list):
# triviality checks, skip all histograms empty
- return {"name": date, "description": get_stat_description(date), "plot": ""}
+ return {"name": date, "description": "", "plot": ""}

props = get_hist_props(hc_list[0])
is_num = props["is_num"]
Expand All @@ -221,7 +216,7 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):

# skip histograms with too many bins to plot (default more than 1000)
if len(bins) > max_nbins:
- return {"name": date, "description": get_stat_description(date), "plot": ""}
+ return {"name": date, "description": "", "plot": ""}

# normalize histograms for plotting (comparison!) in case there is more than one.
if len(hc_list) >= 2:
Expand Down

0 comments on commit 6e93f25

Please sign in to comment.