Skip to content

Commit

Permalink
feat: plotly express
Browse files Browse the repository at this point in the history
The histograms, heatmaps and comparisons have been replaced with interactive Plotly graphs.
Plotly.js is used to build the graphs on the fly from JSON. Initial tests show that Plotly
reports are smaller in size than the matplotlib ones and that report generation takes much
less time. Set the parameter 'online_report' to load plotly.js from a CDN server and use the
report online. Otherwise, plotly.js is embedded in the report, which can then be used
offline too.

BREAKING CHANGE: matplotlib-related config is removed
pradyot-09 committed Jul 5, 2022
1 parent a1ed9eb commit 2c2395c
Showing 22 changed files with 531 additions and 447 deletions.
2 changes: 1 addition & 1 deletion NOTICE
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@
# pyyaml: https://github.com/yaml/pyyaml/blob/master/LICENSE
# jinja2: https://github.com/noirbizarre/jinja2/blob/master/LICENSE
# tqdm: https://github.com/tqdm/tqdm/blob/master/LICENCE
# matplotlib: https://github.com/matplotlib/matplotlib/blob/master/LICENSE/LICENSE
# plotly: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
# joblib: https://github.com/joblib/joblib/blob/master/LICENSE.txt
# pybase64: https://github.com/mayeut/pybase64/blob/master/LICENSE
# htmlmin: https://github.com/mankyd/htmlmin/blob/master/LICENSE
11 changes: 7 additions & 4 deletions popmon/config.py
Original file line number Diff line number Diff line change
@@ -29,9 +29,6 @@
# (see https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html for details)
parallel_args = {"n_jobs": 1}

# Usage the `ing_matplotlib_theme`
themed = True


class SectionModel(BaseModel):
name: str
@@ -108,7 +105,7 @@ class HistogramSectionModel(SectionModel):
top_n: int = 20
"""plot heatmap for top 'n' categories. default is 20 (optional)"""

cmap: str = "autumn_r"
cmap: str = "ylorrd"
"""colormap for histogram heatmaps"""


@@ -171,6 +168,9 @@ class Report(BaseModel):
"""if True, show all the generated statistics in the report (optional)
if set to False, then smaller show_stats (see below)"""

online_report: bool = True
"""Use a CDN to host resources, or embed them into the report."""

show_stats: List[str] = [
"distinct*",
"filled*",
@@ -194,6 +194,9 @@ class Report(BaseModel):
]
"""list of statistic name patterns to show in the report. If None, show all (optional)"""

zline_color: List[str] = ["#FF0000", "#FFC800"]
"""Configure line colors in barplots of the Comparisons and Profiles sections. The first and second elements of the list (hex color codes) replace the default red and yellow, respectively."""

section: Section = Section()
"""Configuration for the individual sections"""

12 changes: 10 additions & 2 deletions popmon/notebooks/popmon_tutorial_advanced.ipynb
Original file line number Diff line number Diff line change
@@ -467,7 +467,11 @@
" store_key=\"report_sections\",\n",
" settings=report_settings,\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
" ReportGenerator(\n",
" read_key=\"report_sections\",\n",
" store_key=\"html_report\",\n",
" settings=report_settings,\n",
" ),\n",
" ]\n",
" super().__init__(modules)\n",
"\n",
@@ -525,7 +529,11 @@
" store_key=\"report_sections\",\n",
" settings=report_settings,\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
" ReportGenerator(\n",
" read_key=\"report_sections\",\n",
" store_key=\"html_report\",\n",
" settings=report_settings,\n",
" ),\n",
" ]\n",
" super().__init__(modules)\n",
"\n",
4 changes: 3 additions & 1 deletion popmon/pipeline/report_pipelines.py
Original file line number Diff line number Diff line change
@@ -233,7 +233,9 @@ def __init__(
settings=settings,
),
# generate report
ReportGenerator(read_key=sections_key, store_key=store_key),
ReportGenerator(
read_key=sections_key, store_key=store_key, settings=settings
),
]
if (
isinstance(settings.report_filepath, (str, Path))
27 changes: 26 additions & 1 deletion popmon/resources.py
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@


# Resources lookup file for popmon

import json
import pathlib

from jinja2 import Environment, FileSystemLoader
@@ -53,6 +53,31 @@
_TEMPLATES_ENV.filters["fmt_metric"] = lambda x: x.replace("_", " ")


def _js_key(encoder, key):
    """Return ``key`` rendered as a JavaScript object-literal property name.

    Plain ASCII identifier keys are emitted bare (unquoted), which keeps the
    generated script compact. Any other key (containing e.g. a dash or a
    space, or not a string at all) is converted with ``str`` and quoted by
    the encoder, so that the resulting object literal is still valid
    JavaScript instead of raising or producing a syntax error.
    """
    if (
        isinstance(key, str)
        and key.isidentifier()
        and all(ord(char) < 128 for char in key)
    ):
        return key
    return encoder.encode(str(key))


def js_list(encoder, data):
    """Render the list ``data`` as a JavaScript array literal.

    :param encoder: object with an ``encode`` method (e.g. ``json.JSONEncoder``)
    :param list data: items to render; each item is rendered with ``js_val``
    :return: the array literal as a string
    """
    return "[" + ", ".join(js_val(encoder, item) for item in data) + "]"


def js_dict(encoder, data):
    """Render the dict ``data`` as a JavaScript object literal.

    Keys that are plain identifiers are left unquoted; other keys are quoted
    (see ``_js_key``). Values are rendered with ``js_val``.

    :param encoder: object with an ``encode`` method (e.g. ``json.JSONEncoder``)
    :param dict data: mapping to render
    :return: the object literal as a string
    """
    pairs = [
        _js_key(encoder, key) + ": " + js_val(encoder, value)
        for key, value in data.items()
    ]
    return "{" + ", ".join(pairs) + "}"


def js_val(encoder, data):
    """Render ``data`` as a JavaScript expression.

    Dicts and lists are rendered recursively via ``js_dict`` and ``js_list``;
    every other value is delegated to ``encoder.encode``.

    :param encoder: object with an ``encode`` method (e.g. ``json.JSONEncoder``)
    :param data: value to render
    :return: the JavaScript source for the value as a string
    """
    if isinstance(data, dict):
        return js_dict(encoder, data)
    if isinstance(data, list):
        return js_list(encoder, data)
    return encoder.encode(data)


# Register the "json_plot" template filter: it renders a value with js_val,
# using a JSON encoder that leaves non-ASCII characters unescaped.
_TEMPLATES_ENV.filters["json_plot"] = lambda x: js_val(
json.JSONEncoder(ensure_ascii=False), x
)


def _resource(resource_type, name: str) -> str:
"""Return the full path filename of a resource.
7 changes: 0 additions & 7 deletions popmon/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -28,13 +28,6 @@
TrafficLightSectionGenerator,
)

# set matplotlib backend to batch mode when running in shell
# need to do this *before* matplotlib.pyplot gets imported
from ..visualization.backend import set_matplotlib_backend

set_matplotlib_backend()


__all__ = [
"SectionGenerator",
"HistogramSection",
6 changes: 5 additions & 1 deletion popmon/visualization/alert_section_generator.py
Original file line number Diff line number Diff line change
@@ -150,7 +150,11 @@ def transform(
plots = [e for e in plots if len(e["plot"])]

features_w_metrics.append(
{"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])}
{
"name": feature,
"plot_type_layouts": {"traffic_lights": ""},
"plots": sorted(plots, key=lambda plot: plot["name"]),
}
)

sections.append(
152 changes: 0 additions & 152 deletions popmon/visualization/backend.py

This file was deleted.

46 changes: 35 additions & 11 deletions popmon/visualization/histogram_section.py
Original file line number Diff line number Diff line change
@@ -140,17 +140,33 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
]
plots = parallel(_plot_histograms, args)

plot_type_layouts = {}

# filter out potential empty plots
plots = [e for e in plots if len(e["plot"])]
plots = sorted(plots, key=lambda plot: plot["name"])
if len(plots) > 0:
plot_type_layouts["histogram"] = plots[0]["layout"]

# filter out potential empty heatmap plots, then prepend them to the sorted histograms
hplots = [h for h in heatmaps if isinstance(h, dict) and len(h["plot"])]

plots = hplots + plots
hplots = []
for h in heatmaps:
if isinstance(h, dict):
if len(h["plot"]):
hplots.append(h)

features_w_metrics.append({"name": feature, "plots": plots})
if len(hplots) > 0:
plot_type_layouts["heatmap"] = hplots[0]["layout"]

plots = hplots + plots
# print(plot_types,layouts)
features_w_metrics.append(
{
"name": feature,
"plot_type_layouts": plot_type_layouts,
"plots": plots,
}
)
sections.append(
{
"section_title": self.section_name,
@@ -230,11 +246,17 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):
hists, feature, hist_names, y_label, is_num, is_ts
)
elif hc_list[0].n_dim == 2:
plot = ""
plot = {}
else:
plot = ""
plot = {}

return {"name": date, "description": "", "plot": plot}
return {
"name": date,
"type": "histogram",
"description": "",
"plot": plot.get("data", ""),
"layout": plot.get("layout", ""),
}


def _plot_heatmap(
@@ -321,13 +343,15 @@ def _plot_heatmap(
if isinstance(heatmaps, list):
plot = [hist_lookup(heatmaps, hist_name) for hist_name in hist_names]
elif isinstance(heatmaps, dict):
plot = [heatmaps["plot"]]
plot = [heatmaps]

plots = [
{
"name": hist_names_formatted[hist_name],
"description": descriptions[hist_name],
"plot": pl,
"type": "heatmap",
"description": "",
"plot": pl["plot"],
"layout": pl["layout"],
"full_width": True,
}
for pl, hist_name in zip(plot, hist_names)
@@ -364,4 +388,4 @@ def get_top_categories(entries_list, bins, top_n):
def hist_lookup(plot, hist_name):
for pl in plot:
if pl["name"] == hist_name:
return pl["plot"]
return pl
1 change: 1 addition & 0 deletions popmon/visualization/overview_section.py
Original file line number Diff line number Diff line change
@@ -177,6 +177,7 @@ def _plot_metrics(

return {
"name": "Alert frequency per Feature",
"type": "alert",
"description": "",
"plot": plot,
"full_width": True,
6 changes: 5 additions & 1 deletion popmon/visualization/report_generator.py
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@
import htmlmin

from ..base import Module
from ..config import Report
from ..resources import templates_env
from ..version import version

@@ -33,15 +34,17 @@ class ReportGenerator(Module):
_input_keys = ("read_key",)
_output_keys = ("store_key",)

def __init__(self, read_key, store_key):
def __init__(self, read_key, store_key, settings: Report):
"""Initialize an instance of ReportGenerator.
:param str read_key: key of input sections data to read from the datastore
:param str store_key: key for storing the html report code in the datastore
:param Report settings: report configuration; if ``settings.online_report`` is False, the plotly.js code is embedded in the html report, otherwise the report loads plotly.js from a CDN server, which requires an internet connection
"""
super().__init__()
self.read_key = read_key
self.store_key = store_key
self.online_report = settings.online_report

def get_description(self):
return "HTML Report"
@@ -60,5 +63,6 @@ def transform(self, sections: list) -> str:
filename="core.html",
generator=f"popmon {version}",
sections=sections_html,
online_report=self.online_report,
)
)
42 changes: 40 additions & 2 deletions popmon/visualization/section_generator.py
Original file line number Diff line number Diff line change
@@ -117,6 +117,7 @@ def __init__(
self.last_n = settings.last_n
self.skip_first_n = settings.skip_first_n
self.skip_last_n = settings.skip_last_n
self.zline_color = settings.zline_color
self.prefix = prefix
self.suffices = suffices
self.ignore_stat_endswith = ignore_stat_endswith or []
@@ -180,6 +181,7 @@ def transform(
self.skip_first_n,
self.skip_last_n,
self.skip_empty_plots,
self.zline_color,
)
for metric in metrics
]
@@ -188,8 +190,21 @@ def transform(
# filter out potential empty plots (from skip empty plots)
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]

layouts = ""
if len(plots) > 0:
layouts = plots[0]["layout"]
if "shapes" in layouts:
del layouts["shapes"]
if "range" in layouts["yaxis"]:
del layouts["yaxis"]["range"]

features_w_metrics.append(
{"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])}
{
"name": feature,
"plot_type_layouts": {"barplot": layouts},
"plots": sorted(plots, key=lambda plot: plot["name"]),
}
)

sections.append(
@@ -215,6 +230,7 @@ def _plot_metric(
skip_first_n,
skip_last_n,
skip_empty,
zline_color,
):
"""Split off plot histogram generation to allow for parallel processing"""
# pick up static traffic light boundaries
@@ -241,6 +257,28 @@ def _plot_metric(
ylim=True,
bounds=bounds,
skip_empty=skip_empty,
zline_color=zline_color,
)

return {"name": metric, "description": get_stat_description(metric), "plot": plot}
if not isinstance(plot, dict):
return {
"name": metric,
"type": "barplot",
"description": get_stat_description(metric),
"plot": plot,
"layout": plot,
}

return {
"name": metric,
"type": "barplot",
"description": get_stat_description(metric),
"plot": plot["data"],
"shapes": plot["layout"]["shapes"] if "shapes" in plot["layout"] else "",
"yaxis_range": [
"null" if r is None else r for r in plot["layout"]["yaxis"]["range"]
]
if "range" in plot["layout"]["yaxis"]
else "",
"layout": plot["layout"],
}
37 changes: 31 additions & 6 deletions popmon/visualization/templates/assets/css/custom-style.css
Original file line number Diff line number Diff line change
@@ -10,10 +10,6 @@ section {
padding: 150px 0;
}

.card-footer {
text-align: center;
}

section {
padding: 70px 0
}
@@ -29,12 +25,18 @@ a.nav-link {
}

.card-body {
padding-bottom: 0 !important
padding-bottom: 0 !important;
text-align: center;
}

p.card-text {
font-size: 11px;
font-weight: 300;
min-height: 33px;
}

.card-footer {
text-align: center;
}

div.section-description {
@@ -55,6 +57,29 @@ section h2 {
padding-bottom: 7px;
}

.skeleton-loader {
width: 80%;
height: 285px;
margin: 0 auto;
margin-top: 25px;
margin-bottom: 140px;
background-size: 50px 350px;
background-position: 0 0;
background-repeat: no-repeat;
border-radius: 3px;
animation: skeleton-loading 1s linear infinite alternate;
opacity: 0.7
}

@keyframes skeleton-loading {
0% {
background-color: hsl(200, 20%, 70%);
}
100% {
background-color: hsl(200, 20%, 95%);
}
}

/* overview tables */
table.overview{
margin: 25px;
@@ -105,4 +130,4 @@ table.overview tfoot td span{
.tl-container{
display: flex;
width: 100%;
}
}
19 changes: 19 additions & 0 deletions popmon/visualization/templates/assets/js/custom-script.js
Original file line number Diff line number Diff line change
@@ -71,3 +71,22 @@ $("#toggleDescriptions").change(function() {
$("p.card-text").hide();
}
});

var plotly_config = {scrollZoom: true, displaylogo: false, modeBarButtonsToRemove: ['lasso2d']} ;


/**
 * Recursively copy a value.
 *
 * Non-object values (and null) are returned unchanged. Arrays are copied
 * into a new array and any other object into a new plain object, with every
 * enumerable property copied recursively.
 */
const deepCopy = (inObject) => {
    if (inObject === null || typeof inObject !== "object") {
        return inObject;
    }

    const copy = Array.isArray(inObject) ? [] : {};
    for (const prop in inObject) {
        copy[prop] = deepCopy(inObject[prop]);
    }
    return copy;
};
65 changes: 65 additions & 0 deletions popmon/visualization/templates/assets/js/plotly.js

Large diffs are not rendered by default.

56 changes: 45 additions & 11 deletions popmon/visualization/templates/card.html
Original file line number Diff line number Diff line change
@@ -1,16 +1,50 @@
<div class="col-md-{% if 'full_width' in metric %}12{% else %}6{% endif %} mb-5">
<a name="{% if feature%}{{ feature.name }}-{%endif%}{{ metric.name }}"></a>
{%- with card_id = feature.name + '-' + metric.name if feature else metric.name -%}
<div class="col-md-{% if 'full_width' in metric %}12{% else %}6{% endif %} mb-5" >
<a name="{{ card_id }}"></a>
<div class="card shadow-sm">
<div class="card-body" style="text-align: center">
<div class="card-body" id="{{ card_id }}-card">
<h4 class="card-title">{{metric.name | fmt_metric}}</h4>
{% if metric.description|length %}
<p class="card-text">{{metric.description}}</p>
{% endif %}
{%- if metric.description | length -%}
<p class="card-text">
{{metric.description}}
</p>
{%- endif -%}
</div>
{% if 'table' in metric.plot %}
<div id="{{ card_id }}"> </div>
{%- if metric.type in ['traffic_light', 'alert'] -%}
{{ metric.plot }}
{% else %}
<img class="card-img-top" src="data:image/png;base64,{{metric.plot}}" alt="" />
{% endif %}
{%- else -%}
<div class="skeleton-loader" id="{% if feature%}{{ feature.name }}-{%endif%}{{ metric.name }}-loading"></div>
<script>
{# Lazy rendering of one plot card: the Plotly figure is only built once the card scrolls into view. #}
var feature{{ section_index }}{{ curr }}{{ plt }}_rendered = false ;
function render_{{ section_index }}{{ curr }}{{ plt }}(){
    var layout = deepCopy(feature{{ section_index }}{{ curr }}_layout["{{ metric.type }}"]);
    {%- if metric.shapes | length -%}
    layout["shapes"] = {{ metric.shapes | json_plot}} ;
    {%- endif -%}
    {%- if metric.yaxis_range | length -%}
    layout["yaxis"]["range"] = {{ metric.yaxis_range | json_plot }} ;
    {%- endif -%}
    Plotly.newPlot(document.getElementById("{{ card_id }}"), {{ metric.plot | json_plot }}, layout, plotly_config).then(function() { document.getElementById("{{ card_id }}-loading").remove(); feature{{ section_index }}{{ curr }}{{ plt }}_rendered = true ;});
}

{# The callback uses its own `observer` argument: the global `io` is reassigned by every card, so it would refer to the wrong observer. #}
var io = new IntersectionObserver(function(entries, observer) {
    var entry = entries[0];
    if(entry.isIntersecting === true && feature{{ section_index }}{{ curr }}{{ plt }}_rendered === false){
        {# Stop observing right away so that the render is scheduled exactly once. #}
        observer.unobserve(entry.target);
        if(document.readyState === "complete"){
            render_{{ section_index }}{{ curr }}{{ plt }}();
        }else{
            {# Wait for `load` rather than `DOMContentLoaded`: the latter may already have fired while readyState is "interactive". #}
            window.addEventListener('load', function() {
                render_{{ section_index }}{{ curr }}{{ plt }}();
            }, { once: true });
        }
    }
}, { threshold: [0] });
io.observe(document.getElementById("{{ card_id }}-card").parentNode.parentNode);
</script>
{%- endif -%}
</div>
</div>
</div>
{%- endwith -%}
8 changes: 7 additions & 1 deletion popmon/visualization/templates/footer.html
Original file line number Diff line number Diff line change
@@ -9,10 +9,16 @@
<!-- Bootstrap core JavaScript -->
<!-- Plugin JavaScript -->
<!-- Custom JavaScript for this theme -->
{% if online_report %}
<script src="https://cdn.plot.ly/plotly-2.12.1.min.js"></script>
{% endif %}
<script>
{% if not online_report %}
{% include 'assets/js/plotly.js' %}
{% endif %}
{% include 'assets/js/jquery.min.js' %}
{% include 'assets/js/bootstrap.bundle.min.js' %}
{% include 'assets/js/jquery.easing.min.js' %}
{% include 'assets/js/scrolling-nav.js' %}
{% include 'assets/js/custom-script.js' %}
</script>
</script>
12 changes: 11 additions & 1 deletion popmon/visualization/templates/section.html
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<section data-section-title="{{ section_title }}" class="{{'bg-light' if section_index % 2 == 0 }}">
<section data-section-title="{{ section_title }}" class="{{'bg-light' if section_index % 2 == 0 }}" id="plt">
<div class="container">

<div class="d-flex justify-content-between align-items-center">
@@ -22,8 +22,18 @@ <h2>{{ section_title }}</h2>
{% endif %}
{% if features | length %}
{% for feature in features %}
<script>
{% set curr = loop.index %}
var feature{{ section_index }}{{ curr }}_layout = {}
</script>
{% for plot_type, layout in feature.plot_type_layouts.items() %}
<script>
feature{{ section_index }}{{ curr }}_layout["{{ plot_type }}"] = JSON.parse('{{ layout | tojson }}');
</script>
{% endfor %}
<div class="row section_feature" data-section-feature="{{ feature.name }}">
{% for metric in feature.plots %}
{% set plt = loop.index %}
{% with metric=metric %}
{% include 'card.html' %}
{% endwith %}
14 changes: 12 additions & 2 deletions popmon/visualization/traffic_light_section_generator.py
Original file line number Diff line number Diff line change
@@ -147,7 +147,11 @@ def transform(
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]
features_w_metrics.append(
{"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])}
{
"name": feature,
"plot_type_layouts": {"traffic_lights": ""},
"plots": sorted(plots, key=lambda plot: plot["name"]),
}
)

sections.append(
@@ -202,4 +206,10 @@ def _plot_metrics(
else:
plot = ""

return {"name": "Overview", "description": "", "plot": plot, "full_width": True}
return {
"name": "Overview",
"type": "traffic_light",
"description": "",
"plot": plot,
"full_width": True,
}
442 changes: 202 additions & 240 deletions popmon/visualization/utils.py

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -5,10 +5,9 @@ histogrammar>=1.0.30
phik
jinja2
tqdm
matplotlib>=2.2.3
plotly>=5.8.0
joblib>=0.14.0
pybase64>=1.0.1
htmlmin
ing_theme_matplotlib>=0.1.8
pydantic
typing_extensions
6 changes: 5 additions & 1 deletion tests/popmon/visualization/test_report_generator.py
Original file line number Diff line number Diff line change
@@ -40,7 +40,11 @@ def test_report_generator():
section_name="Comparisons",
settings=settings.report,
),
ReportGenerator(read_key="all_sections", store_key="final_report"),
ReportGenerator(
read_key="all_sections",
store_key="final_report",
settings=settings.report,
),
]
)
datastore = pipeline.transform(datastore={})

0 comments on commit 2c2395c

Please sign in to comment.