feat: plotly express

The histograms, heatmaps and comparisons have been replaced with interactive Plotly graphs. Plotly.js is used to build the graphs on the fly from JSON. Initial tests show that Plotly reports are smaller than the matplotlib reports and that report generation takes far less time. Set the parameter 'online_report' to load plotly.js from a CDN server; the report then has to be viewed online. Otherwise, plotly.js is embedded in the report, which can then be used offline too. BREAKING CHANGE: matplotlib-related config is removed
ing-bank · Jul 5, 2022 · 2c2395c · 2c2395c
1 parent a1ed9eb
commit 2c2395c
Showing 22 changed files with 531 additions and 447 deletions.
diff --git a/NOTICE b/NOTICE
@@ -21,7 +21,7 @@
 # pyyaml: https://github.com/yaml/pyyaml/blob/master/LICENSE
 # jinja2: https://github.com/noirbizarre/jinja2/blob/master/LICENSE
 # tqdm: https://github.com/tqdm/tqdm/blob/master/LICENCE
-# matplotlib: https://github.com/matplotlib/matplotlib/blob/master/LICENSE/LICENSE
+# plotly: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
 # joblib: https://github.com/joblib/joblib/blob/master/LICENSE.txt
 # pybase64: https://github.com/mayeut/pybase64/blob/master/LICENSE
 # htmlmin: https://github.com/mankyd/htmlmin/blob/master/LICENSE

diff --git a/popmon/config.py b/popmon/config.py
@@ -29,9 +29,6 @@
 # (see https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html for details)
 parallel_args = {"n_jobs": 1}
 
-# Usage the `ing_matplotlib_theme`
-themed = True
-
 
 class SectionModel(BaseModel):
     name: str
@@ -108,7 +105,7 @@ class HistogramSectionModel(SectionModel):
     top_n: int = 20
     """plot heatmap for top 'n' categories. default is 20 (optional)"""
 
-    cmap: str = "autumn_r"
+    cmap: str = "ylorrd"
     """colormap for histogram heatmaps"""
 
 
@@ -171,6 +168,9 @@ class Report(BaseModel):
     """if True, show all the generated statistics in the report (optional)
     if set to False, then smaller show_stats (see below)"""
 
+    online_report: bool = True
+    """Use a CDN to host resources, or embed them into the report."""
+
     show_stats: List[str] = [
         "distinct*",
         "filled*",
@@ -194,6 +194,9 @@ class Report(BaseModel):
     ]
     """list of statistic name patterns to show in the report. If None, show all (optional)"""
 
+    zline_color: List[str] = ["#FF0000", "#FFC800"]
+    """Configure the line colors in the barplots of the Comparisons and Profiles sections. The first and second elements of the list are hex color codes that replace the default red and yellow, respectively."""
+
     section: Section = Section()
     """Configuration for the individual sections"""
 

diff --git a/popmon/notebooks/popmon_tutorial_advanced.ipynb b/popmon/notebooks/popmon_tutorial_advanced.ipynb
@@ -467,7 +467,11 @@
     "                store_key=\"report_sections\",\n",
     "                settings=report_settings,\n",
     "            ),\n",
-    "            ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
+    "            ReportGenerator(\n",
+    "                read_key=\"report_sections\",\n",
+    "                store_key=\"html_report\",\n",
+    "                settings=report_settings,\n",
+    "            ),\n",
     "        ]\n",
     "        super().__init__(modules)\n",
     "\n",
@@ -525,7 +529,11 @@
     "                store_key=\"report_sections\",\n",
     "                settings=report_settings,\n",
     "            ),\n",
-    "            ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
+    "            ReportGenerator(\n",
+    "                read_key=\"report_sections\",\n",
+    "                store_key=\"html_report\",\n",
+    "                settings=report_settings,\n",
+    "            ),\n",
     "        ]\n",
     "        super().__init__(modules)\n",
     "\n",

diff --git a/popmon/pipeline/report_pipelines.py b/popmon/pipeline/report_pipelines.py
@@ -233,7 +233,9 @@ def __init__(
                 settings=settings,
             ),
             # generate report
-            ReportGenerator(read_key=sections_key, store_key=store_key),
+            ReportGenerator(
+                read_key=sections_key, store_key=store_key, settings=settings
+            ),
         ]
         if (
             isinstance(settings.report_filepath, (str, Path))

diff --git a/popmon/resources.py b/popmon/resources.py
@@ -19,7 +19,7 @@
 
 
 # Resources lookup file for popmon
-
+import json
 import pathlib
 
 from jinja2 import Environment, FileSystemLoader
@@ -53,6 +53,31 @@
 _TEMPLATES_ENV.filters["fmt_metric"] = lambda x: x.replace("_", " ")
 
 
+def js_list(encoder, data):
+    pairs = [js_val(encoder, v) for v in data]
+    return "[" + ", ".join(pairs) + "]"
+
+
+def js_dict(encoder, data):
+    pairs = [k + ": " + js_val(encoder, v) for k, v in data.items()]
+    return "{" + ", ".join(pairs) + "}"
+
+
+def js_val(encoder, data):
+    if isinstance(data, dict):
+        val = js_dict(encoder, data)
+    elif isinstance(data, list):
+        val = js_list(encoder, data)
+    else:
+        val = encoder.encode(data)
+    return val
+
+
+_TEMPLATES_ENV.filters["json_plot"] = lambda x: js_val(
+    json.JSONEncoder(ensure_ascii=False), x
+)
+
+
 def _resource(resource_type, name: str) -> str:
     """Return the full path filename of a resource.
 

diff --git a/popmon/visualization/__init__.py b/popmon/visualization/__init__.py
@@ -28,13 +28,6 @@
     TrafficLightSectionGenerator,
 )
 
-# set matplotlib backend to batch mode when running in shell
-# need to do this *before* matplotlib.pyplot gets imported
-from ..visualization.backend import set_matplotlib_backend
-
-set_matplotlib_backend()
-
-
 __all__ = [
     "SectionGenerator",
     "HistogramSection",

diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py
@@ -150,7 +150,11 @@ def transform(
                 plots = [e for e in plots if len(e["plot"])]
 
             features_w_metrics.append(
-                {"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])}
+                {
+                    "name": feature,
+                    "plot_type_layouts": {"traffic_lights": ""},
+                    "plots": sorted(plots, key=lambda plot: plot["name"]),
+                }
             )
 
         sections.append(

diff --git a/popmon/visualization/backend.py b/popmon/visualization/backend.py
diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py
@@ -140,17 +140,33 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
             ]
             plots = parallel(_plot_histograms, args)
 
+            plot_type_layouts = {}
+
             # filter out potential empty plots
             plots = [e for e in plots if len(e["plot"])]
             plots = sorted(plots, key=lambda plot: plot["name"])
+            if len(plots) > 0:
+                plot_type_layouts["histogram"] = plots[0]["layout"]
 
             # filter out potential empty heatmap plots, then prepend them to the sorted histograms
-            hplots = [h for h in heatmaps if isinstance(h, dict) and len(h["plot"])]
-
-            plots = hplots + plots
+            hplots = []
+            for h in heatmaps:
+                if isinstance(h, dict):
+                    if len(h["plot"]):
+                        hplots.append(h)
 
-            features_w_metrics.append({"name": feature, "plots": plots})
+            if len(hplots) > 0:
+                plot_type_layouts["heatmap"] = hplots[0]["layout"]
 
+            plots = hplots + plots
+            # print(plot_types,layouts)
+            features_w_metrics.append(
+                {
+                    "name": feature,
+                    "plot_type_layouts": plot_type_layouts,
+                    "plots": plots,
+                }
+            )
         sections.append(
             {
                 "section_title": self.section_name,
@@ -230,11 +246,17 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):
             hists, feature, hist_names, y_label, is_num, is_ts
         )
     elif hc_list[0].n_dim == 2:
-        plot = ""
+        plot = {}
     else:
-        plot = ""
+        plot = {}
 
-    return {"name": date, "description": "", "plot": plot}
+    return {
+        "name": date,
+        "type": "histogram",
+        "description": "",
+        "plot": plot.get("data", ""),
+        "layout": plot.get("layout", ""),
+    }
 
 
 def _plot_heatmap(
@@ -321,13 +343,15 @@ def _plot_heatmap(
         if isinstance(heatmaps, list):
             plot = [hist_lookup(heatmaps, hist_name) for hist_name in hist_names]
         elif isinstance(heatmaps, dict):
-            plot = [heatmaps["plot"]]
+            plot = [heatmaps]
 
         plots = [
             {
                 "name": hist_names_formatted[hist_name],
-                "description": descriptions[hist_name],
-                "plot": pl,
+                "type": "heatmap",
+                "description": "",
+                "plot": pl["plot"],
+                "layout": pl["layout"],
                 "full_width": True,
             }
             for pl, hist_name in zip(plot, hist_names)
@@ -364,4 +388,4 @@ def get_top_categories(entries_list, bins, top_n):
 def hist_lookup(plot, hist_name):
     for pl in plot:
         if pl["name"] == hist_name:
-            return pl["plot"]
+            return pl
diff --git a/popmon/visualization/overview_section.py b/popmon/visualization/overview_section.py
@@ -177,6 +177,7 @@ def _plot_metrics(
 
     return {
         "name": "Alert frequency per Feature",
+        "type": "alert",
         "description": "",
         "plot": plot,
         "full_width": True,

diff --git a/popmon/visualization/report_generator.py b/popmon/visualization/report_generator.py
@@ -21,6 +21,7 @@
 import htmlmin
 
 from ..base import Module
+from ..config import Report
 from ..resources import templates_env
 from ..version import version
 
@@ -33,15 +34,17 @@ class ReportGenerator(Module):
     _input_keys = ("read_key",)
     _output_keys = ("store_key",)
 
-    def __init__(self, read_key, store_key):
+    def __init__(self, read_key, store_key, settings: Report):
         """Initialize an instance of ReportGenerator.
 
         :param str read_key: key of input sections data to read from the datastore
         :param str store_key: key for storing the html report code in the datastore
+        :param Report settings: report configuration; if ``settings.online_report`` is True, the report loads plotly.js from a CDN server (which requires an internet connection), else the plotly.js code is included in the html report
         """
         super().__init__()
         self.read_key = read_key
         self.store_key = store_key
+        self.online_report = settings.online_report
 
     def get_description(self):
         return "HTML Report"
@@ -60,5 +63,6 @@ def transform(self, sections: list) -> str:
                 filename="core.html",
                 generator=f"popmon {version}",
                 sections=sections_html,
+                online_report=self.online_report,
             )
         )
diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py
@@ -117,6 +117,7 @@ def __init__(
         self.last_n = settings.last_n
         self.skip_first_n = settings.skip_first_n
         self.skip_last_n = settings.skip_last_n
+        self.zline_color = settings.zline_color
         self.prefix = prefix
         self.suffices = suffices
         self.ignore_stat_endswith = ignore_stat_endswith or []
@@ -180,6 +181,7 @@ def transform(
                     self.skip_first_n,
                     self.skip_last_n,
                     self.skip_empty_plots,
+                    self.zline_color,
                 )
                 for metric in metrics
             ]
@@ -188,8 +190,21 @@ def transform(
             # filter out potential empty plots (from skip empty plots)
             if self.skip_empty_plots:
                 plots = [e for e in plots if len(e["plot"])]
+
+            layouts = ""
+            if len(plots) > 0:
+                layouts = plots[0]["layout"]
+                if "shapes" in layouts:
+                    del layouts["shapes"]
+                if "range" in layouts["yaxis"]:
+                    del layouts["yaxis"]["range"]
+
             features_w_metrics.append(
-                {"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])}
+                {
+                    "name": feature,
+                    "plot_type_layouts": {"barplot": layouts},
+                    "plots": sorted(plots, key=lambda plot: plot["name"]),
+                }
             )
 
         sections.append(
@@ -215,6 +230,7 @@ def _plot_metric(
     skip_first_n,
     skip_last_n,
     skip_empty,
+    zline_color,
 ):
     """Split off plot histogram generation to allow for parallel processing"""
     # pick up static traffic light boundaries
@@ -241,6 +257,28 @@ def _plot_metric(
         ylim=True,
         bounds=bounds,
         skip_empty=skip_empty,
+        zline_color=zline_color,
     )
 
-    return {"name": metric, "description": get_stat_description(metric), "plot": plot}
+    if not isinstance(plot, dict):
+        return {
+            "name": metric,
+            "type": "barplot",
+            "description": get_stat_description(metric),
+            "plot": plot,
+            "layout": plot,
+        }
+
+    return {
+        "name": metric,
+        "type": "barplot",
+        "description": get_stat_description(metric),
+        "plot": plot["data"],
+        "shapes": plot["layout"]["shapes"] if "shapes" in plot["layout"] else "",
+        "yaxis_range": [
+            "null" if r is None else r for r in plot["layout"]["yaxis"]["range"]
+        ]
+        if "range" in plot["layout"]["yaxis"]
+        else "",
+        "layout": plot["layout"],
+    }
diff --git a/popmon/visualization/templates/assets/css/custom-style.css b/popmon/visualization/templates/assets/css/custom-style.css
@@ -10,10 +10,6 @@ section {
   padding: 150px 0;
 }
 
-.card-footer {
-  text-align: center;
-}
-
 section {
   padding: 70px 0
 }
@@ -29,12 +25,18 @@ a.nav-link {
 }
 
 .card-body {
-  padding-bottom: 0 !important
+  padding-bottom: 0 !important;
+  text-align: center;
 }
 
 p.card-text {
   font-size: 11px;
   font-weight: 300;
+  min-height: 33px;
+}
+
+.card-footer {
+  text-align: center;
 }
 
 div.section-description {
@@ -55,6 +57,29 @@ section h2 {
   padding-bottom: 7px;
 }
 
+.skeleton-loader {
+  width: 80%;
+  height: 285px;
+  margin: 0 auto;
+  margin-top: 25px;
+  margin-bottom: 140px;
+  background-size: 50px 350px;
+  background-position: 0 0;
+  background-repeat: no-repeat;
+  border-radius: 3px;
+  animation: skeleton-loading 1s linear infinite alternate;
+  opacity: 0.7
+}
+
+@keyframes skeleton-loading {
+  0% {
+    background-color: hsl(200, 20%, 70%);
+  }
+  100% {
+    background-color: hsl(200, 20%, 95%);
+  }
+}
+
 /* overview tables */
 table.overview{
     margin: 25px;
@@ -105,4 +130,4 @@ table.overview tfoot td span{
 .tl-container{
     display: flex;
     width: 100%;
-}
+}
diff --git a/popmon/visualization/templates/assets/js/custom-script.js b/popmon/visualization/templates/assets/js/custom-script.js
@@ -71,3 +71,22 @@ $("#toggleDescriptions").change(function() {
         $("p.card-text").hide();
     }
 });
+
+var plotly_config = {scrollZoom: true, displaylogo: false, modeBarButtonsToRemove: ['lasso2d']} ;
+
+
+const deepCopy = (inObject) => {
+      let outObject, value, key
+      if (typeof inObject !== "object" || inObject === null) {
+        return inObject
+      }
+
+      outObject = Array.isArray(inObject) ? [] : {}
+
+      for (key in inObject) {
+        value = inObject[key]
+        outObject[key] = deepCopy(value)
+      }
+
+      return outObject
+}
diff --git a/popmon/visualization/templates/assets/js/plotly.js b/popmon/visualization/templates/assets/js/plotly.js
diff --git a/popmon/visualization/templates/card.html b/popmon/visualization/templates/card.html
@@ -1,16 +1,50 @@
-<div class="col-md-{% if 'full_width' in metric %}12{% else %}6{% endif %} mb-5">
-    <a name="{% if feature%}{{ feature.name }}-{%endif%}{{ metric.name }}"></a>
+{%- with card_id = feature.name + '-' + metric.name if feature else metric.name -%}
+<div class="col-md-{% if 'full_width' in metric %}12{% else %}6{% endif %} mb-5" >
+    <a name="{{ card_id }}"></a>
     <div class="card shadow-sm">
-        <div class="card-body" style="text-align: center">
+        <div class="card-body" id="{{ card_id }}-card">
             <h4 class="card-title">{{metric.name | fmt_metric}}</h4>
-            {% if metric.description|length %}
-                <p class="card-text">{{metric.description}}</p>
-            {% endif %}
+                {%- if metric.description | length -%}
+                <p class="card-text">
+                    {{metric.description}}
+                </p>
+                {%- endif -%}
         </div>
-        {% if 'table' in metric.plot %}
+        <div id="{{ card_id }}"> </div>
+        {%- if metric.type in ['traffic_light', 'alert'] -%}
             {{ metric.plot }}
-        {% else %}
-        <img class="card-img-top" src="data:image/png;base64,{{metric.plot}}" alt="" />
-        {% endif %}
+        {%- else -%}
+            <div class="skeleton-loader" id="{% if feature%}{{ feature.name }}-{%endif%}{{ metric.name }}-loading"></div>
+            <script>
+            var feature{{ section_index }}{{ curr }}{{ plt }}_rendered = false ;
+            function render_{{ section_index }}{{ curr }}{{ plt }}(){
+                var layout = deepCopy(feature{{ section_index }}{{ curr }}_layout["{{ metric.type }}"]);
+                {%- if metric.shapes | length -%}
+                    layout["shapes"] = {{ metric.shapes | json_plot}} ;
+                {%- endif -%}
+                {%- if metric.yaxis_range | length -%}
+                    layout["yaxis"]["range"] = {{ metric.yaxis_range | json_plot }} ;
+                {%- endif -%}
+                Plotly.newPlot(document.getElementById("{{ card_id }}"), {{ metric.plot | json_plot }}, layout, plotly_config).then(function() { document.getElementById("{{ card_id }}-loading").remove(); feature{{ section_index }}{{ curr }}{{ plt }}_rendered = true ;});
+            }
+
+            var io = new IntersectionObserver(function(entries) {
+                var entry = entries[0];
+                if(entry.isIntersecting === true && feature{{ section_index }}{{ curr }}{{ plt }}_rendered === false){
+                    if(document.readyState === "complete"){
+                        render_{{ section_index }}{{ curr }}{{ plt }}();
+                        io.unobserve(entry.target);
+                    }else{
+                        document.addEventListener('DOMContentLoaded', function() {
+                            render_{{ section_index }}{{ curr }}{{ plt }}();
+                            io.unobserve(entry.target);
+                        });
+                    }
+                }
+            }, { threshold: [0] });
+            io.observe(document.getElementById("{{ card_id }}-card").parentNode.parentNode);
+            </script>
+        {%- endif -%}
     </div>
-</div>
+</div>
+{%- endwith -%}
diff --git a/popmon/visualization/templates/footer.html b/popmon/visualization/templates/footer.html
@@ -9,10 +9,16 @@
 <!-- Bootstrap core JavaScript -->
 <!-- Plugin JavaScript -->
 <!-- Custom JavaScript for this theme -->
+{% if online_report %}
+    <script src="https://cdn.plot.ly/plotly-2.12.1.min.js"></script>
+{% endif %}
 <script>
+    {% if not online_report %}
+        {% include 'assets/js/plotly.js' %}
+    {% endif %}
     {% include 'assets/js/jquery.min.js' %}
     {% include 'assets/js/bootstrap.bundle.min.js' %}
     {% include 'assets/js/jquery.easing.min.js' %}
     {% include 'assets/js/scrolling-nav.js' %}
     {% include 'assets/js/custom-script.js' %}
-</script>
+</script>
diff --git a/popmon/visualization/templates/section.html b/popmon/visualization/templates/section.html
@@ -1,4 +1,4 @@
-<section data-section-title="{{ section_title }}" class="{{'bg-light' if section_index % 2 == 0 }}">
+<section data-section-title="{{ section_title }}" class="{{'bg-light' if section_index % 2 == 0 }}" id="plt">
   <div class="container">
 
     <div class="d-flex justify-content-between align-items-center">
@@ -22,8 +22,18 @@ <h2>{{ section_title }}</h2>
     {% endif %}
     {% if features | length %}
       {% for feature in features %}
+        <script> 
+          {% set curr = loop.index  %}
+          var feature{{ section_index }}{{ curr }}_layout = {}
+        </script>
+        {% for plot_type, layout in feature.plot_type_layouts.items() %}
+          <script> 
+            feature{{ section_index }}{{ curr }}_layout["{{ plot_type }}"] = JSON.parse('{{ layout | tojson }}');
+          </script>
+        {% endfor %}
         <div class="row section_feature" data-section-feature="{{ feature.name }}">
             {% for metric in feature.plots %}
+              {% set plt = loop.index  %}
               {% with metric=metric %}
                 {% include 'card.html' %}
               {% endwith %}

diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py
@@ -147,7 +147,11 @@ def transform(
             if self.skip_empty_plots:
                 plots = [e for e in plots if len(e["plot"])]
             features_w_metrics.append(
-                {"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])}
+                {
+                    "name": feature,
+                    "plot_type_layouts": {"traffic_lights": ""},
+                    "plots": sorted(plots, key=lambda plot: plot["name"]),
+                }
             )
 
         sections.append(
@@ -202,4 +206,10 @@ def _plot_metrics(
     else:
         plot = ""
 
-    return {"name": "Overview", "description": "", "plot": plot, "full_width": True}
+    return {
+        "name": "Overview",
+        "type": "traffic_light",
+        "description": "",
+        "plot": plot,
+        "full_width": True,
+    }
diff --git a/popmon/visualization/utils.py b/popmon/visualization/utils.py
diff --git a/requirements.txt b/requirements.txt
@@ -5,10 +5,9 @@ histogrammar>=1.0.30
 phik
 jinja2
 tqdm
-matplotlib>=2.2.3
+plotly>=5.8.0
 joblib>=0.14.0
 pybase64>=1.0.1
 htmlmin
-ing_theme_matplotlib>=0.1.8
 pydantic
 typing_extensions
diff --git a/tests/popmon/visualization/test_report_generator.py b/tests/popmon/visualization/test_report_generator.py
@@ -40,7 +40,11 @@ def test_report_generator():
                 section_name="Comparisons",
                 settings=settings.report,
             ),
-            ReportGenerator(read_key="all_sections", store_key="final_report"),
+            ReportGenerator(
+                read_key="all_sections",
+                store_key="final_report",
+                settings=settings.report,
+            ),
         ]
     )
     datastore = pipeline.transform(datastore={})