Skip to content

Commit

Permalink
ci: enable qa on jupyter notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
sbrugman committed Jul 4, 2022
1 parent 41ea553 commit 77fab29
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 39 deletions.
10 changes: 9 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,12 @@ repos:
- id: docautogenerate
name: docsautogenerate
entry: bash -c 'cd ./docs/ && bash autogenerate.sh'
language: system
language: system
- repo: https://github.com/nbQA-dev/nbQA
rev: 1.3.1
hooks:
- id: nbqa-black
- id: nbqa-pyupgrade
args: ['--py36-plus']
- id: nbqa-isort
args: ['--profile=black']
62 changes: 40 additions & 22 deletions popmon/notebooks/popmon_tutorial_advanced.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"source": [
"# install popmon (if not installed yet)\n",
"import sys\n",
"\n",
"!\"{sys.executable}\" -m pip install -q popmon"
]
},
Expand All @@ -47,7 +48,7 @@
"\n",
"import popmon\n",
"from popmon import resources\n",
"from popmon.config import Settings, Report"
"from popmon.config import Report, Settings"
]
},
{
Expand All @@ -64,7 +65,9 @@
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"])"
"df = pd.read_csv(\n",
" resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"]\n",
")"
]
},
{
Expand Down Expand Up @@ -126,7 +129,7 @@
"outputs": [],
"source": [
"# reuse the previous settings\n",
"settings.monitoring.pull_rules={\"*_pull\": [10, 7, -7, -10]}\n",
"settings.monitoring.pull_rules = {\"*_pull\": [10, 7, -7, -10]}\n",
"\n",
"df.pm_stability_report(\n",
" time_axis=\"DATE\",\n",
Expand Down Expand Up @@ -157,8 +160,8 @@
"source": [
"# download histogrammar jar files if not already installed, used for histogramming of spark dataframe\n",
"try:\n",
" from pyspark.sql import SparkSession\n",
" from pyspark import __version__ as pyspark_version\n",
" from pyspark.sql import SparkSession\n",
"\n",
" pyspark_installed = True\n",
"except ImportError:\n",
Expand All @@ -173,12 +176,12 @@
"outputs": [],
"source": [
"if pyspark_installed:\n",
" scala = '2.12' if int(pyspark_version[0]) >= 3 else '2.11'\n",
" hist_jar = f'io.github.histogrammar:histogrammar_{scala}:1.0.20'\n",
" hist_spark_jar = f'io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.20'\n",
" \n",
" scala = \"2.12\" if int(pyspark_version[0]) >= 3 else \"2.11\"\n",
" hist_jar = f\"io.github.histogrammar:histogrammar_{scala}:1.0.20\"\n",
" hist_spark_jar = f\"io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.20\"\n",
"\n",
" spark = SparkSession.builder.config(\n",
" \"spark.jars.packages\", f'{hist_spark_jar},{hist_jar}'\n",
" \"spark.jars.packages\", f\"{hist_spark_jar},{hist_jar}\"\n",
" ).getOrCreate()\n",
"\n",
" sdf = spark.createDataFrame(df)\n",
Expand Down Expand Up @@ -441,25 +444,31 @@
"metadata": {},
"outputs": [],
"source": [
"from popmon.hist.hist_splitter import HistSplitter\n",
"from popmon.analysis.profiling import HistProfiler\n",
"from popmon.pipeline.report import StabilityReport\n",
"from popmon.base import Pipeline\n",
"from popmon.visualization import SectionGenerator, ReportGenerator\n",
"\n",
"from popmon.hist.hist_splitter import HistSplitter\n",
"from popmon.pipeline.report import StabilityReport\n",
"from popmon.visualization import ReportGenerator, SectionGenerator\n",
"\n",
"datastore = {\n",
" \"hists\": df.pm_make_histograms(time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\")\n",
" \"hists\": df.pm_make_histograms(\n",
" time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n",
" )\n",
"}\n",
"\n",
"\n",
"class CustomPipeline(Pipeline):\n",
" def __init__(self):\n",
" modules = [\n",
" HistSplitter(read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"),\n",
" HistSplitter(\n",
" read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"\n",
" ),\n",
" HistProfiler(read_key=\"split_hists\", store_key=\"profiles\"),\n",
" SectionGenerator(\n",
" section_name=\"Profiles\", read_key=\"profiles\", store_key=\"report_sections\", settings=report_settings\n",
" section_name=\"Profiles\",\n",
" read_key=\"profiles\",\n",
" store_key=\"report_sections\",\n",
" settings=report_settings,\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
" ]\n",
Expand Down Expand Up @@ -488,33 +497,42 @@
"source": [
"from popmon.analysis.comparison.hist_comparer import ReferenceHistComparer\n",
"\n",
"\n",
"datastore = {\n",
" \"hists\": df.pm_make_histograms(time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\")\n",
" \"hists\": df.pm_make_histograms(\n",
" time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n",
" )\n",
"}\n",
"\n",
"\n",
"class CustomComparisonsPipeline(Pipeline):\n",
" def __init__(self):\n",
" modules = [\n",
" HistSplitter(read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"),\n",
" HistSplitter(\n",
" read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"\n",
" ),\n",
" HistProfiler(read_key=\"split_hists\", store_key=\"profiles\"),\n",
" ReferenceHistComparer(\n",
" reference_key=\"split_hists\",\n",
" assign_to_key=\"split_hists\",\n",
" store_key=\"comparisons\",\n",
" ),\n",
" SectionGenerator(\n",
" section_name=\"Profiles\", read_key=\"profiles\", store_key=\"report_sections\", settings=report_settings\n",
" section_name=\"Profiles\",\n",
" read_key=\"profiles\",\n",
" store_key=\"report_sections\",\n",
" settings=report_settings,\n",
" ),\n",
" SectionGenerator(\n",
" section_name=\"Comparisons\", read_key=\"comparisons\", store_key=\"report_sections\", settings=report_settings\n",
" section_name=\"Comparisons\",\n",
" read_key=\"comparisons\",\n",
" store_key=\"report_sections\",\n",
" settings=report_settings,\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
" ]\n",
" super().__init__(modules)\n",
"\n",
" \n",
"\n",
"pipeline = CustomComparisonsPipeline()\n",
"datastore = pipeline.transform(datastore)\n",
"\n",
Expand Down
4 changes: 3 additions & 1 deletion popmon/notebooks/popmon_tutorial_basic.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"outputs": [],
"source": [
"# (optional) Adjust the jupyter notebook style for easier navigation of the reports\n",
"from IPython.core.display import display, HTML\n",
"from IPython.core.display import HTML, display\n",
"\n",
"# Wider notebook\n",
"display(HTML(\"<style>.container { width:80% !important; }</style>\"))\n",
Expand Down Expand Up @@ -57,6 +57,7 @@
"outputs": [],
"source": [
"import sys\n",
"\n",
"!\"{sys.executable}\" -m pip install -q popmon"
]
},
Expand All @@ -74,6 +75,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"import popmon\n",
"from popmon import resources\n",
"from popmon.config import Report"
Expand Down
4 changes: 2 additions & 2 deletions popmon/notebooks/popmon_tutorial_incremental_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"outputs": [],
"source": [
"import sys\n",
"\n",
"!\"{sys.executable}\" -m pip install -q popmon"
]
},
Expand All @@ -49,8 +50,7 @@
"import pandas as pd\n",
"\n",
"import popmon\n",
"from popmon import stability_report, stitch_histograms, get_bin_specs\n",
"from popmon import resources"
"from popmon import get_bin_specs, resources, stability_report, stitch_histograms"
]
},
{
Expand Down
30 changes: 17 additions & 13 deletions popmon/notebooks/popmon_tutorial_reports.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"])\n",
"df = pd.read_csv(\n",
" resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"]\n",
")\n",
"report = df.pm_stability_report(time_axis=\"DATE\", time_width=\"1w\")"
]
},
Expand Down Expand Up @@ -67,7 +69,7 @@
"metadata": {},
"outputs": [],
"source": [
"list(report.datastore['report_sections'][0].keys())"
"list(report.datastore[\"report_sections\"][0].keys())"
]
},
{
Expand All @@ -85,7 +87,7 @@
"metadata": {},
"outputs": [],
"source": [
"[section['section_title'] for section in report.datastore['report_sections']]"
"[section[\"section_title\"] for section in report.datastore[\"report_sections\"]]"
]
},
{
Expand All @@ -103,15 +105,17 @@
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.display import display, HTML\n",
"from IPython.core.display import HTML, display\n",
"\n",
"\n",
"def show_image(plot):\n",
" display(HTML(f'<img src=\"data:image/jpeg;base64, {plot[\"plot\"]}\" />'))\n",
" text = f'<strong>{plot[\"name\"]}</strong>'\n",
" if plot['description']:\n",
" if plot[\"description\"]:\n",
" text += f': {plot[\"description\"]}'\n",
" display(HTML(text))\n",
"\n",
"\n",
"def show_table(plot):\n",
" style = \"\"\"table.overview{\n",
" margin: 25px;\n",
Expand Down Expand Up @@ -147,7 +151,7 @@
" font-weight: 300;\n",
" }\n",
" \"\"\"\n",
" display(HTML(f'<style>{style}</style>'))\n",
" display(HTML(f\"<style>{style}</style>\"))\n",
" display(HTML(plot[\"plot\"]))"
]
},
Expand Down Expand Up @@ -187,7 +191,7 @@
"outputs": [],
"source": [
"# First section, First Feature, First plot\n",
"show_image(report.datastore['report_sections'][1]['features'][0]['plots'][0])"
"show_image(report.datastore[\"report_sections\"][1][\"features\"][0][\"plots\"][0])"
]
},
{
Expand Down Expand Up @@ -216,7 +220,7 @@
"metadata": {},
"outputs": [],
"source": [
"show_image(report.datastore['report_sections'][1]['features'][1]['plots'][0])"
"show_image(report.datastore[\"report_sections\"][1][\"features\"][1][\"plots\"][0])"
]
},
{
Expand Down Expand Up @@ -266,7 +270,7 @@
"metadata": {},
"outputs": [],
"source": [
"show_image(report.datastore['report_sections'][1]['features'][0]['plots'][2])"
"show_image(report.datastore[\"report_sections\"][1][\"features\"][0][\"plots\"][2])"
]
},
{
Expand Down Expand Up @@ -299,7 +303,7 @@
"metadata": {},
"outputs": [],
"source": [
"show_table(report.datastore['report_sections'][2]['features'][0]['plots'][0])"
"show_table(report.datastore[\"report_sections\"][2][\"features\"][0][\"plots\"][0])"
]
},
{
Expand All @@ -325,7 +329,7 @@
"metadata": {},
"outputs": [],
"source": [
"show_table(report.datastore['report_sections'][3]['features'][0]['plots'][0])"
"show_table(report.datastore[\"report_sections\"][3][\"features\"][0][\"plots\"][0])"
]
},
{
Expand All @@ -351,7 +355,7 @@
"metadata": {},
"outputs": [],
"source": [
"show_image(report.datastore['report_sections'][4]['features'][0]['plots'][0])"
"show_image(report.datastore[\"report_sections\"][4][\"features\"][0][\"plots\"][0])"
]
},
{
Expand All @@ -377,7 +381,7 @@
"metadata": {},
"outputs": [],
"source": [
"show_image(report.datastore['report_sections'][5]['features'][0]['plots'][0])"
"show_image(report.datastore[\"report_sections\"][5][\"features\"][0][\"plots\"][0])"
]
}
],
Expand Down

0 comments on commit 77fab29

Please sign in to comment.