diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d9de8bda..fcb31244 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -31,4 +31,12 @@ repos:
- id: docautogenerate
name: docsautogenerate
entry: bash -c 'cd ./docs/ && bash autogenerate.sh'
- language: system
\ No newline at end of file
+ language: system
+- repo: https://github.com/nbQA-dev/nbQA
+ rev: 1.3.1
+ hooks:
+ - id: nbqa-black
+ - id: nbqa-pyupgrade
+ args: ['--py36-plus']
+ - id: nbqa-isort
+ args: ['--profile=black']
\ No newline at end of file
diff --git a/popmon/notebooks/popmon_tutorial_advanced.ipynb b/popmon/notebooks/popmon_tutorial_advanced.ipynb
index f1ec12c7..9739176e 100644
--- a/popmon/notebooks/popmon_tutorial_advanced.ipynb
+++ b/popmon/notebooks/popmon_tutorial_advanced.ipynb
@@ -34,6 +34,7 @@
"source": [
"# install popmon (if not installed yet)\n",
"import sys\n",
+ "\n",
"!\"{sys.executable}\" -m pip install -q popmon"
]
},
@@ -47,7 +48,7 @@
"\n",
"import popmon\n",
"from popmon import resources\n",
- "from popmon.config import Settings, Report"
+ "from popmon.config import Report, Settings"
]
},
{
@@ -64,7 +65,9 @@
"metadata": {},
"outputs": [],
"source": [
- "df = pd.read_csv(resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"])"
+ "df = pd.read_csv(\n",
+ " resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"]\n",
+ ")"
]
},
{
@@ -126,7 +129,7 @@
"outputs": [],
"source": [
"# reuse the previous settings\n",
- "settings.monitoring.pull_rules={\"*_pull\": [10, 7, -7, -10]}\n",
+ "settings.monitoring.pull_rules = {\"*_pull\": [10, 7, -7, -10]}\n",
"\n",
"df.pm_stability_report(\n",
" time_axis=\"DATE\",\n",
@@ -157,8 +160,8 @@
"source": [
"# download histogrammar jar files if not already installed, used for histogramming of spark dataframe\n",
"try:\n",
- " from pyspark.sql import SparkSession\n",
" from pyspark import __version__ as pyspark_version\n",
+ " from pyspark.sql import SparkSession\n",
"\n",
" pyspark_installed = True\n",
"except ImportError:\n",
@@ -173,12 +176,12 @@
"outputs": [],
"source": [
"if pyspark_installed:\n",
- " scala = '2.12' if int(pyspark_version[0]) >= 3 else '2.11'\n",
- " hist_jar = f'io.github.histogrammar:histogrammar_{scala}:1.0.20'\n",
- " hist_spark_jar = f'io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.20'\n",
- " \n",
+ " scala = \"2.12\" if int(pyspark_version[0]) >= 3 else \"2.11\"\n",
+ " hist_jar = f\"io.github.histogrammar:histogrammar_{scala}:1.0.20\"\n",
+ " hist_spark_jar = f\"io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.20\"\n",
+ "\n",
" spark = SparkSession.builder.config(\n",
- " \"spark.jars.packages\", f'{hist_spark_jar},{hist_jar}'\n",
+ " \"spark.jars.packages\", f\"{hist_spark_jar},{hist_jar}\"\n",
" ).getOrCreate()\n",
"\n",
" sdf = spark.createDataFrame(df)\n",
@@ -441,25 +444,31 @@
"metadata": {},
"outputs": [],
"source": [
- "from popmon.hist.hist_splitter import HistSplitter\n",
"from popmon.analysis.profiling import HistProfiler\n",
- "from popmon.pipeline.report import StabilityReport\n",
"from popmon.base import Pipeline\n",
- "from popmon.visualization import SectionGenerator, ReportGenerator\n",
- "\n",
+ "from popmon.hist.hist_splitter import HistSplitter\n",
+ "from popmon.pipeline.report import StabilityReport\n",
+ "from popmon.visualization import ReportGenerator, SectionGenerator\n",
"\n",
"datastore = {\n",
- " \"hists\": df.pm_make_histograms(time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\")\n",
+ " \"hists\": df.pm_make_histograms(\n",
+ " time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n",
+ " )\n",
"}\n",
"\n",
"\n",
"class CustomPipeline(Pipeline):\n",
" def __init__(self):\n",
" modules = [\n",
- " HistSplitter(read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"),\n",
+ " HistSplitter(\n",
+ " read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"\n",
+ " ),\n",
" HistProfiler(read_key=\"split_hists\", store_key=\"profiles\"),\n",
" SectionGenerator(\n",
- " section_name=\"Profiles\", read_key=\"profiles\", store_key=\"report_sections\", settings=report_settings\n",
+ " section_name=\"Profiles\",\n",
+ " read_key=\"profiles\",\n",
+ " store_key=\"report_sections\",\n",
+ " settings=report_settings,\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
" ]\n",
@@ -488,16 +497,19 @@
"source": [
"from popmon.analysis.comparison.hist_comparer import ReferenceHistComparer\n",
"\n",
- "\n",
"datastore = {\n",
- " \"hists\": df.pm_make_histograms(time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\")\n",
+ " \"hists\": df.pm_make_histograms(\n",
+ " time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n",
+ " )\n",
"}\n",
"\n",
"\n",
"class CustomComparisonsPipeline(Pipeline):\n",
" def __init__(self):\n",
" modules = [\n",
- " HistSplitter(read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"),\n",
+ " HistSplitter(\n",
+ " read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"\n",
+ " ),\n",
" HistProfiler(read_key=\"split_hists\", store_key=\"profiles\"),\n",
" ReferenceHistComparer(\n",
" reference_key=\"split_hists\",\n",
@@ -505,16 +517,22 @@
" store_key=\"comparisons\",\n",
" ),\n",
" SectionGenerator(\n",
- " section_name=\"Profiles\", read_key=\"profiles\", store_key=\"report_sections\", settings=report_settings\n",
+ " section_name=\"Profiles\",\n",
+ " read_key=\"profiles\",\n",
+ " store_key=\"report_sections\",\n",
+ " settings=report_settings,\n",
" ),\n",
" SectionGenerator(\n",
- " section_name=\"Comparisons\", read_key=\"comparisons\", store_key=\"report_sections\", settings=report_settings\n",
+ " section_name=\"Comparisons\",\n",
+ " read_key=\"comparisons\",\n",
+ " store_key=\"report_sections\",\n",
+ " settings=report_settings,\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
" ]\n",
" super().__init__(modules)\n",
"\n",
- " \n",
+ "\n",
"pipeline = CustomComparisonsPipeline()\n",
"datastore = pipeline.transform(datastore)\n",
"\n",
diff --git a/popmon/notebooks/popmon_tutorial_basic.ipynb b/popmon/notebooks/popmon_tutorial_basic.ipynb
index fadcf170..97a153d9 100644
--- a/popmon/notebooks/popmon_tutorial_basic.ipynb
+++ b/popmon/notebooks/popmon_tutorial_basic.ipynb
@@ -28,7 +28,7 @@
"outputs": [],
"source": [
"# (optional) Adjust the jupyter notebook style for easier navigation of the reports\n",
- "from IPython.core.display import display, HTML\n",
+ "from IPython.core.display import HTML, display\n",
"\n",
"# Wider notebook\n",
"display(HTML(\"\"))\n",
@@ -57,6 +57,7 @@
"outputs": [],
"source": [
"import sys\n",
+ "\n",
"!\"{sys.executable}\" -m pip install -q popmon"
]
},
@@ -74,6 +75,7 @@
"outputs": [],
"source": [
"import pandas as pd\n",
+ "\n",
"import popmon\n",
"from popmon import resources\n",
"from popmon.config import Report"
diff --git a/popmon/notebooks/popmon_tutorial_incremental_data.ipynb b/popmon/notebooks/popmon_tutorial_incremental_data.ipynb
index ed30289e..450dbc17 100644
--- a/popmon/notebooks/popmon_tutorial_incremental_data.ipynb
+++ b/popmon/notebooks/popmon_tutorial_incremental_data.ipynb
@@ -37,6 +37,7 @@
"outputs": [],
"source": [
"import sys\n",
+ "\n",
"!\"{sys.executable}\" -m pip install -q popmon"
]
},
@@ -49,8 +50,7 @@
"import pandas as pd\n",
"\n",
"import popmon\n",
- "from popmon import stability_report, stitch_histograms, get_bin_specs\n",
- "from popmon import resources"
+ "from popmon import get_bin_specs, resources, stability_report, stitch_histograms"
]
},
{
diff --git a/popmon/notebooks/popmon_tutorial_reports.ipynb b/popmon/notebooks/popmon_tutorial_reports.ipynb
index c2c54057..3340c181 100644
--- a/popmon/notebooks/popmon_tutorial_reports.ipynb
+++ b/popmon/notebooks/popmon_tutorial_reports.ipynb
@@ -30,7 +30,9 @@
"metadata": {},
"outputs": [],
"source": [
- "df = pd.read_csv(resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"])\n",
+ "df = pd.read_csv(\n",
+ " resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"]\n",
+ ")\n",
"report = df.pm_stability_report(time_axis=\"DATE\", time_width=\"1w\")"
]
},
@@ -67,7 +69,7 @@
"metadata": {},
"outputs": [],
"source": [
- "list(report.datastore['report_sections'][0].keys())"
+ "list(report.datastore[\"report_sections\"][0].keys())"
]
},
{
@@ -85,7 +87,7 @@
"metadata": {},
"outputs": [],
"source": [
- "[section['section_title'] for section in report.datastore['report_sections']]"
+ "[section[\"section_title\"] for section in report.datastore[\"report_sections\"]]"
]
},
{
@@ -103,15 +105,17 @@
"metadata": {},
"outputs": [],
"source": [
- "from IPython.core.display import display, HTML\n",
+ "from IPython.core.display import HTML, display\n",
+ "\n",
"\n",
"def show_image(plot):\n",
" display(HTML(f''))\n",
" text = f'{plot[\"name\"]}'\n",
- " if plot['description']:\n",
+ " if plot[\"description\"]:\n",
" text += f': {plot[\"description\"]}'\n",
" display(HTML(text))\n",
"\n",
+ "\n",
"def show_table(plot):\n",
" style = \"\"\"table.overview{\n",
" margin: 25px;\n",
@@ -147,7 +151,7 @@
" font-weight: 300;\n",
" }\n",
" \"\"\"\n",
- " display(HTML(f''))\n",
+ " display(HTML(f\"\"))\n",
" display(HTML(plot[\"plot\"]))"
]
},
@@ -187,7 +191,7 @@
"outputs": [],
"source": [
"# First section, First Feature, First plot\n",
- "show_image(report.datastore['report_sections'][1]['features'][0]['plots'][0])"
+ "show_image(report.datastore[\"report_sections\"][1][\"features\"][0][\"plots\"][0])"
]
},
{
@@ -216,7 +220,7 @@
"metadata": {},
"outputs": [],
"source": [
- "show_image(report.datastore['report_sections'][1]['features'][1]['plots'][0])"
+ "show_image(report.datastore[\"report_sections\"][1][\"features\"][1][\"plots\"][0])"
]
},
{
@@ -266,7 +270,7 @@
"metadata": {},
"outputs": [],
"source": [
- "show_image(report.datastore['report_sections'][1]['features'][0]['plots'][2])"
+ "show_image(report.datastore[\"report_sections\"][1][\"features\"][0][\"plots\"][2])"
]
},
{
@@ -299,7 +303,7 @@
"metadata": {},
"outputs": [],
"source": [
- "show_table(report.datastore['report_sections'][2]['features'][0]['plots'][0])"
+ "show_table(report.datastore[\"report_sections\"][2][\"features\"][0][\"plots\"][0])"
]
},
{
@@ -325,7 +329,7 @@
"metadata": {},
"outputs": [],
"source": [
- "show_table(report.datastore['report_sections'][3]['features'][0]['plots'][0])"
+ "show_table(report.datastore[\"report_sections\"][3][\"features\"][0][\"plots\"][0])"
]
},
{
@@ -351,7 +355,7 @@
"metadata": {},
"outputs": [],
"source": [
- "show_image(report.datastore['report_sections'][4]['features'][0]['plots'][0])"
+ "show_image(report.datastore[\"report_sections\"][4][\"features\"][0][\"plots\"][0])"
]
},
{
@@ -377,7 +381,7 @@
"metadata": {},
"outputs": [],
"source": [
- "show_image(report.datastore['report_sections'][5]['features'][0]['plots'][0])"
+ "show_image(report.datastore[\"report_sections\"][5][\"features\"][0][\"plots\"][0])"
]
}
],