From 77fab29e573ebbd85e5fd9cfe1acd0be6d43e46e Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 28 Jun 2022 18:24:47 +0200 Subject: [PATCH] ci: enable qa on jupyter notebooks --- .pre-commit-config.yaml | 10 ++- .../notebooks/popmon_tutorial_advanced.ipynb | 62 ++++++++++++------- popmon/notebooks/popmon_tutorial_basic.ipynb | 4 +- .../popmon_tutorial_incremental_data.ipynb | 4 +- .../notebooks/popmon_tutorial_reports.ipynb | 30 +++++---- 5 files changed, 71 insertions(+), 39 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d9de8bda..fcb31244 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,4 +31,12 @@ repos: - id: docautogenerate name: docsautogenerate entry: bash -c 'cd ./docs/ && bash autogenerate.sh' - language: system \ No newline at end of file + language: system +- repo: https://github.com/nbQA-dev/nbQA + rev: 1.3.1 + hooks: + - id: nbqa-black + - id: nbqa-pyupgrade + args: ['--py36-plus'] + - id: nbqa-isort + args: ['--profile=black'] \ No newline at end of file diff --git a/popmon/notebooks/popmon_tutorial_advanced.ipynb b/popmon/notebooks/popmon_tutorial_advanced.ipynb index f1ec12c7..9739176e 100644 --- a/popmon/notebooks/popmon_tutorial_advanced.ipynb +++ b/popmon/notebooks/popmon_tutorial_advanced.ipynb @@ -34,6 +34,7 @@ "source": [ "# install popmon (if not installed yet)\n", "import sys\n", + "\n", "!\"{sys.executable}\" -m pip install -q popmon" ] }, @@ -47,7 +48,7 @@ "\n", "import popmon\n", "from popmon import resources\n", - "from popmon.config import Settings, Report" + "from popmon.config import Report, Settings" ] }, { @@ -64,7 +65,9 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv(resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"])" + "df = pd.read_csv(\n", + " resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"]\n", + ")" ] }, { @@ -126,7 +129,7 @@ "outputs": [], "source": [ "# reuse the previous settings\n", - "settings.monitoring.pull_rules={\"*_pull\": [10, 7, -7, -10]}\n", + "settings.monitoring.pull_rules = {\"*_pull\": [10, 7, -7, -10]}\n", "\n", "df.pm_stability_report(\n", " time_axis=\"DATE\",\n", @@ -157,8 +160,8 @@ "source": [ "# download histogrammar jar files if not already installed, used for histogramming of spark dataframe\n", "try:\n", - " from pyspark.sql import SparkSession\n", " from pyspark import __version__ as pyspark_version\n", + " from pyspark.sql import SparkSession\n", "\n", " pyspark_installed = True\n", "except ImportError:\n", @@ -173,12 +176,12 @@ "outputs": [], "source": [ "if pyspark_installed:\n", - " scala = '2.12' if int(pyspark_version[0]) >= 3 else '2.11'\n", - " hist_jar = f'io.github.histogrammar:histogrammar_{scala}:1.0.20'\n", - " hist_spark_jar = f'io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.20'\n", - " \n", + " scala = \"2.12\" if int(pyspark_version[0]) >= 3 else \"2.11\"\n", + " hist_jar = f\"io.github.histogrammar:histogrammar_{scala}:1.0.20\"\n", + " hist_spark_jar = f\"io.github.histogrammar:histogrammar-sparksql_{scala}:1.0.20\"\n", + "\n", " spark = SparkSession.builder.config(\n", - " \"spark.jars.packages\", f'{hist_spark_jar},{hist_jar}'\n", + " \"spark.jars.packages\", f\"{hist_spark_jar},{hist_jar}\"\n", " ).getOrCreate()\n", "\n", " sdf = spark.createDataFrame(df)\n", @@ -441,25 +444,31 @@ "metadata": {}, "outputs": [], "source": [ - "from popmon.hist.hist_splitter import HistSplitter\n", "from popmon.analysis.profiling import HistProfiler\n", - "from popmon.pipeline.report import StabilityReport\n", "from popmon.base import Pipeline\n", - "from popmon.visualization import SectionGenerator, ReportGenerator\n", - "\n", + "from popmon.hist.hist_splitter import HistSplitter\n", + "from popmon.pipeline.report import StabilityReport\n", + "from popmon.visualization import ReportGenerator, SectionGenerator\n", "\n", "datastore = {\n", - " \"hists\": df.pm_make_histograms(time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\")\n", + " \"hists\": df.pm_make_histograms(\n", + " time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n", + " )\n", "}\n", "\n", "\n", "class CustomPipeline(Pipeline):\n", " def __init__(self):\n", " modules = [\n", - " HistSplitter(read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"),\n", + " HistSplitter(\n", + " read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"\n", + " ),\n", " HistProfiler(read_key=\"split_hists\", store_key=\"profiles\"),\n", " SectionGenerator(\n", - " section_name=\"Profiles\", read_key=\"profiles\", store_key=\"report_sections\", settings=report_settings\n", + " section_name=\"Profiles\",\n", + " read_key=\"profiles\",\n", + " store_key=\"report_sections\",\n", + " settings=report_settings,\n", " ),\n", " ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n", " ]\n", @@ -488,16 +497,19 @@ "source": [ "from popmon.analysis.comparison.hist_comparer import ReferenceHistComparer\n", "\n", - "\n", "datastore = {\n", - " \"hists\": df.pm_make_histograms(time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\")\n", + " \"hists\": df.pm_make_histograms(\n", + " time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n", + " )\n", "}\n", "\n", "\n", "class CustomComparisonsPipeline(Pipeline):\n", " def __init__(self):\n", " modules = [\n", - " HistSplitter(read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"),\n", + " HistSplitter(\n", + " read_key=\"hists\", store_key=\"split_hists\", feature_begins_with=\"DATE\"\n", + " ),\n", " HistProfiler(read_key=\"split_hists\", store_key=\"profiles\"),\n", " ReferenceHistComparer(\n", " reference_key=\"split_hists\",\n", @@ -505,16 +517,22 @@ " store_key=\"comparisons\",\n", " ),\n", " SectionGenerator(\n", - " section_name=\"Profiles\", read_key=\"profiles\", store_key=\"report_sections\", settings=report_settings\n", + " section_name=\"Profiles\",\n", + " read_key=\"profiles\",\n", + " store_key=\"report_sections\",\n", + " settings=report_settings,\n", " ),\n", " SectionGenerator(\n", - " section_name=\"Comparisons\", read_key=\"comparisons\", store_key=\"report_sections\", settings=report_settings\n", + " section_name=\"Comparisons\",\n", + " read_key=\"comparisons\",\n", + " store_key=\"report_sections\",\n", + " settings=report_settings,\n", " ),\n", " ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n", " ]\n", " super().__init__(modules)\n", "\n", - " \n", + "\n", "pipeline = CustomComparisonsPipeline()\n", "datastore = pipeline.transform(datastore)\n", "\n", diff --git a/popmon/notebooks/popmon_tutorial_basic.ipynb b/popmon/notebooks/popmon_tutorial_basic.ipynb index fadcf170..97a153d9 100644 --- a/popmon/notebooks/popmon_tutorial_basic.ipynb +++ b/popmon/notebooks/popmon_tutorial_basic.ipynb @@ -28,7 +28,7 @@ "outputs": [], "source": [ "# (optional) Adjust the jupyter notebook style for easier navigation of the reports\n", - "from IPython.core.display import display, HTML\n", + "from IPython.core.display import HTML, display\n", "\n", "# Wider notebook\n", "display(HTML(\"\"))\n", @@ -57,6 +57,7 @@ "outputs": [], "source": [ "import sys\n", + "\n", "!\"{sys.executable}\" -m pip install -q popmon" ] }, @@ -74,6 +75,7 @@ "outputs": [], "source": [ "import pandas as pd\n", + "\n", "import popmon\n", "from popmon import resources\n", "from popmon.config import Report" diff --git a/popmon/notebooks/popmon_tutorial_incremental_data.ipynb b/popmon/notebooks/popmon_tutorial_incremental_data.ipynb index ed30289e..450dbc17 100644 --- a/popmon/notebooks/popmon_tutorial_incremental_data.ipynb +++ b/popmon/notebooks/popmon_tutorial_incremental_data.ipynb @@ -37,6 +37,7 @@ "outputs": [], "source": [ "import sys\n", + "\n", "!\"{sys.executable}\" -m pip install -q popmon" ] }, @@ -49,8 +50,7 @@ "import pandas as pd\n", "\n", "import popmon\n", - "from popmon import stability_report, stitch_histograms, get_bin_specs\n", - "from popmon import resources" + "from popmon import get_bin_specs, resources, stability_report, stitch_histograms" ] }, { diff --git a/popmon/notebooks/popmon_tutorial_reports.ipynb b/popmon/notebooks/popmon_tutorial_reports.ipynb index c2c54057..3340c181 100644 --- a/popmon/notebooks/popmon_tutorial_reports.ipynb +++ b/popmon/notebooks/popmon_tutorial_reports.ipynb @@ -30,7 +30,9 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv(resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"])\n", + "df = pd.read_csv(\n", + " resources.data(\"flight_delays.csv.gz\"), index_col=0, parse_dates=[\"DATE\"]\n", + ")\n", "report = df.pm_stability_report(time_axis=\"DATE\", time_width=\"1w\")" ] }, @@ -67,7 +69,7 @@ "metadata": {}, "outputs": [], "source": [ - "list(report.datastore['report_sections'][0].keys())" + "list(report.datastore[\"report_sections\"][0].keys())" ] }, { @@ -85,7 +87,7 @@ "metadata": {}, "outputs": [], "source": [ - "[section['section_title'] for section in report.datastore['report_sections']]" + "[section[\"section_title\"] for section in report.datastore[\"report_sections\"]]" ] }, { @@ -103,15 +105,17 @@ "metadata": {}, "outputs": [], "source": [ - "from IPython.core.display import display, HTML\n", + "from IPython.core.display import HTML, display\n", + "\n", "\n", "def show_image(plot):\n", " display(HTML(f''))\n", " text = f'{plot[\"name\"]}'\n", - " if plot['description']:\n", + " if plot[\"description\"]:\n", " text += f': {plot[\"description\"]}'\n", " display(HTML(text))\n", "\n", + "\n", "def show_table(plot):\n", " style = \"\"\"table.overview{\n", " margin: 25px;\n", @@ -147,7 +151,7 @@ " font-weight: 300;\n", " }\n", " \"\"\"\n", - " display(HTML(f''))\n", + " display(HTML(f\"\"))\n", " display(HTML(plot[\"plot\"]))" ] }, @@ -187,7 +191,7 @@ "outputs": [], "source": [ "# First section, First Feature, First plot\n", - "show_image(report.datastore['report_sections'][1]['features'][0]['plots'][0])" + "show_image(report.datastore[\"report_sections\"][1][\"features\"][0][\"plots\"][0])" ] }, { @@ -216,7 +220,7 @@ "metadata": {}, "outputs": [], "source": [ - "show_image(report.datastore['report_sections'][1]['features'][1]['plots'][0])" + "show_image(report.datastore[\"report_sections\"][1][\"features\"][1][\"plots\"][0])" ] }, { @@ -266,7 +270,7 @@ "metadata": {}, "outputs": [], "source": [ - "show_image(report.datastore['report_sections'][1]['features'][0]['plots'][2])" + "show_image(report.datastore[\"report_sections\"][1][\"features\"][0][\"plots\"][2])" ] }, { @@ -299,7 +303,7 @@ "metadata": {}, "outputs": [], "source": [ - "show_table(report.datastore['report_sections'][2]['features'][0]['plots'][0])" + "show_table(report.datastore[\"report_sections\"][2][\"features\"][0][\"plots\"][0])" ] }, { @@ -325,7 +329,7 @@ "metadata": {}, "outputs": [], "source": [ - "show_table(report.datastore['report_sections'][3]['features'][0]['plots'][0])" + "show_table(report.datastore[\"report_sections\"][3][\"features\"][0][\"plots\"][0])" ] }, { @@ -351,7 +355,7 @@ "metadata": {}, "outputs": [], "source": [ - "show_image(report.datastore['report_sections'][4]['features'][0]['plots'][0])" + "show_image(report.datastore[\"report_sections\"][4][\"features\"][0][\"plots\"][0])" ] }, { @@ -377,7 +381,7 @@ "metadata": {}, "outputs": [], "source": [ - "show_image(report.datastore['report_sections'][5]['features'][0]['plots'][0])" + "show_image(report.datastore[\"report_sections\"][5][\"features\"][0][\"plots\"][0])" ] } ],