Text (yaml) report for download (#181)

* pyyaml dep * stub reports block * fix flakey test of weight * half-way there! * add confidence and accuracy * stub inputs * csv_path * generate report and drop in tmp * text report download works * "please wait" * test each download * use original column names in our report * CSV report (#182) * use original column names in our report * flatten util * checkpoint on CSV report; tests not passing * csv report works * add a test * button grid * factor out button function * update test to match * add epsilon to report * remove params when we can just get it from self * Add column info to report
opendp · Dec 3, 2024 · 23034e9 · 23034e9
1 parent 55bd929
commit 23034e9
Show file tree

Hide file tree

Showing 10 changed files with 218 additions and 20 deletions.
diff --git a/dp_wizard/app/results_panel.py b/dp_wizard/app/results_panel.py
@@ -1,4 +1,8 @@
+from pathlib import Path
+
 from shiny import ui, render, reactive, Inputs, Outputs, Session
+from faicons import icon_svg
+from htmltools.tags import table, tr, td
 
 from dp_wizard.utils.code_generators import (
     NotebookGenerator,
@@ -9,17 +13,38 @@
 from dp_wizard.utils.converters import convert_py_to_nb
 
 
+wait_message = "Please wait."
+
+
+def td_button(name: str, ext: str, icon: str):
+    """
+    Build one table cell holding a download button.
+
+    The button id is "download_" followed by ``name`` lower-cased with
+    spaces replaced by underscores (for example "download_report"), and
+    the label is the icon followed by "Download {name} ({ext})".
+
+    Note that the cell is returned wrapped in a one-element tuple.
+    """
+    slug = name.lower().replace(" ", "_")
+    label = [
+        icon_svg(icon, margin_right="0.5em"),
+        f"Download {name} ({ext})",
+    ]
+    button = ui.download_button(f"download_{slug}", label, width="20em")
+    return (td(button),)
+
+
 def results_ui():
     return ui.nav_panel(
         "Download results",
         ui.markdown("You can now make a differentially private release of your data."),
-        ui.download_button(
-            "download_script",
-            "Download Script (.py)",
-        ),
-        ui.download_button(
-            "download_notebook",
-            "Download Notebook (.ipynb)",
+        table(
+            tr(
+                td_button("Notebook", ".ipynb", "book"),
+                td_button("Script", ".py", "python"),
+            ),
+            tr(
+                td_button("Report", ".txt", "file-lines"),
+                td_button("Table", ".csv", "file-csv"),
+            ),
         ),
         value="results_panel",
     )
@@ -58,19 +83,55 @@ def analysis_plan() -> AnalysisPlan:
             columns=columns,
         )
 
+    @reactive.calc
+    def notebook_nb():
+        """
+        Generate the notebook source from the analysis plan, then convert
+        and execute it.
+
+        Executing the notebook also drops the reports in the tmp dir.
+        This could be slow, but reactive calcs are lazy.
+        """
+        generator = NotebookGenerator(analysis_plan())
+        return convert_py_to_nb(generator.make_py(), execute=True)
+
     @render.download(
         filename="dp-wizard-script.py",
         media_type="text/x-python",
     )
     async def download_script():
-        script_py = ScriptGenerator(analysis_plan()).make_py()
-        yield script_py
+        with ui.Progress() as progress:
+            progress.set(message=wait_message)
+            yield ScriptGenerator(analysis_plan()).make_py()
 
     @render.download(
         filename="dp-wizard-notebook.ipynb",
         media_type="application/x-ipynb+json",
     )
     async def download_notebook():
-        notebook_py = NotebookGenerator(analysis_plan()).make_py()
-        notebook_nb = convert_py_to_nb(notebook_py, execute=True)
-        yield notebook_nb
+        with ui.Progress() as progress:
+            progress.set(message=wait_message)
+            yield notebook_nb()
+
+    @render.download(
+        filename="dp-wizard-report.txt",
+        media_type="text/plain",
+    )
+    async def download_report():
+        """Serve the text report that notebook execution wrote to the tmp dir."""
+        report_path = Path(__file__).parent.parent / "tmp" / "report.txt"
+        with ui.Progress() as progress:
+            progress.set(message=wait_message)
+            # The return value is unused: evaluating the notebook is what
+            # creates the report file read below.
+            notebook_nb()
+            yield report_path.read_text()
+
+    @render.download(
+        filename="dp-wizard-report.csv",
+        media_type="text/csv",
+    )
+    async def download_table():
+        """
+        Serve the flattened report table as a csv download.
+
+        The file is not produced here: it is written to the tmp dir when
+        the generated notebook is executed. The media type is "text/csv"
+        so that it agrees with the ".csv" filename.
+        """
+        with ui.Progress() as progress:
+            progress.set(message=wait_message)
+            notebook_nb()  # Evaluate just for the side effect of creating report.
+            report_csv = (
+                Path(__file__).parent.parent / "tmp" / "report.csv"
+            ).read_text()
+            yield report_csv
diff --git a/dp_wizard/tmp/.gitignore b/dp_wizard/tmp/.gitignore
@@ -1 +1,3 @@
 demo.csv
+report.txt
+report.csv
diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py
@@ -33,13 +33,17 @@ def __init__(self, analysis_plan: AnalysisPlan):
     @abstractmethod
     def _make_context(self) -> str: ...  # pragma: no cover
 
+    def _make_extra_blocks(self):
+        """
+        Return extra template blocks, keyed by block name, which make_py()
+        passes on to fill_blocks(). This implementation adds none.
+        """
+        return {}
+
     def make_py(self):
         return str(
             Template(self.root_template).fill_blocks(
                 IMPORTS_BLOCK=_make_imports(),
-                COLUMNS_BLOCK=self._make_columns(self.columns),
+                COLUMNS_BLOCK=self._make_columns(),
                 CONTEXT_BLOCK=self._make_context(),
-                QUERIES_BLOCK=self._make_queries(self.columns.keys()),
+                QUERIES_BLOCK=self._make_queries(),
+                **self._make_extra_blocks(),
             )
         )
 
@@ -66,22 +70,23 @@ def _make_margins_dict(self, bin_names: Iterable[str]):
         margins_dict = "{" + "".join(margins) + "\n    }"
         return margins_dict
 
-    def _make_columns(self, columns: dict[str, AnalysisPlanColumn]):
+    def _make_columns(self):
         return "\n".join(
             make_column_config_block(
                 name=name,
                 lower_bound=col.lower_bound,
                 upper_bound=col.upper_bound,
                 bin_count=col.bin_count,
             )
-            for name, col in columns.items()
+            for name, col in self.columns.items()
         )
 
-    def _make_queries(self, column_names: Iterable[str]):
+    def _make_queries(self):
         confidence_note = (
             "The actual value is within the shown range "
             f"with {int(confidence * 100)}% confidence."
         )
+        column_names = self.columns.keys()
         return f"confidence = {confidence} # {confidence_note}\n\n" + "\n".join(
             _make_query(column_name) for column_name in column_names
         )
@@ -115,6 +120,41 @@ class NotebookGenerator(_CodeGenerator):
     def _make_context(self):
         return str(self._make_partial_context().fill_values(CSV_PATH=self.csv_path))
 
+    def _make_extra_blocks(self):
+        """
+        Build the REPORTS_BLOCK for the notebook template.
+
+        The block is the "reports" template filled with the analysis
+        inputs, a dict expression holding one "report_kv" entry per column,
+        and the report paths in the "tmp" directory three levels up from
+        this file.
+        """
+
+        def make_report_kv(name):
+            # One entry of the outputs dict expression, for a single column.
+            identifier = name_to_identifier(name)
+            kv_template = Template("report_kv").fill_values(
+                NAME=name,
+                CONFIDENCE=confidence,
+            )
+            return str(
+                kv_template.fill_expressions(
+                    IDENTIFIER_HISTOGRAM=f"{identifier}_histogram",
+                    IDENTIFIER_ACCURACY=f"{identifier}_accuracy",
+                )
+            )
+
+        entries = [make_report_kv(name) for name in self.columns.keys()]
+        outputs_expression = "{" + ",".join(entries) + "}"
+
+        tmp_path = Path(__file__).parent.parent.parent / "tmp"
+        columns_info = {k: v._asdict() for k, v in self.columns.items()}
+        reports_template = Template("reports").fill_expressions(
+            OUTPUTS=outputs_expression,
+            COLUMNS=columns_info,
+        )
+        reports_block = str(
+            reports_template.fill_values(
+                CSV_PATH=self.csv_path,
+                EPSILON=self.epsilon,
+                TXT_REPORT_PATH=str(tmp_path / "report.txt"),
+                CSV_REPORT_PATH=str(tmp_path / "report.csv"),
+            )
+        )
+        return {"REPORTS_BLOCK": reports_block}
+
 
 class ScriptGenerator(_CodeGenerator):
     root_template = "script"

diff --git a/dp_wizard/utils/code_generators/no-tests/_notebook.py b/dp_wizard/utils/code_generators/no-tests/_notebook.py
@@ -34,3 +34,9 @@
 # +
 QUERIES_BLOCK
 # -
+
+# The code below produces a summary report.
+
+# +
+REPORTS_BLOCK
+# -
diff --git a/dp_wizard/utils/code_generators/no-tests/_report_kv.py b/dp_wizard/utils/code_generators/no-tests/_report_kv.py
@@ -0,0 +1,5 @@
+NAME: {
+    "histogram": dict(zip(*df_to_columns(IDENTIFIER_HISTOGRAM))),
+    "accuracy": IDENTIFIER_ACCURACY,
+    "confidence": CONFIDENCE,
+}
diff --git a/dp_wizard/utils/code_generators/no-tests/_reports.py b/dp_wizard/utils/code_generators/no-tests/_reports.py
@@ -0,0 +1,53 @@
+from yaml import dump
+from pathlib import Path
+import csv
+
+
+# https://stackoverflow.com/a/6027615/10727889
+def flatten_dict(dictionary, parent_key=""):
+    """
+    Walk tree to return flat dictionary.
+
+    Keys of nested dictionaries are joined to their parent key with ": ".
+    Nested keys are formatted rather than concatenated, so keys which are
+    not strings (such as numbers) do not raise an error.
+
+    >>> from pprint import pp
+    >>> pp(flatten_dict({
+    ...     "inputs": {
+    ...         "data": "fake.csv"
+    ...     },
+    ...     "outputs": {
+    ...         "a column": {
+    ...             "(0, 1]": 24,
+    ...             "(1, 2]": 42,
+    ...         }
+    ...     }
+    ... }))
+    {'inputs: data': 'fake.csv',
+     'outputs: a column: (0, 1]': 24,
+     'outputs: a column: (1, 2]': 42}
+    >>> flatten_dict({"bins": {1: "a", 2: "b"}})
+    {'bins: 1': 'a', 'bins: 2': 'b'}
+    """
+    separator = ": "
+    flat = {}
+    for key, value in dictionary.items():
+        # Compare against the empty default, instead of testing truthiness,
+        # so that a parent key like 0 still contributes its prefix.
+        if parent_key == "":
+            new_key = key
+        else:
+            new_key = f"{parent_key}{separator}{key}"
+        if isinstance(value, dict):
+            flat.update(flatten_dict(value, new_key))
+        else:
+            flat[new_key] = value
+    return flat
+
+
+report = {
+    "inputs": {
+        "data": CSV_PATH,
+        "epsilon": EPSILON,
+        "columns": COLUMNS,
+    },
+    "outputs": OUTPUTS,
+}
+
+# Show the report as yaml, and save the same text to a file.
+report_yaml = dump(report)
+print(report_yaml)
+Path(TXT_REPORT_PATH).write_text(report_yaml)
+
+# Also save a flat version as a table, with one row per key and value.
+flat_report = flatten_dict(report)
+with Path(CSV_REPORT_PATH).open(mode="w", newline="") as handle:
+    csv.writer(handle).writerows(flat_report.items())
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,6 +18,7 @@ dependencies = [
     "jupyter-client",
     "nbconvert",
     "ipykernel",
+    "pyyaml",
 ]
 
 [project.scripts]

diff --git a/requirements-dev.in b/requirements-dev.in
@@ -27,6 +27,7 @@ scipy<1.14
 # Conversion:
 jupytext
 jupyter-client
+pyyaml
 nbconvert
 ipykernel
 # May also require:

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -268,6 +268,7 @@ python-slugify==8.0.4
     # via pytest-playwright
 pyyaml==6.0.2
     # via
+    #   -r requirements-dev.in
     #   jupytext
     #   pre-commit
 pyzmq==26.2.0

diff --git a/tests/test_app.py b/tests/test_app.py
@@ -122,14 +122,42 @@ def expect_no_error():
     expect_visible(download_results_text)
     expect_no_error()
 
-    with page.expect_download() as download_info:
+    # Text Report:
+    with page.expect_download() as text_report_download_info:
+        page.get_by_text("Download report (.txt)").click()
+    expect_no_error()
+
+    report_download = text_report_download_info.value
+    report = report_download.path().read_text()
+    assert "confidence: 0.95" in report
+
+    # CSV Report:
+    with page.expect_download() as csv_report_download_info:
+        page.get_by_text("Download table (.csv)").click()
+    expect_no_error()
+
+    report_download = csv_report_download_info.value
+    report = report_download.path().read_text()
+    assert "outputs: grade: confidence,0.95" in report
+
+    # Script:
+    with page.expect_download() as script_download_info:
         page.get_by_text("Download script").click()
     expect_no_error()
 
-    download = download_info.value
-    script = download.path().read_text()
+    script_download = script_download_info.value
+    script = script_download.path().read_text()
     assert "privacy_unit = dp.unit_of(contributions=42)" in script
 
+    # Notebook:
+    with page.expect_download() as notebook_download_info:
+        page.get_by_text("Download notebook").click()
+    expect_no_error()
+
+    notebook_download = notebook_download_info.value
+    notebook = notebook_download.path().read_text()
+    assert "privacy_unit = dp.unit_of(contributions=42)" in notebook
+
     # -- Feedback --
     page.get_by_text("Feedback").click()
     iframe = page.locator("#feedback-iframe")