Skip to content

Commit

Permalink
Text (yaml) report for download (#181)
Browse files Browse the repository at this point in the history
* pyyaml dep

* stub reports block

* fix flakey test of weight

* half-way there!

* add confidence and accuracy

* stub inputs

* csv_path

* generate report and drop in tmp

* text report download works

* "please wait"

* test each download

* use original column names in our report

* CSV report (#182)

* use original column names in our report

* flatten util

* checkpoint on CSV report; tests not passing

* csv report works

* add a test

* button grid

* factor out button function

* update test to match

* add epsilon to report

* remove params when we can just get it from self

* Add column info to report
  • Loading branch information
mccalluc authored Dec 3, 2024
1 parent 55bd929 commit 23034e9
Show file tree
Hide file tree
Showing 10 changed files with 218 additions and 20 deletions.
85 changes: 73 additions & 12 deletions dp_wizard/app/results_panel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from pathlib import Path

from shiny import ui, render, reactive, Inputs, Outputs, Session
from faicons import icon_svg
from htmltools.tags import table, tr, td

from dp_wizard.utils.code_generators import (
NotebookGenerator,
Expand All @@ -9,17 +13,38 @@
from dp_wizard.utils.converters import convert_py_to_nb


wait_message = "Please wait."


def td_button(name: str, ext: str, icon: str):
    """
    Build one table cell holding a download button.

    The button id is derived from ``name``: lower-cased, with spaces
    replaced by underscores, and prefixed with ``download_``
    (so "Notebook" becomes "download_notebook").
    The label reads "Download <name> (<ext>)", preceded by the named icon.

    Note that the cell is returned wrapped in a one-element tuple.
    """
    slug = name.lower().replace(" ", "_")
    button_id = f"download_{slug}"
    label = [
        icon_svg(icon, margin_right="0.5em"),
        f"Download {name} ({ext})",
    ]
    button = ui.download_button(button_id, label, width="20em")
    return (td(button),)


def results_ui():
return ui.nav_panel(
"Download results",
ui.markdown("You can now make a differentially private release of your data."),
ui.download_button(
"download_script",
"Download Script (.py)",
),
ui.download_button(
"download_notebook",
"Download Notebook (.ipynb)",
table(
tr(
td_button("Notebook", ".ipynb", "book"),
td_button("Script", ".py", "python"),
),
tr(
td_button("Report", ".txt", "file-lines"),
td_button("Table", ".csv", "file-csv"),
),
),
value="results_panel",
)
Expand Down Expand Up @@ -58,19 +83,55 @@ def analysis_plan() -> AnalysisPlan:
columns=columns,
)

@reactive.calc
def notebook_nb():
    """
    Generate the notebook source from the current analysis plan,
    convert it to a notebook, and execute it.

    Executing the notebook has the side effect of dropping the reports
    in the tmp dir, and it could be slow. Reactive calcs are lazy,
    so nothing runs until a caller actually asks for the value.
    """
    generator = NotebookGenerator(analysis_plan())
    return convert_py_to_nb(generator.make_py(), execute=True)

@render.download(
filename="dp-wizard-script.py",
media_type="text/x-python",
)
async def download_script():
script_py = ScriptGenerator(analysis_plan()).make_py()
yield script_py
with ui.Progress() as progress:
progress.set(message=wait_message)
yield ScriptGenerator(analysis_plan()).make_py()

@render.download(
filename="dp-wizard-notebook.ipynb",
media_type="application/x-ipynb+json",
)
async def download_notebook():
notebook_py = NotebookGenerator(analysis_plan()).make_py()
notebook_nb = convert_py_to_nb(notebook_py, execute=True)
yield notebook_nb
with ui.Progress() as progress:
progress.set(message=wait_message)
yield notebook_nb()

@render.download(
    filename="dp-wizard-report.txt",
    media_type="text/plain",
)
async def download_report():
    """
    Serve the text report as a download.

    The report file is produced as a side effect of evaluating the
    notebook, so the notebook is evaluated first and its value discarded.
    A "please wait" progress message is shown while this happens.
    """
    report_path = Path(__file__).parent.parent / "tmp" / "report.txt"
    with ui.Progress() as progress:
        progress.set(message=wait_message)
        # Called only for its side effect: it writes the report file.
        notebook_nb()
        yield report_path.read_text()

@render.download(
    filename="dp-wizard-report.csv",
    # The payload is CSV, so advertise it as such rather than as plain text.
    media_type="text/csv",
)
async def download_table():
    """
    Serve the CSV report as a download.

    The report file is produced as a side effect of evaluating the
    notebook, so the notebook is evaluated first and its value discarded.
    A "please wait" progress message is shown while this happens.
    """
    with ui.Progress() as progress:
        progress.set(message=wait_message)
        notebook_nb()  # Evaluate just for the side effect of creating report.
        report_csv = (
            Path(__file__).parent.parent / "tmp" / "report.csv"
        ).read_text()
        yield report_csv
2 changes: 2 additions & 0 deletions dp_wizard/tmp/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
demo.csv
report.txt
report.csv
50 changes: 45 additions & 5 deletions dp_wizard/utils/code_generators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,17 @@ def __init__(self, analysis_plan: AnalysisPlan):
@abstractmethod
def _make_context(self) -> str: ... # pragma: no cover

def _make_extra_blocks(self):
return {}

def make_py(self):
return str(
Template(self.root_template).fill_blocks(
IMPORTS_BLOCK=_make_imports(),
COLUMNS_BLOCK=self._make_columns(self.columns),
COLUMNS_BLOCK=self._make_columns(),
CONTEXT_BLOCK=self._make_context(),
QUERIES_BLOCK=self._make_queries(self.columns.keys()),
QUERIES_BLOCK=self._make_queries(),
**self._make_extra_blocks(),
)
)

Expand All @@ -66,22 +70,23 @@ def _make_margins_dict(self, bin_names: Iterable[str]):
margins_dict = "{" + "".join(margins) + "\n }"
return margins_dict

def _make_columns(self, columns: dict[str, AnalysisPlanColumn]):
def _make_columns(self):
return "\n".join(
make_column_config_block(
name=name,
lower_bound=col.lower_bound,
upper_bound=col.upper_bound,
bin_count=col.bin_count,
)
for name, col in columns.items()
for name, col in self.columns.items()
)

def _make_queries(self, column_names: Iterable[str]):
def _make_queries(self):
confidence_note = (
"The actual value is within the shown range "
f"with {int(confidence * 100)}% confidence."
)
column_names = self.columns.keys()
return f"confidence = {confidence} # {confidence_note}\n\n" + "\n".join(
_make_query(column_name) for column_name in column_names
)
Expand Down Expand Up @@ -115,6 +120,41 @@ class NotebookGenerator(_CodeGenerator):
def _make_context(self):
return str(self._make_partial_context().fill_values(CSV_PATH=self.csv_path))

def _make_extra_blocks(self):
    """
    Build the reports block for the notebook template.

    One "report_kv" entry is rendered per column and the entries are
    joined into a single dict expression. That expression is filled into
    the "reports" template together with the column info, the input path,
    epsilon, and the paths of the text and CSV report files in the
    tmp directory.
    """
    entries = []
    for name in self.columns:
        identifier = name_to_identifier(name)
        entry = (
            Template("report_kv")
            .fill_values(
                NAME=name,
                CONFIDENCE=confidence,
            )
            .fill_expressions(
                IDENTIFIER_HISTOGRAM=f"{identifier}_histogram",
                IDENTIFIER_ACCURACY=f"{identifier}_accuracy",
            )
        )
        entries.append(str(entry))
    outputs_expression = "{" + ",".join(entries) + "}"

    columns_info = {
        name: column._asdict() for name, column in self.columns.items()
    }
    tmp_dir = Path(__file__).parent.parent.parent / "tmp"
    reports_template = (
        Template("reports")
        .fill_expressions(
            OUTPUTS=outputs_expression,
            COLUMNS=columns_info,
        )
        .fill_values(
            CSV_PATH=self.csv_path,
            EPSILON=self.epsilon,
            TXT_REPORT_PATH=str(tmp_dir / "report.txt"),
            CSV_REPORT_PATH=str(tmp_dir / "report.csv"),
        )
    )
    return {"REPORTS_BLOCK": str(reports_template)}


class ScriptGenerator(_CodeGenerator):
root_template = "script"
Expand Down
6 changes: 6 additions & 0 deletions dp_wizard/utils/code_generators/no-tests/_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@
# +
QUERIES_BLOCK
# -

# The code below produces a summary report.

# +
REPORTS_BLOCK
# -
5 changes: 5 additions & 0 deletions dp_wizard/utils/code_generators/no-tests/_report_kv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
NAME: {
"histogram": dict(zip(*df_to_columns(IDENTIFIER_HISTOGRAM))),
"accuracy": IDENTIFIER_ACCURACY,
"confidence": CONFIDENCE,
}
53 changes: 53 additions & 0 deletions dp_wizard/utils/code_generators/no-tests/_reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from yaml import dump
from pathlib import Path
import csv


# https://stackoverflow.com/a/6027615/10727889
def flatten_dict(dictionary, parent_key=""):
    """
    Walk tree to return flat dictionary.

    Nested keys are joined to their parent's key with ": ".
    Keys below the top level are formatted as text, so keys which are
    not strings (numbers, for example) are accepted.
    Top-level keys are returned unchanged.

    >>> from pprint import pp
    >>> pp(flatten_dict({
    ...     "inputs": {
    ...         "data": "fake.csv"
    ...     },
    ...     "outputs": {
    ...         "a column": {
    ...             "(0, 1]": 24,
    ...             "(1, 2]": 42,
    ...         }
    ...     }
    ... }))
    {'inputs: data': 'fake.csv',
     'outputs: a column: (0, 1]': 24,
     'outputs: a column: (1, 2]': 42}
    >>> flatten_dict({"counts": {1: "one"}})
    {'counts: 1': 'one'}
    """
    separator = ": "
    flat = {}
    for key, value in dictionary.items():
        # Compare against the empty default, rather than testing truthiness,
        # so that a falsy parent key like 0 still becomes a prefix.
        if parent_key == "":
            new_key = key
        else:
            # Formatting, rather than "+", tolerates keys which are not strings.
            new_key = f"{parent_key}{separator}{key}"
        if isinstance(value, dict):
            flat.update(flatten_dict(value, new_key))
        else:
            flat[new_key] = value
    return flat


# Gather the inputs and the outputs into one nested report.
report = {
    "inputs": {
        "data": CSV_PATH,
        "epsilon": EPSILON,
        "columns": COLUMNS,
    },
    "outputs": OUTPUTS,
}

# Show the report as text, and save that same text to a file.
report_text = dump(report)
print(report_text)
Path(TXT_REPORT_PATH).write_text(report_text)

# Also save a flattened copy as a table, with one key and value per row.
flat_report = flatten_dict(report)
with Path(CSV_REPORT_PATH).open(mode="w", newline="") as handle:
    csv.writer(handle).writerows(flat_report.items())
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies = [
"jupyter-client",
"nbconvert",
"ipykernel",
"pyyaml",
]

[project.scripts]
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ scipy<1.14
# Conversion:
jupytext
jupyter-client
pyyaml
nbconvert
ipykernel
# May also require:
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ python-slugify==8.0.4
# via pytest-playwright
pyyaml==6.0.2
# via
# -r requirements-dev.in
# jupytext
# pre-commit
pyzmq==26.2.0
Expand Down
34 changes: 31 additions & 3 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,42 @@ def expect_no_error():
expect_visible(download_results_text)
expect_no_error()

with page.expect_download() as download_info:
# Text Report:
with page.expect_download() as text_report_download_info:
page.get_by_text("Download report (.txt)").click()
expect_no_error()

report_download = text_report_download_info.value
report = report_download.path().read_text()
assert "confidence: 0.95" in report

# CSV Report:
with page.expect_download() as csv_report_download_info:
page.get_by_text("Download table (.csv)").click()
expect_no_error()

report_download = csv_report_download_info.value
report = report_download.path().read_text()
assert "outputs: grade: confidence,0.95" in report

# Script:
with page.expect_download() as script_download_info:
page.get_by_text("Download script").click()
expect_no_error()

download = download_info.value
script = download.path().read_text()
script_download = script_download_info.value
script = script_download.path().read_text()
assert "privacy_unit = dp.unit_of(contributions=42)" in script

# Notebook:
with page.expect_download() as notebook_download_info:
page.get_by_text("Download notebook").click()
expect_no_error()

notebook_download = notebook_download_info.value
notebook = notebook_download.path().read_text()
assert "privacy_unit = dp.unit_of(contributions=42)" in notebook

# -- Feedback --
page.get_by_text("Feedback").click()
iframe = page.locator("#feedback-iframe")
Expand Down

0 comments on commit 23034e9

Please sign in to comment.