Skip to content

Commit

Permalink
feat: add option to embed data when exporting report to notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
mbelak-dtml committed Feb 29, 2024
1 parent f7b00be commit a3b22f5
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 38,371 deletions.
62 changes: 58 additions & 4 deletions edvart/report.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import base64
import logging
import os
import pickle
import warnings
from abc import ABC
from copy import copy
from enum import Enum, auto
from typing import List, Optional, Tuple, Union

import isort
Expand Down Expand Up @@ -35,6 +37,14 @@ class EmptyReportWarning(UserWarning):
"""Warning raised when a report contains no sections."""


class ExportDataMode(str, Enum):
    """Ways in which the report's data can be made available to an exported notebook."""

    # The data is not exported; the notebook gets a placeholder loading statement.
    NONE = auto()
    # The data is written to a parquet file which the notebook then reads.
    FILE = auto()
    # The data is encoded directly into the notebook's data loading code.
    EMBED = auto()


class ReportBase(ABC):
"""
Abstract base class for reports.
Expand All @@ -55,6 +65,8 @@ class ReportBase(ABC):
"import plotly.io as pio",
}

_DEFAULT_LOAD_DATA_CODE = "df = ... # TODO: Fill in code for loading data"

def __init__(
self,
dataframe: pd.DataFrame,
Expand Down Expand Up @@ -84,27 +96,69 @@ def show(self) -> None:
for section in self.sections:
section.show(self.df)

def _export_data(
    self, export_data_mode: ExportDataMode, notebook_file_path: Union[str, os.PathLike]
) -> Tuple[str, List[str]]:
    """
    Generates code for loading exported data into the exported notebook.

    Parameters
    ----------
    export_data_mode : ExportDataMode
        The mode of exporting the data.
    notebook_file_path : str or PathLike
        Filepath of the exported notebook.

    Returns
    -------
    Tuple[str, List[str]]
        A tuple containing the code for loading the data and a list of imports required for
        the code.

    Raises
    ------
    ValueError
        If `export_data_mode` is not one of the supported modes.
    """
    if export_data_mode == ExportDataMode.NONE:
        # Nothing is exported; the notebook only gets a placeholder to be filled in.
        return self._DEFAULT_LOAD_DATA_CODE, []

    if export_data_mode == ExportDataMode.FILE:
        notebook_extension = ".ipynb"
        base_path = str(notebook_file_path)
        # Remove the extension as a suffix. `str.rstrip` would treat ".ipynb" as a set of
        # characters and could also eat the end of the file name ("happy.ipynb" -> "ha").
        if base_path.endswith(notebook_extension):
            base_path = base_path[: -len(notebook_extension)]
        parquet_file_name = base_path + "-data.parquet"
        self.df.to_parquet(parquet_file_name)
        # `!r` emits a valid Python string literal even if the path contains quotes
        # or backslashes (e.g. Windows paths).
        return f"df = pd.read_parquet({parquet_file_name!r})", ["import pandas as pd"]

    if export_data_mode != ExportDataMode.EMBED:
        # Explicit error instead of `assert`, which is stripped when running with -O.
        raise ValueError(f"Unsupported export data mode: {export_data_mode!r}")

    # Serialize the dataframe to parquet in memory and encode it as base85 so that it can be
    # pasted into the notebook as a bytes literal.
    encoded_data = base64.b85encode(self.df.to_parquet())
    return (
        code_dedent(
            f"""
            df_parquet = BytesIO(base64.b85decode({encoded_data!r}.decode()))
            df = pd.read_parquet(df_parquet)"""
        ),
        ["import base64", "import pandas as pd", "from io import BytesIO"],
    )

def export_notebook(
self,
notebook_filepath: str,
notebook_filepath: Union[str, os.PathLike],
dataset_name: str = "[INSERT DATASET NAME]",
dataset_description: str = "[INSERT DATASET DESCRIPTION]",
export_data_mode: ExportDataMode = ExportDataMode.EMBED,
) -> None:
"""Exports the report as an .ipynb file.
Parameters
----------
notebook_filepath : str
notebook_filepath : str or PathLike
Filepath of the exported notebook.
dataset_name : str (default = "[INSERT DATASET NAME]")
Name of dataset to be used in the title of the report.
dataset_description : str (default = "[INSERT DATASET DESCRIPTION]")
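Description of dataset to be used below the title of the report.
export_data_mode : ExportDataMode (default = ExportDataMode.EMBED)
Mode of exporting the data to the notebook. `NONE` inserts a placeholder for loading the data, `FILE` saves the data to a parquet file next to the notebook and loads it from there, `EMBED` stores the data directly in the notebook's data loading code.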
"""
load_data_code, load_data_imports = self._export_data(
export_data_mode, notebook_file_path=notebook_filepath
)
# Generate a notebook containing dataset name and description
self._warn_if_empty()
nb = self._generate_notebook(
dataset_name=dataset_name, dataset_description=dataset_description
dataset_name=dataset_name,
dataset_description=dataset_description,
load_df=load_data_code,
extra_imports=load_data_imports,
)

# Save notebook to file
Expand All @@ -113,9 +167,9 @@ def export_notebook(

def _generate_notebook(
self,
load_df: str,
dataset_name: str = "[INSERT DATASET NAME]",
dataset_description: str = "[INSERT DATASET DESCRIPTION]",
load_df: str = "df = ...",
extra_imports: Optional[List[str]] = None,
show_load_data: bool = True,
) -> nbf.NotebookNode:
Expand Down
38,378 changes: 12 additions & 38,366 deletions examples/report-example.ipynb

Large diffs are not rendered by default.

19 changes: 18 additions & 1 deletion tests/test_report.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import pathlib
import warnings
from contextlib import redirect_stdout

import nbconvert
import nbformat
import numpy as np
import pandas as pd

from edvart.report import DefaultReport, Report
from edvart.report import DefaultReport, ExportDataMode, Report
from edvart.report_sections.bivariate_analysis import BivariateAnalysis
from edvart.report_sections.section_base import Verbosity
from edvart.report_sections.univariate_analysis import UnivariateAnalysis
Expand Down Expand Up @@ -90,3 +93,17 @@ def test_show():
warnings.simplefilter("ignore", UserWarning)
with redirect_stdout(None):
report.show()


def test_exported_notebook_executes(tmp_path: pathlib.Path):
    """Check that notebooks exported with data can be executed without errors."""
    report = Report(dataframe=_get_test_df())

    report.add_overview()
    for export_data_mode in (ExportDataMode.EMBED, ExportDataMode.FILE):
        # The literal needs the `f` prefix, otherwise every mode is exported to the same file
        # named "export_{export_data_mode}.ipynb". The member name is used because formatting
        # a str-mixin enum member directly yields different text across Python versions.
        export_path = tmp_path / f"export_{export_data_mode.name.lower()}.ipynb"
        report.export_notebook(export_path, export_data_mode=export_data_mode)

        notebook = nbformat.read(export_path, as_version=4)
        preprocessor = nbconvert.preprocessors.ExecutePreprocessor(timeout=60)

        # Raises if any cell of the exported notebook fails to execute.
        preprocessor.preprocess(notebook)

0 comments on commit a3b22f5

Please sign in to comment.