From d664e6c576fac1e18c13e56c04ea54fad6912ab4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Bel=C3=A1k?=
 <97168298+mbelak-dtml@users.noreply.github.com>
Date: Mon, 7 Aug 2023 13:13:17 +0200
Subject: [PATCH] feat!: Refactor `Report` into `Report` and `DefaultReport`.
 (#47)

Resolves #45
---
 api-example.md           |   4 +-
 docs/advanced.rst        |  24 ++--
 docs/getting_started.rst |   6 +-
 edvart/__init__.py       |   1 +
 edvart/report.py         | 254 ++++++++++++++++++++++-----------------
 tests/test_report.py     |  54 +++++++--
 6 files changed, 207 insertions(+), 136 deletions(-)
diff --git a/api-example.md b/api-example.md
index 6948491..e904ffc 100644
--- a/api-example.md
+++ b/api-example.md
@@ -35,7 +35,7 @@ dataset.head()
 ```
 
 ```python
-report = edvart.Report(
+report = edvart.DefaultReport(
     dataset,
     verbosity=0,
     columns_overview=['Name', 'Survived'],
@@ -69,7 +69,7 @@ dataset_ts = edvart.example_datasets.dataset_global_temp()
 ```
 
 ```python
-report_ts = edvart.TimeseriesReport(
+report_ts = edvart.DefaultTimeseriesReport(
     dataset_ts,
     # Monthly data -> analyze yearly seasonality
     sampling_rate=12,
diff --git a/docs/advanced.rst b/docs/advanced.rst
index 90aa44f..1a351bc 100644
--- a/docs/advanced.rst
+++ b/docs/advanced.rst
@@ -10,7 +10,10 @@ Report class
 ------------
 
 The most important class of the package :py:class:`~edvart.report.Report`.
-This class allows you to specify :ref:`verbosity <verbosity>` or specify which columns should be used.
+The report consists of sections, which can be added via methods of the `Report` class.
+The report is empty by default.
+The class :py:class:`~edvart.report.DefaultReport` is a subclass of `Report`,
+which contains a default set of sections.
 
 With created instance of `Report` you can:
 
@@ -114,17 +117,18 @@ or modifying sections settings.
 
 Selection of sections
 ~~~~~~~~~~~~~~~~~~~~~
-
-If you want to use only a subset of sections you have to set
-`use_default_sections` parameter of report to `False` and then you can add your own sections.
+You can add sections using methods `add_*` of the `Report` class.
 
 .. code-block:: python
 
-    # Shows only univariate analysis
+    # Shows only univariate and bivariate analysis
     import edvart
     df = edvart.example_datasets.dataset_titanic()
-    report = edvart.Report(df, use_default_sections=False)
-    report.add_univariate_analysis()
+    report = (
+      edvart.Report(df)
+        .add_univariate_analysis()
+        .add_bivariate_analysis()
+    )
 
 
 Sections configuration
@@ -141,7 +145,7 @@ Or you can set section verbosity (described later).
   import edvart
 
   df = edvart.example_datasets.dataset_titanic()
-  report = edvart.Report(df, columns_overview=["Name", "Survived"], use_default_sections=False)
+  report = edvart.Report(df)
 
   report.add_overview(omit_columns=["PassengerId"]).add_univariate_analysis(
     use_columns=["Name", "Sex", "Age"]
@@ -176,7 +180,7 @@ Examples:
     import edvart
 
     df = edvart.example_datasets.dataset_titanic()
-    edvart.Report(df, verbosity=1).export_notebook("test-export.ipynb")
+    edvart.DefaultReport(df, verbosity=1).export_notebook("test-export.ipynb")
 
 
 .. code-block:: python
@@ -185,4 +189,4 @@ Examples:
     import edvart
 
     df = edvart.example_datasets.dataset_titanic()
-    edvart.Report(df, verbosity=1, verbosity_univariate_analysis=2).export_notebook("test-export.ipynb")
+    edvart.DefaultReport(df, verbosity=1, verbosity_univariate_analysis=2).export_notebook("test-export.ipynb")
diff --git a/docs/getting_started.rst b/docs/getting_started.rst
index 7d2040c..352fa81 100644
--- a/docs/getting_started.rst
+++ b/docs/getting_started.rst
@@ -7,7 +7,7 @@ Getting started
 
     import edvart
     df = edvart.example_datasets.dataset_titanic()
-    edvart.Report(df).show()
+    edvart.DefaultReport(df).show()
 
 2. Generate report notebook
 
@@ -15,7 +15,7 @@ Getting started
 
     import edvart
     df = edvart.example_datasets.dataset_titanic()
-    report = edvart.Report(df)
+    report = edvart.DefaultReport(df)
     report.export_notebook("titanic_report.ipynb")
 
 You can modify the generated notebook if you want to modify some settings.
@@ -28,7 +28,7 @@ For more advanced usage of edvart, please read the documentation section
 
     import edvart
     df = edvart.example_datasets.dataset_titanic()
-    report = edvart.Report(df)
+    report = edvart.DefaultReport(df)
     report.export_html(
         html_filepath="titanic_report.html",
         dataset_name="Titanic",
diff --git a/edvart/__init__.py b/edvart/__init__.py
index 572e2f6..b0123f7 100644
--- a/edvart/__init__.py
+++ b/edvart/__init__.py
@@ -4,6 +4,7 @@
 from importlib.metadata import PackageNotFoundError, version
 
 from edvart import example_datasets
+from edvart.report import DefaultReport, DefaultTimeseriesReport
 from edvart.report import Report
 from edvart.report import Report as create_report
 from edvart.report import TimeseriesReport
diff --git a/edvart/report.py b/edvart/report.py
index e46ca5f..dd86952 100755
--- a/edvart/report.py
+++ b/edvart/report.py
@@ -34,8 +34,6 @@ class ReportBase(ABC):
     ----------
     dataframe : pd.DataFrame
         Data from which to generate the report.
-    use_default_sections : bool (default = True)
-        Whether add the report's default sections to the report.
     verbosity : int (default = 0)
         The default verbosity for the exported code of the entire report, has to be one of
         [0, 1, 2], by default 0.
@@ -49,12 +47,10 @@ class ReportBase(ABC):
     def __init__(
         self,
         dataframe: pd.DataFrame,
-        use_default_sections: bool = True,
         verbosity: int = 0,
     ):
         self._class_logger = logging.getLogger(__name__).getChild(self.__class__.__name__)
         self.df = dataframe
-        self.use_default_sections = use_default_sections
         self.sections = []
         # Check for global verbosity validity
         if verbosity not in [0, 1, 2]:
@@ -653,21 +649,39 @@ def add_table_of_contents(self, include_subsections: bool = True) -> "ReportBase
 
 
 class Report(ReportBase):
-    """This class instantiates an object that the edvart user should mainly use for report
-    configuration and export.
+    """
+    A report for tabular datasets. Contains no sections by default.
+
+    See `DefaultReport` for a report with default sections.
+    See methods `add_*` for adding sections to the report.
+
+    Parameters
+    ----------
+    dataframe : pd.DataFrame
+        Data from which to generate the report.
+    verbosity : int (default = 0)
+        Verbosity of the exported code of the entire report, has to be one of
+        [0, 1, 2], by default 0.
+    """
+
+    def __init__(self, dataframe: pd.DataFrame, verbosity: int = 0):
+        super().__init__(dataframe=dataframe, verbosity=verbosity)
+
 
-    This class is intended for creating general-purpose reports.
-    For creating a report for time-series data, please see `TimeseriesReport`.
+class DefaultReport(Report):
+    """A report for tabular data containing default sections.
+
+    The report contains the following sections:
+    - dataset overview
+    - univariate analysis
+    - bivariate analysis
+    - multivariate analysis
+    - group analysis (if `groupby` is specified)
 
     Parameters
     ----------
     dataframe : pd.DataFrame
         Data from which to generate the report.
-    use_default_sections : bool (default = True)
-        If True, all default sections of the report are added, otherwise you have to add
-        the sections manually using add_<section name>() methods.
-        Default sections for this report are: overview, univariate analysis, bivariate analysis and
-        multivariate analysis.
     verbosity : int (default = 0)
         The default verbosity for the exported code of the entire report, has to be one of
         [0, 1, 2], by default 0.
@@ -701,7 +715,6 @@ class Report(ReportBase):
     def __init__(
         self,
         dataframe: pd.DataFrame,
-        use_default_sections: bool = True,
         verbosity: int = 0,
         verbosity_overview: Optional[int] = None,
         verbosity_univariate_analysis: Optional[int] = None,
@@ -715,7 +728,7 @@ def __init__(
         columns_group_analysis: Optional[List[str]] = None,
         groupby: Union[str, List[str]] = None,
     ):
-        super().__init__(dataframe, use_default_sections, verbosity)
+        super().__init__(dataframe, verbosity)
 
         # If section verbosities are not set, default to the global verbosity
         if verbosity_overview is None:
@@ -726,90 +739,53 @@ def __init__(
             verbosity_bivariate_analysis = verbosity
         if verbosity_multivariate_analysis is None:
             verbosity_multivariate_analysis = verbosity
-        # Add default sections if user doesn't build the report manually
-        if use_default_sections:
-            self.add_table_of_contents(include_subsections=True)
-            self.add_overview(verbosity=verbosity_overview, use_columns=columns_overview)
-            self.add_univariate_analysis(
-                verbosity=verbosity_univariate_analysis,
-                use_columns=columns_univariate_analysis,
-            )
-            if isinstance(groupby, str):
-                color_col = groupby
-            elif hasattr(groupby, "__len__") and len(groupby) == 1:
-                color_col = groupby[0]
-            else:
-                color_col = None
-            self.add_bivariate_analysis(
-                verbosity=verbosity_bivariate_analysis,
-                use_columns=columns_bivariate_analysis,
-                color_col=color_col,
-            )
-            self.add_multivariate_analysis(
-                verbosity=verbosity_multivariate_analysis,
-                use_columns=columns_multivariate_analysis,
-                color_col=color_col,
+
+        # Add default sections
+        self.add_table_of_contents(include_subsections=True)
+        self.add_overview(verbosity=verbosity_overview, use_columns=columns_overview)
+        self.add_univariate_analysis(
+            verbosity=verbosity_univariate_analysis,
+            use_columns=columns_univariate_analysis,
+        )
+        if isinstance(groupby, str):
+            color_col = groupby
+        elif hasattr(groupby, "__len__") and len(groupby) == 1:
+            color_col = groupby[0]
+        else:
+            color_col = None
+        self.add_bivariate_analysis(
+            verbosity=verbosity_bivariate_analysis,
+            use_columns=columns_bivariate_analysis,
+            color_col=color_col,
+        )
+        self.add_multivariate_analysis(
+            verbosity=verbosity_multivariate_analysis,
+            use_columns=columns_multivariate_analysis,
+            color_col=color_col,
+        )
+        if groupby is not None:
+            self.add_group_analysis(
+                groupby=groupby,
+                use_columns=columns_group_analysis,
+                verbosity=verbosity_group_analysis,
             )
-            if groupby is not None:
-                self.add_group_analysis(
-                    groupby=groupby,
-                    use_columns=columns_group_analysis,
-                    verbosity=verbosity_group_analysis,
-                )
 
 
 class TimeseriesReport(ReportBase):
-    """This class instantiates an object that the edvart user should mainly use for report
-    configuration and export, specifically for timeseries data.
+    """
+    A report for time-series data. Contains no sections by default.
 
-    Parameters
-    ----------
-    dataframe : pd.DataFrame
-        Data from which to generate the report. Data needs to be indexed by time: pd.DateTimeIndex
-        or pd.PeriodIndex.
-        The data is assumed to be sorted according to the time index in ascending order.
-    use_default_sections : bool, optional
-        If True, all default sections of the report are added, otherwise you have to add
-        the sections manually using add_<section name>() methods.
-        Default sections for this report are overview, univariate analysis and timeseries analysis.
-    verbosity : int (default = 0)
-        The default verbosity for the exported code of the entire report, has to be one of
-        [0, 1, 2], by default 0.
-    verbosity_overview : int, optional
-        Verbosity of the overview section
-    verbosity_univariate_analysis : int, optional
-        Verbosity of the univariate analysis section
-    verbosity_timeseries_analysis : int, optional
-        Verbosity of the timeseries analysis section
-    columns_overview : List[str], optional
-        Subset of columns to use in overview section
-    columns_univariate_analysis : List[str], optional
-        Subset of columns to use in univariate analysis section
-    columns_timeseries_analysis : List[str], optional
-        Subset of columns to use in timeseries analysis section
-    sampling_rate : int, optional
-        Sampling rate for Fourier transform and Short-time Fourier transform subsections. Determines
-        frequency unit for analysis of frequencies, for example with monthly data and sampling rate
-        12, yearly frequncy spectrum is produced.
-        If not set, these two sections will not be included.
-    stft_window_size : int, optional
-        Windows size for short-time Fourier transform subsection. If not set, STFT will be exluded.
+    See `DefaultTimeseriesReport` for a time-series report with default sections.
+    See methods `add_*` for adding sections to the report.
+
+    Raises
+    ------
+    ValueError
+        If the input dataframe is not indexed by time.
     """
 
-    def __init__(
-        self,
-        dataframe: pd.DataFrame,
-        use_default_sections: bool = True,
-        verbosity: int = 0,
-        verbosity_overview: Optional[int] = None,
-        verbosity_univariate_analysis: Optional[int] = None,
-        verbosity_timeseries_analysis: Optional[int] = None,
-        columns_overview: Optional[List[str]] = None,
-        columns_univariate_analysis: Optional[List[str]] = None,
-        columns_timeseries_analysis: Optional[List[str]] = None,
-        sampling_rate: Optional[int] = None,
-        stft_window_size: Optional[int] = None,
-    ):
+    def __init__(self, dataframe: pd.DataFrame, verbosity: int = 0):
+        super().__init__(dataframe, verbosity)
         if not is_date(dataframe.index):
             raise ValueError(
                 "Input dataframe needs to be indexed by time."
@@ -821,27 +797,6 @@ def __init__(
             dataframe.index = pd.PeriodIndex(dataframe.index)
         else:
             dataframe.index = pd.DatetimeIndex(dataframe.index)
-        super().__init__(dataframe, use_default_sections, verbosity)
-
-        if verbosity_overview is None:
-            verbosity_overview = verbosity
-        if verbosity_univariate_analysis is None:
-            verbosity_univariate_analysis = verbosity
-        if verbosity_timeseries_analysis is None:
-            verbosity_timeseries_analysis = verbosity
-        if use_default_sections:
-            self.add_table_of_contents(include_subsections=True)
-            self.add_overview(verbosity=verbosity_overview, use_columns=columns_overview)
-            self.add_univariate_analysis(
-                verbosity=verbosity_univariate_analysis,
-                use_columns=columns_univariate_analysis,
-            )
-            self.add_timeseries_analysis(
-                verbosity=verbosity_timeseries_analysis,
-                use_columns=columns_timeseries_analysis,
-                sampling_rate=sampling_rate,
-                stft_window_size=stft_window_size,
-            )
 
     def add_timeseries_analysis(
         self,
@@ -928,3 +883,76 @@ def add_timeseries_analysis(
         )
 
         return self
+
+
+class DefaultTimeseriesReport(TimeseriesReport):
+    """A default report for time-series data.
+
+    The report contains the following sections:
+    - dataset overview
+    - univariate analysis
+    - timeseries analysis
+
+    Parameters
+    ----------
+    dataframe : pd.DataFrame
+        Data from which to generate the report. Data needs to be indexed by time: pd.DatetimeIndex
+        or pd.PeriodIndex.
+        The data is assumed to be sorted according to the time index in ascending order.
+    verbosity : int (default = 0)
+        The default verbosity for the exported code of the entire report, has to be one of
+        [0, 1, 2], by default 0.
+    verbosity_overview : int, optional
+        Verbosity of the overview section
+    verbosity_univariate_analysis : int, optional
+        Verbosity of the univariate analysis section
+    verbosity_timeseries_analysis : int, optional
+        Verbosity of the timeseries analysis section
+    columns_overview : List[str], optional
+        Subset of columns to use in overview section
+    columns_univariate_analysis : List[str], optional
+        Subset of columns to use in univariate analysis section
+    columns_timeseries_analysis : List[str], optional
+        Subset of columns to use in timeseries analysis section
+    sampling_rate : int, optional
+        Sampling rate for Fourier transform and Short-time Fourier transform subsections. Determines
+        frequency unit for analysis of frequencies, for example with monthly data and sampling rate
+        12, yearly frequency spectrum is produced.
+        If not set, these two sections will not be included.
+    stft_window_size : int, optional
+        Window size for short-time Fourier transform subsection. If not set, STFT will be excluded.
+    """
+
+    def __init__(
+        self,
+        dataframe: pd.DataFrame,
+        verbosity: int = 0,
+        verbosity_overview: Optional[int] = None,
+        verbosity_univariate_analysis: Optional[int] = None,
+        verbosity_timeseries_analysis: Optional[int] = None,
+        columns_overview: Optional[List[str]] = None,
+        columns_univariate_analysis: Optional[List[str]] = None,
+        columns_timeseries_analysis: Optional[List[str]] = None,
+        sampling_rate: Optional[int] = None,
+        stft_window_size: Optional[int] = None,
+    ):
+        super().__init__(dataframe, verbosity)
+
+        if verbosity_overview is None:
+            verbosity_overview = verbosity
+        if verbosity_univariate_analysis is None:
+            verbosity_univariate_analysis = verbosity
+        if verbosity_timeseries_analysis is None:
+            verbosity_timeseries_analysis = verbosity
+        self.add_table_of_contents(include_subsections=True)
+        self.add_overview(verbosity=verbosity_overview, use_columns=columns_overview)
+        self.add_univariate_analysis(
+            verbosity=verbosity_univariate_analysis,
+            use_columns=columns_univariate_analysis,
+        )
+        self.add_timeseries_analysis(
+            verbosity=verbosity_timeseries_analysis,
+            use_columns=columns_timeseries_analysis,
+            sampling_rate=sampling_rate,
+            stft_window_size=stft_window_size,
+        )
diff --git a/tests/test_report.py b/tests/test_report.py
index 5439b38..bcf4da7 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -4,14 +4,54 @@
 import numpy as np
 import pandas as pd
 
-from edvart import Report
+from edvart.report import DefaultReport, Report
 
 
-def test_column_selection():
-    test_df = pd.DataFrame(
+def _get_test_df() -> pd.DataFrame:
+    return pd.DataFrame(
         data=np.random.random_sample((50, 20)), columns=[f"Col{i}" for i in range(20)]
     )
-    report = Report(dataframe=test_df, use_default_sections=False)
+
+
+def test_report():
+    report = Report(dataframe=_get_test_df())
+    assert len(report.sections) == 0, "Report should be empty"
+
+    report.add_overview(verbosity=1)
+    assert len(report.sections) == 1, "Report should have one section"
+
+    report.add_bivariate_analysis(verbosity=2, use_columns=["Col1", "Col2", "Col3"])
+    assert len(report.sections) == 2, "Report should have two sections"
+
+    assert report.sections[0].name == "Overview", "Wrong section name"
+    assert report.sections[0].verbosity == 1, "Wrong section verbosity"
+    assert report.sections[0].columns is None, "Default column selection should be None"
+
+    assert report.sections[1].columns == ["Col1", "Col2", "Col3"], "Wrong columns"
+
+
+def test_default_report():
+    report = DefaultReport(
+        dataframe=_get_test_df(),
+        verbosity_overview=1,
+        verbosity_univariate_analysis=2,
+        columns_bivariate_analysis=["Col1", "Col2", "Col3"],
+    )
+    assert len(report.sections) > 0, "Default report should not be empty"
+
+    assert report.sections[1].verbosity == 1, "Wrong section verbosity"
+    assert report.sections[1].columns is None, "Default column selection should be None"
+
+    assert report.sections[2].verbosity == 2, "Wrong section verbosity"
+    assert report.sections[2].columns is None, "Default column selection should be None"
+
+    assert report.sections[3].verbosity == 0, "Wrong section verbosity"
+    assert report.sections[3].columns == ["Col1", "Col2", "Col3"], "Wrong columns"
+
+
+def test_column_selection():
+    test_df = _get_test_df()
+    report = Report(dataframe=test_df)
 
     # Default column selection
     report.add_overview()
@@ -34,10 +74,8 @@ def test_column_selection():
 
 
 def test_show():
-    test_df = pd.DataFrame(
-        data=np.random.random_sample((50, 20)), columns=[f"Col{i}" for i in range(20)]
-    )
-    report = Report(dataframe=test_df, use_default_sections=False)
+    test_df = _get_test_df()
+    report = Report(dataframe=test_df)
 
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", UserWarning)