refactor: make low-verbosity subsection definition more transparent

datamole-ai · Sep 13, 2023 · 6c45f19 · 6c45f19
1 parent af97784
commit 6c45f19
Show file tree

Hide file tree

Showing 5 changed files with 70 additions and 80 deletions.
diff --git a/edvart/report_sections/bivariate_analysis.py b/edvart/report_sections/bivariate_analysis.py
@@ -79,6 +79,9 @@ class BivariateAnalysisSubsection(IntEnum):
         def __str__(self):
             return self.name
 
+    # By default use all subsections
+    _DEFAULT_SUBSECTIONS_TO_SHOW = list(BivariateAnalysisSubsection)
+
     def __init__(
         self,
         subsections: Optional[List[BivariateAnalysisSubsection]] = None,
@@ -99,25 +102,16 @@ def __init__(
         subsec = BivariateAnalysis.BivariateAnalysisSubsection
 
         # Store subsection verbosities
-        verbosities = {
+        self.subsection_verbosities = {
             subsec.CorrelationPlot: verbosity_correlations,
             subsec.PairPlot: verbosity_pairplot,
             subsec.ContingencyTable: verbosity_contingency_table,
         }
 
-        # By default use all subsections
         if subsections is None:
-            subsections_all = list(BivariateAnalysis.BivariateAnalysisSubsection)
+            self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
         else:
-            subsections_all = subsections
-
-        # Store subsections with LOW verbosity
-        self.subsections_low_verbosity = [
-            sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
-        ]
-
-        if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
-            self.subsections_low_verbosity = None
+            self.subsections_to_show = subsections
 
         if (columns_x is None) != (columns_y is None):
             raise ValueError("Either both or neither of columns_x, columns_y must be specified.")
@@ -145,7 +139,9 @@ def __init__(
             ),
         }
 
-        subsections_implementations = [enum_to_implementation[sub] for sub in subsections_all]
+        subsections_implementations = [
+            enum_to_implementation[sub] for sub in self.subsections_to_show
+        ]
         super().__init__(subsections_implementations, verbosity, columns)
 
         self.columns_x = columns_x
@@ -229,10 +225,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
         cells.append(section_header)
         if self.verbosity == Verbosity.LOW:
             code = "bivariate_analysis(df=df"
-            if self.subsections_low_verbosity is not None:
+            subsections_to_show_with_low_verbo = [
+                sub
+                for sub in self.subsections_to_show
+                if self.subsection_verbosities[sub] == Verbosity.LOW
+            ]
+            if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW:
                 arg_subsections_names = [
                     f"BivariateAnalysis.BivariateAnalysisSubsection.{str(sub)}"
-                    for sub in self.subsections_low_verbosity
+                    for sub in subsections_to_show_with_low_verbo
                 ]
 
                 code += f", subsections={arg_subsections_names}".replace("'", "")

diff --git a/edvart/report_sections/dataset_overview.py b/edvart/report_sections/dataset_overview.py
@@ -67,6 +67,9 @@ class OverviewSubsection(IntEnum):
         def __str__(self):
             return self.name
 
+    # By default use all subsections
+    _DEFAULT_SUBSECTIONS_TO_SHOW = list(OverviewSubsection)
+
     def __init__(
         self,
         subsections: Optional[List[OverviewSubsection]] = None,
@@ -93,7 +96,7 @@ def __init__(
         subsec = Overview.OverviewSubsection
 
         # Store subsection verbosities
-        verbosities = {
+        self.subsection_verbosities = {
             subsec.QuickInfo: verbosity_quick_info,
             subsec.DataTypes: verbosity_data_types,
             subsec.DataPreview: verbosity_data_preview,
@@ -103,19 +106,10 @@ def __init__(
             subsec.DuplicateRows: verbosity_duplicate_rows,
         }
 
-        # By default use all subsections
         if subsections is None:
-            subsections_all = list(Overview.OverviewSubsection)
+            self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
         else:
-            subsections_all = subsections
-
-        # Store subsections with LOW verbosity
-        self.subsections_low_verbosity = [
-            sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
-        ]
-
-        if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
-            self.subsections_low_verbosity = None
+            self.subsections_to_show = subsections
 
         # Construct objects that implement subsections
         enum_to_implementation = {
@@ -130,13 +124,9 @@ def __init__(
             subsec.DuplicateRows: DuplicateRows(verbosity_duplicate_rows, columns),
         }
 
-        # Store subsection selection
-        if subsections is None:
-            subsections_implementations = [
-                enum_to_implementation[sub] for sub in Overview.OverviewSubsection
-            ]
-        else:
-            subsections_implementations = [enum_to_implementation[sub] for sub in subsections]
+        subsections_implementations = [
+            enum_to_implementation[sub] for sub in self.subsections_to_show
+        ]
         super().__init__(subsections_implementations, verbosity, columns)
 
     @property
@@ -202,10 +192,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
 
         if self.verbosity == Verbosity.LOW:
             code = "overview_analysis(df=df"
-            if self.subsections_low_verbosity is not None:
+            subsections_to_show_with_low_verbo = [
+                sub
+                for sub in self.subsections_to_show
+                if self.subsection_verbosities[sub] == Verbosity.LOW
+            ]
+            if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW:
                 arg_subsections_names = [
                     f"Overview.OverviewSubsection.{str(sub)}"
-                    for sub in self.subsections_low_verbosity
+                    for sub in subsections_to_show_with_low_verbo
                 ]
                 code += f", subsections={arg_subsections_names}".replace("'", "")
             if self.columns is not None:

diff --git a/edvart/report_sections/multivariate_analysis.py b/edvart/report_sections/multivariate_analysis.py
@@ -70,6 +70,9 @@ class MultivariateAnalysisSubsection(IntEnum):
         def __str__(self):
             return self.name
 
+    # By default use all subsections
+    _DEFAULT_SUBSECTIONS_TO_SHOW = list(MultivariateAnalysisSubsection)
+
     def __init__(
         self,
         df: pd.DataFrame,
@@ -96,27 +99,18 @@ def __init__(
         )
 
         subsec = MultivariateAnalysis.MultivariateAnalysisSubsection
-        verbosities = {
+        self.subsection_verbosities = {
             subsec.PCA: verbosity_pca,
             subsec.ParallelCoordinates: verbosity_parallel_coordinates,
             subsec.ParallelCategories: verbosity_parallel_categories,
         }
         if UMAP_AVAILABLE:
-            verbosities[subsec.UMAP] = verbosity_umap
+            self.subsection_verbosities[subsec.UMAP] = verbosity_umap
 
-        # By default use all subsections
         if subsections is None:
-            subsections_all = list(subsec)
+            self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
         else:
-            subsections_all = subsections
-
-        # Store subsections with LOW verbosity
-        self.subsections_low_verbosity = [
-            sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
-        ]
-
-        if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
-            self.subsections_low_verbosity = None
+            self.subsections_to_show = subsections
 
         enum_to_implementation = {
             subsec.PCA: PCA(df, verbosity_pca, columns, color_col=color_col),
@@ -132,7 +126,9 @@ def __init__(
                 df, verbosity_umap, columns, color_col=color_col
             )
 
-        subsections_implementations = [enum_to_implementation[sub] for sub in subsections_all]
+        subsections_implementations = [
+            enum_to_implementation[sub] for sub in self.subsections_to_show
+        ]
 
         self.color_col = color_col
         super().__init__(subsections_implementations, verbosity, columns)
@@ -212,10 +208,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
         cells.append(section_header)
         if self.verbosity == Verbosity.LOW:
             code = "multivariate_analysis(df=df"
-            if self.subsections_low_verbosity is not None:
+            subsections_to_show_with_low_verbo = [
+                sub
+                for sub in self.subsections_to_show
+                if self.subsection_verbosities[sub] == Verbosity.LOW
+            ]
+            if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW:
                 arg_subsections_names = [
                     f"MultivariateAnalysis.MultivariateAnalysisSubsection.{str(sub)}"
-                    for sub in self.subsections_low_verbosity
+                    for sub in subsections_to_show_with_low_verbo
                 ]
                 code += f", subsections={arg_subsections_names}".replace("'", "")
             if self.columns is not None:

diff --git a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py
@@ -114,7 +114,7 @@ def __init__(
 
         subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection
 
-        verbosities = {
+        self.subsection_verbosities = {
             subsec.TimeSeriesLinePlot: verbosity_time_series_line_plot,
             subsec.RollingStatistics: verbosity_rolling_statistics,
             subsec.BoxplotsOverTime: verbosity_boxplots_over_time,
@@ -135,7 +135,7 @@ def __init__(
             subsec.StationarityTests: StationarityTests(verbosity_stationarity_tests, columns),
             subsec.Autocorrelation: Autocorrelation(verbosity_autocorrelation, columns),
         }
-
+        # Add FT and STFT only if their required parameters are specified
         if sampling_rate is not None:
             enum_to_implementation[subsec.FourierTransform] = FourierTransform(
                 sampling_rate, verbosity_fourier_transform, columns
@@ -156,25 +156,17 @@ def __init__(
                     "Need to set a `sampling_rate` to plot Short-time Fourier transform."
                 )
 
-        # By default use all subsections, FT and STFT only if required parameters specified
+        self.default_subsections_to_show = list(enum_to_implementation.keys())
+
         if subsections is None:
-            subsections_all = list(enum_to_implementation.keys())
+            self.subsections_to_show = self.default_subsections_to_show
         else:
-            subsections_all = subsections
+            self.subsections_to_show = subsections
 
-        # Store subsections with LOW verbosity
-        self.subsections_low_verbosity = [
-            sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
+        subsections_implementations = [
+            enum_to_implementation[sub] for sub in self.subsections_to_show
         ]
 
-        if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
-            self.subsections_low_verbosity = None
-
-        if subsections is None:
-            subsections_implementations = list(enum_to_implementation.values())
-        else:
-            subsections_implementations = [enum_to_implementation[sub] for sub in subsections]
-
         super().__init__(subsections_implementations, verbosity, columns)
 
     @property
@@ -245,24 +237,24 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
         if self.verbosity == Verbosity.LOW:
             subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection
             code = "timeseries_analysis(df=df"
-
-            if self.subsections_low_verbosity is not None:
+            subsections_to_show_with_low_verbo = [
+                sub
+                for sub in self.subsections_to_show
+                if self.subsection_verbosities[sub] == Verbosity.LOW
+            ]
+            if subsections_to_show_with_low_verbo != self.default_subsections_to_show:
                 arg_subsections_names = [
                     f"TimeseriesAnalysis.TimeseriesAnalysisSubsection.{str(sub)}"
-                    for sub in self.subsections_low_verbosity
+                    for sub in subsections_to_show_with_low_verbo
                 ]
                 code += f", subsections={arg_subsections_names}".replace("'", "")
-
-            stft_included_or_empty = (
-                self.subsections_low_verbosity is None
-                or subsec.ShortTimeFT in self.subsections_low_verbosity
-            )
+            stft_included = subsec.ShortTimeFT in subsections_to_show_with_low_verbo
             include_sampling_rate = self.sampling_rate is not None and (
-                stft_included_or_empty or subsec.FourierTransform in self.subsections_low_verbosity
+                stft_included or subsec.FourierTransform in subsections_to_show_with_low_verbo
             )
             if include_sampling_rate:
                 code += f", sampling_rate={self.sampling_rate}"
-                if self.stft_window_size is not None and stft_included_or_empty:
+                if self.stft_window_size is not None and stft_included:
                     code += f", stft_window_size={self.stft_window_size}"
 
             if self.columns is not None:

diff --git a/tests/test_timeseries_analysis.py b/tests/test_timeseries_analysis.py
@@ -345,6 +345,7 @@ def test_verbosity_low_different_subsection_verbosities():
         sampling_rate=1,
         stft_window_size=2,
         verbosity_rolling_statistics=Verbosity.MEDIUM,
+        verbosity_fourier_transform=Verbosity.MEDIUM,
         verbosity_short_time_ft=Verbosity.HIGH,
     )
 
@@ -355,9 +356,9 @@ def test_verbosity_low_different_subsection_verbosities():
     expected_code = [
         "timeseries_analysis(df=df, "
         "subsections=[TimeseriesAnalysis.TimeseriesAnalysisSubsection.TimeSeriesLinePlot, "
-        "TimeseriesAnalysis.TimeseriesAnalysisSubsection.FourierTransform, "
         "TimeseriesAnalysis.TimeseriesAnalysisSubsection.StationarityTests, "
-        "TimeseriesAnalysis.TimeseriesAnalysisSubsection.BoxplotsOverTime], sampling_rate=1)",
+        "TimeseriesAnalysis.TimeseriesAnalysisSubsection.BoxplotsOverTime])",
+        "fourier_transform(df=df, sampling_rate=1)",
         "rolling_statistics(df=df)",
         (
             get_code(timeseries_analysis.ShortTimeFT.short_time_ft)