From 6c45f1922cf1c65d06ea2faa6d0041cb464fb1b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Luk=C3=A1ny?= Date: Wed, 13 Sep 2023 14:54:12 +0200 Subject: [PATCH] refactor: make low verbosity subsection definition more transparent --- edvart/report_sections/bivariate_analysis.py | 31 ++++++------- edvart/report_sections/dataset_overview.py | 37 +++++++--------- .../report_sections/multivariate_analysis.py | 33 +++++++------- .../timeseries_analysis.py | 44 ++++++++----------- tests/test_timeseries_analysis.py | 5 ++- 5 files changed, 70 insertions(+), 80 deletions(-) diff --git a/edvart/report_sections/bivariate_analysis.py b/edvart/report_sections/bivariate_analysis.py index 97d40cc..c89a93c 100644 --- a/edvart/report_sections/bivariate_analysis.py +++ b/edvart/report_sections/bivariate_analysis.py @@ -79,6 +79,9 @@ class BivariateAnalysisSubsection(IntEnum): def __str__(self): return self.name + # By default use all subsections + _DEFAULT_SUBSECTIONS_TO_SHOW = list(BivariateAnalysisSubsection) + def __init__( self, subsections: Optional[List[BivariateAnalysisSubsection]] = None, @@ -99,25 +102,16 @@ def __init__( subsec = BivariateAnalysis.BivariateAnalysisSubsection # Store subsection verbosities - verbosities = { + self.subsection_verbosities = { subsec.CorrelationPlot: verbosity_correlations, subsec.PairPlot: verbosity_pairplot, subsec.ContingencyTable: verbosity_contingency_table, } - # By default use all subsections if subsections is None: - subsections_all = list(BivariateAnalysis.BivariateAnalysisSubsection) + self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW else: - subsections_all = subsections - - # Store subsections with LOW verbosity - self.subsections_low_verbosity = [ - sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW - ] - - if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None: - self.subsections_low_verbosity = None + self.subsections_to_show = subsections if (columns_x is None) != (columns_y is None): raise ValueError("Either both or neither of columns_x, columns_y must be specified.") @@ -145,7 +139,9 @@ def __init__( ), } - subsections_implementations = [enum_to_implementation[sub] for sub in subsections_all] + subsections_implementations = [ + enum_to_implementation[sub] for sub in self.subsections_to_show + ] super().__init__(subsections_implementations, verbosity, columns) self.columns_x = columns_x @@ -229,10 +225,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: cells.append(section_header) if self.verbosity == Verbosity.LOW: code = "bivariate_analysis(df=df" - if self.subsections_low_verbosity is not None: + subsections_to_show_with_low_verbo = [ + sub + for sub in self.subsections_to_show + if self.subsection_verbosities[sub] == Verbosity.LOW + ] + if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW: arg_subsections_names = [ f"BivariateAnalysis.BivariateAnalysisSubsection.{str(sub)}" - for sub in self.subsections_low_verbosity + for sub in subsections_to_show_with_low_verbo ] code += f", subsections={arg_subsections_names}".replace("'", "") diff --git a/edvart/report_sections/dataset_overview.py b/edvart/report_sections/dataset_overview.py index 7b6264f..4515f96 100644 --- a/edvart/report_sections/dataset_overview.py +++ b/edvart/report_sections/dataset_overview.py @@ -67,6 +67,9 @@ class OverviewSubsection(IntEnum): def __str__(self): return self.name + # By default use all subsections + _DEFAULT_SUBSECTIONS_TO_SHOW = list(OverviewSubsection) + def __init__( self, subsections: Optional[List[OverviewSubsection]] = None, @@ -93,7 +96,7 @@ def __init__( subsec = Overview.OverviewSubsection # Store subsection verbosities - verbosities = { + self.subsection_verbosities = { subsec.QuickInfo: verbosity_quick_info, subsec.DataTypes: verbosity_data_types, subsec.DataPreview: verbosity_data_preview, @@ -103,19 +106,10 @@ def __init__( subsec.DuplicateRows: verbosity_duplicate_rows, } - # By default use all subsections if subsections is None: - subsections_all = list(Overview.OverviewSubsection) + self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW else: - subsections_all = subsections - - # Store subsections with LOW verbosity - self.subsections_low_verbosity = [ - sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW - ] - - if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None: - self.subsections_low_verbosity = None + self.subsections_to_show = subsections # Construct objects that implement subsections enum_to_implementation = { @@ -130,13 +124,9 @@ def __init__( subsec.DuplicateRows: DuplicateRows(verbosity_duplicate_rows, columns), } - # Store subsection selection - if subsections is None: - subsections_implementations = [ - enum_to_implementation[sub] for sub in Overview.OverviewSubsection - ] - else: - subsections_implementations = [enum_to_implementation[sub] for sub in subsections] + subsections_implementations = [ + enum_to_implementation[sub] for sub in self.subsections_to_show + ] super().__init__(subsections_implementations, verbosity, columns) @property @@ -202,10 +192,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: if self.verbosity == Verbosity.LOW: code = "overview_analysis(df=df" - if self.subsections_low_verbosity is not None: + subsections_to_show_with_low_verbo = [ + sub + for sub in self.subsections_to_show + if self.subsection_verbosities[sub] == Verbosity.LOW + ] + if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW: arg_subsections_names = [ f"Overview.OverviewSubsection.{str(sub)}" - for sub in self.subsections_low_verbosity + for sub in subsections_to_show_with_low_verbo ] code += f", subsections={arg_subsections_names}".replace("'", "") if self.columns is not None: diff --git a/edvart/report_sections/multivariate_analysis.py b/edvart/report_sections/multivariate_analysis.py index e576f7b..0154d21 100644 --- a/edvart/report_sections/multivariate_analysis.py +++ b/edvart/report_sections/multivariate_analysis.py @@ -70,6 +70,9 @@ class MultivariateAnalysisSubsection(IntEnum): def __str__(self): return self.name + # By default use all subsections + _DEFAULT_SUBSECTIONS_TO_SHOW = list(MultivariateAnalysisSubsection) + def __init__( self, df: pd.DataFrame, @@ -96,27 +99,18 @@ def __init__( ) subsec = MultivariateAnalysis.MultivariateAnalysisSubsection - verbosities = { + self.subsection_verbosities = { subsec.PCA: verbosity_pca, subsec.ParallelCoordinates: verbosity_parallel_coordinates, subsec.ParallelCategories: verbosity_parallel_categories, } if UMAP_AVAILABLE: - verbosities[subsec.UMAP] = verbosity_umap + self.subsection_verbosities[subsec.UMAP] = verbosity_umap - # By default use all subsections if subsections is None: - subsections_all = list(subsec) + self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW else: - subsections_all = subsections - - # Store subsections with LOW verbosity - self.subsections_low_verbosity = [ - sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW - ] - - if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None: - self.subsections_low_verbosity = None + self.subsections_to_show = subsections enum_to_implementation = { subsec.PCA: PCA(df, verbosity_pca, columns, color_col=color_col), @@ -132,7 +126,9 @@ def __init__( df, verbosity_umap, columns, color_col=color_col ) - subsections_implementations = [enum_to_implementation[sub] for sub in subsections_all] + subsections_implementations = [ + enum_to_implementation[sub] for sub in self.subsections_to_show + ] self.color_col = color_col super().__init__(subsections_implementations, verbosity, columns) @@ -212,10 +208,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: cells.append(section_header) if self.verbosity == Verbosity.LOW: code = "multivariate_analysis(df=df" - if self.subsections_low_verbosity is not None: + subsections_to_show_with_low_verbo = [ + sub + for sub in self.subsections_to_show + if self.subsection_verbosities[sub] == Verbosity.LOW + ] + if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW: arg_subsections_names = [ f"MultivariateAnalysis.MultivariateAnalysisSubsection.{str(sub)}" - for sub in self.subsections_low_verbosity + for sub in subsections_to_show_with_low_verbo ] code += f", subsections={arg_subsections_names}".replace("'", "") if self.columns is not None: diff --git a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py index a069dfc..1ec783c 100644 --- a/edvart/report_sections/timeseries_analysis/timeseries_analysis.py +++ b/edvart/report_sections/timeseries_analysis/timeseries_analysis.py @@ -114,7 +114,7 @@ def __init__( subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection - verbosities = { + self.subsection_verbosities = { subsec.TimeSeriesLinePlot: verbosity_time_series_line_plot, subsec.RollingStatistics: verbosity_rolling_statistics, subsec.BoxplotsOverTime: verbosity_boxplots_over_time, @@ -135,7 +135,7 @@ def __init__( subsec.StationarityTests: StationarityTests(verbosity_stationarity_tests, columns), subsec.Autocorrelation: Autocorrelation(verbosity_autocorrelation, columns), } - + # Add FT and STFT only if required parameters specified if sampling_rate is not None: enum_to_implementation[subsec.FourierTransform] = FourierTransform( sampling_rate, verbosity_fourier_transform, columns @@ -156,25 +156,17 @@ def __init__( "Need to set a `sampling_rate` to plot Short-time Fourier transform." ) - # By default use all subsections, FT and STFT only if required parameters specified + self.default_subsections_to_show = list(enum_to_implementation.keys()) + if subsections is None: - subsections_all = list(enum_to_implementation.keys()) + self.subsections_to_show = self.default_subsections_to_show else: - subsections_all = subsections + self.subsections_to_show = subsections - # Store subsections with LOW verbosity - self.subsections_low_verbosity = [ - sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW + subsections_implementations = [ + enum_to_implementation[sub] for sub in self.subsections_to_show ] - if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None: - self.subsections_low_verbosity = None - - if subsections is None: - subsections_implementations = list(enum_to_implementation.values()) - else: - subsections_implementations = [enum_to_implementation[sub] for sub in subsections] - super().__init__(subsections_implementations, verbosity, columns) @property @@ -245,24 +237,24 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: if self.verbosity == Verbosity.LOW: subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection code = "timeseries_analysis(df=df" - - if self.subsections_low_verbosity is not None: + subsections_to_show_with_low_verbo = [ + sub + for sub in self.subsections_to_show + if self.subsection_verbosities[sub] == Verbosity.LOW + ] + if subsections_to_show_with_low_verbo != self.default_subsections_to_show: arg_subsections_names = [ f"TimeseriesAnalysis.TimeseriesAnalysisSubsection.{str(sub)}" - for sub in self.subsections_low_verbosity + for sub in subsections_to_show_with_low_verbo ] code += f", subsections={arg_subsections_names}".replace("'", "") - - stft_included_or_empty = ( - self.subsections_low_verbosity is None - or subsec.ShortTimeFT in self.subsections_low_verbosity - ) + stft_included = subsec.ShortTimeFT in subsections_to_show_with_low_verbo include_sampling_rate = self.sampling_rate is not None and ( - stft_included_or_empty or subsec.FourierTransform in self.subsections_low_verbosity + stft_included or subsec.FourierTransform in subsections_to_show_with_low_verbo ) if include_sampling_rate: code += f", sampling_rate={self.sampling_rate}" - if self.stft_window_size is not None and stft_included_or_empty: + if self.stft_window_size is not None and stft_included: code += f", stft_window_size={self.stft_window_size}" if self.columns is not None: diff --git a/tests/test_timeseries_analysis.py b/tests/test_timeseries_analysis.py index 2532382..b79faf0 100644 --- a/tests/test_timeseries_analysis.py +++ b/tests/test_timeseries_analysis.py @@ -345,6 +345,7 @@ def test_verbosity_low_different_subsection_verbosities(): sampling_rate=1, stft_window_size=2, verbosity_rolling_statistics=Verbosity.MEDIUM, + verbosity_fourier_transform=Verbosity.MEDIUM, verbosity_short_time_ft=Verbosity.HIGH, ) @@ -355,9 +356,9 @@ def test_verbosity_low_different_subsection_verbosities(): expected_code = [ "timeseries_analysis(df=df, " "subsections=[TimeseriesAnalysis.TimeseriesAnalysisSubsection.TimeSeriesLinePlot, " - "TimeseriesAnalysis.TimeseriesAnalysisSubsection.FourierTransform, " "TimeseriesAnalysis.TimeseriesAnalysisSubsection.StationarityTests, " - "TimeseriesAnalysis.TimeseriesAnalysisSubsection.BoxplotsOverTime], sampling_rate=1)", + "TimeseriesAnalysis.TimeseriesAnalysisSubsection.BoxplotsOverTime])", + "fourier_transform(df=df, sampling_rate=1)", "rolling_statistics(df=df)", ( get_code(timeseries_analysis.ShortTimeFT.short_time_ft)