Skip to content

Commit

Permalink
refactor: make low verbosity subsection definition more transparent
Browse files Browse the repository at this point in the history
  • Loading branch information
lukany committed Sep 13, 2023
1 parent af97784 commit 6c45f19
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 80 deletions.
31 changes: 16 additions & 15 deletions edvart/report_sections/bivariate_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class BivariateAnalysisSubsection(IntEnum):
def __str__(self):
return self.name

# By default use all subsections
_DEFAULT_SUBSECTIONS_TO_SHOW = list(BivariateAnalysisSubsection)

def __init__(
self,
subsections: Optional[List[BivariateAnalysisSubsection]] = None,
Expand All @@ -99,25 +102,16 @@ def __init__(
subsec = BivariateAnalysis.BivariateAnalysisSubsection

# Store subsection verbosities
verbosities = {
self.subsection_verbosities = {
subsec.CorrelationPlot: verbosity_correlations,
subsec.PairPlot: verbosity_pairplot,
subsec.ContingencyTable: verbosity_contingency_table,
}

# By default use all subsections
if subsections is None:
subsections_all = list(BivariateAnalysis.BivariateAnalysisSubsection)
self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
else:
subsections_all = subsections

# Store subsections with LOW verbosity
self.subsections_low_verbosity = [
sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
]

if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
self.subsections_low_verbosity = None
self.subsections_to_show = subsections

if (columns_x is None) != (columns_y is None):
raise ValueError("Either both or neither of columns_x, columns_y must be specified.")
Expand Down Expand Up @@ -145,7 +139,9 @@ def __init__(
),
}

subsections_implementations = [enum_to_implementation[sub] for sub in subsections_all]
subsections_implementations = [
enum_to_implementation[sub] for sub in self.subsections_to_show
]
super().__init__(subsections_implementations, verbosity, columns)

self.columns_x = columns_x
Expand Down Expand Up @@ -229,10 +225,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
cells.append(section_header)
if self.verbosity == Verbosity.LOW:
code = "bivariate_analysis(df=df"
if self.subsections_low_verbosity is not None:
subsections_to_show_with_low_verbo = [
sub
for sub in self.subsections_to_show
if self.subsection_verbosities[sub] == Verbosity.LOW
]
if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW:
arg_subsections_names = [
f"BivariateAnalysis.BivariateAnalysisSubsection.{str(sub)}"
for sub in self.subsections_low_verbosity
for sub in subsections_to_show_with_low_verbo
]

code += f", subsections={arg_subsections_names}".replace("'", "")
Expand Down
37 changes: 16 additions & 21 deletions edvart/report_sections/dataset_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class OverviewSubsection(IntEnum):
def __str__(self):
return self.name

# By default use all subsections
_DEFAULT_SUBSECTIONS_TO_SHOW = list(OverviewSubsection)

def __init__(
self,
subsections: Optional[List[OverviewSubsection]] = None,
Expand All @@ -93,7 +96,7 @@ def __init__(
subsec = Overview.OverviewSubsection

# Store subsection verbosities
verbosities = {
self.subsection_verbosities = {
subsec.QuickInfo: verbosity_quick_info,
subsec.DataTypes: verbosity_data_types,
subsec.DataPreview: verbosity_data_preview,
Expand All @@ -103,19 +106,10 @@ def __init__(
subsec.DuplicateRows: verbosity_duplicate_rows,
}

# By default use all subsections
if subsections is None:
subsections_all = list(Overview.OverviewSubsection)
self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
else:
subsections_all = subsections

# Store subsections with LOW verbosity
self.subsections_low_verbosity = [
sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
]

if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
self.subsections_low_verbosity = None
self.subsections_to_show = subsections

# Construct objects that implement subsections
enum_to_implementation = {
Expand All @@ -130,13 +124,9 @@ def __init__(
subsec.DuplicateRows: DuplicateRows(verbosity_duplicate_rows, columns),
}

# Store subsection selection
if subsections is None:
subsections_implementations = [
enum_to_implementation[sub] for sub in Overview.OverviewSubsection
]
else:
subsections_implementations = [enum_to_implementation[sub] for sub in subsections]
subsections_implementations = [
enum_to_implementation[sub] for sub in self.subsections_to_show
]
super().__init__(subsections_implementations, verbosity, columns)

@property
Expand Down Expand Up @@ -202,10 +192,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:

if self.verbosity == Verbosity.LOW:
code = "overview_analysis(df=df"
if self.subsections_low_verbosity is not None:
subsections_to_show_with_low_verbo = [
sub
for sub in self.subsections_to_show
if self.subsection_verbosities[sub] == Verbosity.LOW
]
if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW:
arg_subsections_names = [
f"Overview.OverviewSubsection.{str(sub)}"
for sub in self.subsections_low_verbosity
for sub in subsections_to_show_with_low_verbo
]
code += f", subsections={arg_subsections_names}".replace("'", "")
if self.columns is not None:
Expand Down
33 changes: 17 additions & 16 deletions edvart/report_sections/multivariate_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ class MultivariateAnalysisSubsection(IntEnum):
def __str__(self):
return self.name

# By default use all subsections
_DEFAULT_SUBSECTIONS_TO_SHOW = list(MultivariateAnalysisSubsection)

def __init__(
self,
df: pd.DataFrame,
Expand All @@ -96,27 +99,18 @@ def __init__(
)

subsec = MultivariateAnalysis.MultivariateAnalysisSubsection
verbosities = {
self.subsection_verbosities = {
subsec.PCA: verbosity_pca,
subsec.ParallelCoordinates: verbosity_parallel_coordinates,
subsec.ParallelCategories: verbosity_parallel_categories,
}
if UMAP_AVAILABLE:
verbosities[subsec.UMAP] = verbosity_umap
self.subsection_verbosities[subsec.UMAP] = verbosity_umap

# By default use all subsections
if subsections is None:
subsections_all = list(subsec)
self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
else:
subsections_all = subsections

# Store subsections with LOW verbosity
self.subsections_low_verbosity = [
sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
]

if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
self.subsections_low_verbosity = None
self.subsections_to_show = subsections

enum_to_implementation = {
subsec.PCA: PCA(df, verbosity_pca, columns, color_col=color_col),
Expand All @@ -132,7 +126,9 @@ def __init__(
df, verbosity_umap, columns, color_col=color_col
)

subsections_implementations = [enum_to_implementation[sub] for sub in subsections_all]
subsections_implementations = [
enum_to_implementation[sub] for sub in self.subsections_to_show
]

self.color_col = color_col
super().__init__(subsections_implementations, verbosity, columns)
Expand Down Expand Up @@ -212,10 +208,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
cells.append(section_header)
if self.verbosity == Verbosity.LOW:
code = "multivariate_analysis(df=df"
if self.subsections_low_verbosity is not None:
subsections_to_show_with_low_verbo = [
sub
for sub in self.subsections_to_show
if self.subsection_verbosities[sub] == Verbosity.LOW
]
if subsections_to_show_with_low_verbo != self._DEFAULT_SUBSECTIONS_TO_SHOW:
arg_subsections_names = [
f"MultivariateAnalysis.MultivariateAnalysisSubsection.{str(sub)}"
for sub in self.subsections_low_verbosity
for sub in subsections_to_show_with_low_verbo
]
code += f", subsections={arg_subsections_names}".replace("'", "")
if self.columns is not None:
Expand Down
44 changes: 18 additions & 26 deletions edvart/report_sections/timeseries_analysis/timeseries_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def __init__(

subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection

verbosities = {
self.subsection_verbosities = {
subsec.TimeSeriesLinePlot: verbosity_time_series_line_plot,
subsec.RollingStatistics: verbosity_rolling_statistics,
subsec.BoxplotsOverTime: verbosity_boxplots_over_time,
Expand All @@ -135,7 +135,7 @@ def __init__(
subsec.StationarityTests: StationarityTests(verbosity_stationarity_tests, columns),
subsec.Autocorrelation: Autocorrelation(verbosity_autocorrelation, columns),
}

# Add FT and STFT only if the required parameters are specified
if sampling_rate is not None:
enum_to_implementation[subsec.FourierTransform] = FourierTransform(
sampling_rate, verbosity_fourier_transform, columns
Expand All @@ -156,25 +156,17 @@ def __init__(
"Need to set a `sampling_rate` to plot Short-time Fourier transform."
)

# By default use all subsections; FT and STFT are included only if their required parameters are specified
self.default_subsections_to_show = list(enum_to_implementation.keys())

if subsections is None:
subsections_all = list(enum_to_implementation.keys())
self.subsections_to_show = self.default_subsections_to_show
else:
subsections_all = subsections
self.subsections_to_show = subsections

# Store subsections with LOW verbosity
self.subsections_low_verbosity = [
sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
subsections_implementations = [
enum_to_implementation[sub] for sub in self.subsections_to_show
]

if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
self.subsections_low_verbosity = None

if subsections is None:
subsections_implementations = list(enum_to_implementation.values())
else:
subsections_implementations = [enum_to_implementation[sub] for sub in subsections]

super().__init__(subsections_implementations, verbosity, columns)

@property
Expand Down Expand Up @@ -245,24 +237,24 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
if self.verbosity == Verbosity.LOW:
subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection
code = "timeseries_analysis(df=df"

if self.subsections_low_verbosity is not None:
subsections_to_show_with_low_verbo = [
sub
for sub in self.subsections_to_show
if self.subsection_verbosities[sub] == Verbosity.LOW
]
if subsections_to_show_with_low_verbo != self.default_subsections_to_show:
arg_subsections_names = [
f"TimeseriesAnalysis.TimeseriesAnalysisSubsection.{str(sub)}"
for sub in self.subsections_low_verbosity
for sub in subsections_to_show_with_low_verbo
]
code += f", subsections={arg_subsections_names}".replace("'", "")

stft_included_or_empty = (
self.subsections_low_verbosity is None
or subsec.ShortTimeFT in self.subsections_low_verbosity
)
stft_included = subsec.ShortTimeFT in subsections_to_show_with_low_verbo
include_sampling_rate = self.sampling_rate is not None and (
stft_included_or_empty or subsec.FourierTransform in self.subsections_low_verbosity
stft_included or subsec.FourierTransform in subsections_to_show_with_low_verbo
)
if include_sampling_rate:
code += f", sampling_rate={self.sampling_rate}"
if self.stft_window_size is not None and stft_included_or_empty:
if self.stft_window_size is not None and stft_included:
code += f", stft_window_size={self.stft_window_size}"

if self.columns is not None:
Expand Down
5 changes: 3 additions & 2 deletions tests/test_timeseries_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ def test_verbosity_low_different_subsection_verbosities():
sampling_rate=1,
stft_window_size=2,
verbosity_rolling_statistics=Verbosity.MEDIUM,
verbosity_fourier_transform=Verbosity.MEDIUM,
verbosity_short_time_ft=Verbosity.HIGH,
)

Expand All @@ -355,9 +356,9 @@ def test_verbosity_low_different_subsection_verbosities():
expected_code = [
"timeseries_analysis(df=df, "
"subsections=[TimeseriesAnalysis.TimeseriesAnalysisSubsection.TimeSeriesLinePlot, "
"TimeseriesAnalysis.TimeseriesAnalysisSubsection.FourierTransform, "
"TimeseriesAnalysis.TimeseriesAnalysisSubsection.StationarityTests, "
"TimeseriesAnalysis.TimeseriesAnalysisSubsection.BoxplotsOverTime], sampling_rate=1)",
"TimeseriesAnalysis.TimeseriesAnalysisSubsection.BoxplotsOverTime])",
"fourier_transform(df=df, sampling_rate=1)",
"rolling_statistics(df=df)",
(
get_code(timeseries_analysis.ShortTimeFT.short_time_ft)
Expand Down

0 comments on commit 6c45f19

Please sign in to comment.