Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: make low verbosity subsection definition more transparent #127

Merged
merged 2 commits into from
Sep 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions edvart/report_sections/bivariate_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class BivariateAnalysisSubsection(IntEnum):
def __str__(self):
return self.name

# By default use all subsections
_DEFAULT_SUBSECTIONS_TO_SHOW = list(BivariateAnalysisSubsection)

def __init__(
self,
subsections: Optional[List[BivariateAnalysisSubsection]] = None,
Expand All @@ -99,25 +102,16 @@ def __init__(
subsec = BivariateAnalysis.BivariateAnalysisSubsection

# Store subsection verbosities
verbosities = {
self.subsection_verbosities = {
subsec.CorrelationPlot: verbosity_correlations,
subsec.PairPlot: verbosity_pairplot,
subsec.ContingencyTable: verbosity_contingency_table,
}

# By default use all subsections
if subsections is None:
subsections_all = list(BivariateAnalysis.BivariateAnalysisSubsection)
self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
else:
subsections_all = subsections

# Store subsections with LOW verbosity
self.subsections_low_verbosity = [
sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
]

if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
self.subsections_low_verbosity = None
self.subsections_to_show = subsections

if (columns_x is None) != (columns_y is None):
raise ValueError("Either both or neither of columns_x, columns_y must be specified.")
Expand Down Expand Up @@ -145,7 +139,9 @@ def __init__(
),
}

subsections_implementations = [enum_to_implementation[sub] for sub in subsections_all]
subsections_implementations = [
enum_to_implementation[sub] for sub in self.subsections_to_show
]
super().__init__(subsections_implementations, verbosity, columns)

self.columns_x = columns_x
Expand Down Expand Up @@ -229,10 +225,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
cells.append(section_header)
if self.verbosity == Verbosity.LOW:
code = "bivariate_analysis(df=df"
if self.subsections_low_verbosity is not None:
subsections_to_show_with_low_verbosity = [
sub
for sub in self.subsections_to_show
if self.subsection_verbosities[sub] == Verbosity.LOW
]
if subsections_to_show_with_low_verbosity != self._DEFAULT_SUBSECTIONS_TO_SHOW:
arg_subsections_names = [
f"BivariateAnalysis.BivariateAnalysisSubsection.{str(sub)}"
for sub in self.subsections_low_verbosity
for sub in subsections_to_show_with_low_verbosity
]

code += f", subsections={arg_subsections_names}".replace("'", "")
Expand Down
37 changes: 16 additions & 21 deletions edvart/report_sections/dataset_overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ class OverviewSubsection(IntEnum):
def __str__(self):
return self.name

# By default use all subsections
_DEFAULT_SUBSECTIONS_TO_SHOW = list(OverviewSubsection)

def __init__(
self,
subsections: Optional[List[OverviewSubsection]] = None,
Expand All @@ -93,7 +96,7 @@ def __init__(
subsec = Overview.OverviewSubsection

# Store subsection verbosities
verbosities = {
self.subsection_verbosities = {
subsec.QuickInfo: verbosity_quick_info,
subsec.DataTypes: verbosity_data_types,
subsec.DataPreview: verbosity_data_preview,
Expand All @@ -103,19 +106,10 @@ def __init__(
subsec.DuplicateRows: verbosity_duplicate_rows,
}

# By default use all subsections
if subsections is None:
subsections_all = list(Overview.OverviewSubsection)
self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
else:
subsections_all = subsections

# Store subsections with LOW verbosity
self.subsections_low_verbosity = [
sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
]

if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
self.subsections_low_verbosity = None
self.subsections_to_show = subsections

# Construct objects that implement subsections
enum_to_implementation = {
Expand All @@ -130,13 +124,9 @@ def __init__(
subsec.DuplicateRows: DuplicateRows(verbosity_duplicate_rows, columns),
}

# Store subsection selection
if subsections is None:
subsections_implementations = [
enum_to_implementation[sub] for sub in Overview.OverviewSubsection
]
else:
subsections_implementations = [enum_to_implementation[sub] for sub in subsections]
subsections_implementations = [
enum_to_implementation[sub] for sub in self.subsections_to_show
]
super().__init__(subsections_implementations, verbosity, columns)

@property
Expand Down Expand Up @@ -202,10 +192,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:

if self.verbosity == Verbosity.LOW:
code = "overview_analysis(df=df"
if self.subsections_low_verbosity is not None:
subsections_to_show_with_low_verbosity = [
sub
for sub in self.subsections_to_show
if self.subsection_verbosities[sub] == Verbosity.LOW
]
if subsections_to_show_with_low_verbosity != self._DEFAULT_SUBSECTIONS_TO_SHOW:
arg_subsections_names = [
f"Overview.OverviewSubsection.{str(sub)}"
for sub in self.subsections_low_verbosity
for sub in subsections_to_show_with_low_verbosity
]
code += f", subsections={arg_subsections_names}".replace("'", "")
if self.columns is not None:
Expand Down
33 changes: 17 additions & 16 deletions edvart/report_sections/multivariate_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ class MultivariateAnalysisSubsection(IntEnum):
def __str__(self):
return self.name

# By default use all subsections
_DEFAULT_SUBSECTIONS_TO_SHOW = list(MultivariateAnalysisSubsection)

def __init__(
self,
df: pd.DataFrame,
Expand All @@ -96,27 +99,18 @@ def __init__(
)

subsec = MultivariateAnalysis.MultivariateAnalysisSubsection
verbosities = {
self.subsection_verbosities = {
subsec.PCA: verbosity_pca,
subsec.ParallelCoordinates: verbosity_parallel_coordinates,
subsec.ParallelCategories: verbosity_parallel_categories,
}
if UMAP_AVAILABLE:
verbosities[subsec.UMAP] = verbosity_umap
self.subsection_verbosities[subsec.UMAP] = verbosity_umap

# By default use all subsections
if subsections is None:
subsections_all = list(subsec)
self.subsections_to_show = self._DEFAULT_SUBSECTIONS_TO_SHOW
else:
subsections_all = subsections

# Store subsections with LOW verbosity
self.subsections_low_verbosity = [
sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
]

if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
self.subsections_low_verbosity = None
self.subsections_to_show = subsections
mbelak-dtml marked this conversation as resolved.
Show resolved Hide resolved

enum_to_implementation = {
subsec.PCA: PCA(df, verbosity_pca, columns, color_col=color_col),
Expand All @@ -132,7 +126,9 @@ def __init__(
df, verbosity_umap, columns, color_col=color_col
)

subsections_implementations = [enum_to_implementation[sub] for sub in subsections_all]
subsections_implementations = [
enum_to_implementation[sub] for sub in self.subsections_to_show
]

self.color_col = color_col
super().__init__(subsections_implementations, verbosity, columns)
Expand Down Expand Up @@ -212,10 +208,15 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
cells.append(section_header)
if self.verbosity == Verbosity.LOW:
code = "multivariate_analysis(df=df"
if self.subsections_low_verbosity is not None:
subsections_to_show_with_low_verbosity = [
sub
for sub in self.subsections_to_show
if self.subsection_verbosities[sub] == Verbosity.LOW
]
if subsections_to_show_with_low_verbosity != self._DEFAULT_SUBSECTIONS_TO_SHOW:
arg_subsections_names = [
f"MultivariateAnalysis.MultivariateAnalysisSubsection.{str(sub)}"
for sub in self.subsections_low_verbosity
for sub in subsections_to_show_with_low_verbosity
]
code += f", subsections={arg_subsections_names}".replace("'", "")
if self.columns is not None:
Expand Down
44 changes: 18 additions & 26 deletions edvart/report_sections/timeseries_analysis/timeseries_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def __init__(

subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection

verbosities = {
self.subsection_verbosities = {
subsec.TimeSeriesLinePlot: verbosity_time_series_line_plot,
subsec.RollingStatistics: verbosity_rolling_statistics,
subsec.BoxplotsOverTime: verbosity_boxplots_over_time,
Expand All @@ -135,7 +135,7 @@ def __init__(
subsec.StationarityTests: StationarityTests(verbosity_stationarity_tests, columns),
subsec.Autocorrelation: Autocorrelation(verbosity_autocorrelation, columns),
}

# Add FT and STFT only if the required parameters are specified
if sampling_rate is not None:
enum_to_implementation[subsec.FourierTransform] = FourierTransform(
sampling_rate, verbosity_fourier_transform, columns
Expand All @@ -156,25 +156,17 @@ def __init__(
"Need to set a `sampling_rate` to plot Short-time Fourier transform."
)

# By default use all subsections; FT and STFT are included only if their required parameters are specified
self.default_subsections_to_show = list(enum_to_implementation.keys())

if subsections is None:
subsections_all = list(enum_to_implementation.keys())
self.subsections_to_show = self.default_subsections_to_show
else:
subsections_all = subsections
self.subsections_to_show = subsections

# Store subsections with LOW verbosity
self.subsections_low_verbosity = [
sub for sub in subsections_all if verbosities[sub] == Verbosity.LOW
subsections_implementations = [
enum_to_implementation[sub] for sub in self.subsections_to_show
]

if len(self.subsections_low_verbosity) == len(subsections_all) and subsections is None:
self.subsections_low_verbosity = None

if subsections is None:
subsections_implementations = list(enum_to_implementation.values())
else:
subsections_implementations = [enum_to_implementation[sub] for sub in subsections]

super().__init__(subsections_implementations, verbosity, columns)

@property
Expand Down Expand Up @@ -245,24 +237,24 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
if self.verbosity == Verbosity.LOW:
subsec = TimeseriesAnalysis.TimeseriesAnalysisSubsection
code = "timeseries_analysis(df=df"

if self.subsections_low_verbosity is not None:
subsections_to_show_with_low_verbosity = [
sub
for sub in self.subsections_to_show
if self.subsection_verbosities[sub] == Verbosity.LOW
]
if subsections_to_show_with_low_verbosity != self.default_subsections_to_show:
arg_subsections_names = [
f"TimeseriesAnalysis.TimeseriesAnalysisSubsection.{str(sub)}"
for sub in self.subsections_low_verbosity
for sub in subsections_to_show_with_low_verbosity
]
code += f", subsections={arg_subsections_names}".replace("'", "")

stft_included_or_empty = (
self.subsections_low_verbosity is None
or subsec.ShortTimeFT in self.subsections_low_verbosity
)
stft_included = subsec.ShortTimeFT in subsections_to_show_with_low_verbosity
include_sampling_rate = self.sampling_rate is not None and (
stft_included_or_empty or subsec.FourierTransform in self.subsections_low_verbosity
stft_included or subsec.FourierTransform in subsections_to_show_with_low_verbosity
)
if include_sampling_rate:
code += f", sampling_rate={self.sampling_rate}"
if self.stft_window_size is not None and stft_included_or_empty:
if self.stft_window_size is not None and stft_included:
code += f", stft_window_size={self.stft_window_size}"

if self.columns is not None:
Expand Down
5 changes: 3 additions & 2 deletions tests/test_timeseries_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ def test_verbosity_low_different_subsection_verbosities():
sampling_rate=1,
stft_window_size=2,
verbosity_rolling_statistics=Verbosity.MEDIUM,
verbosity_fourier_transform=Verbosity.MEDIUM,
mbelak-dtml marked this conversation as resolved.
Show resolved Hide resolved
verbosity_short_time_ft=Verbosity.HIGH,
)

Expand All @@ -355,9 +356,9 @@ def test_verbosity_low_different_subsection_verbosities():
expected_code = [
"timeseries_analysis(df=df, "
"subsections=[TimeseriesAnalysis.TimeseriesAnalysisSubsection.TimeSeriesLinePlot, "
"TimeseriesAnalysis.TimeseriesAnalysisSubsection.FourierTransform, "
"TimeseriesAnalysis.TimeseriesAnalysisSubsection.StationarityTests, "
"TimeseriesAnalysis.TimeseriesAnalysisSubsection.BoxplotsOverTime], sampling_rate=1)",
"TimeseriesAnalysis.TimeseriesAnalysisSubsection.BoxplotsOverTime])",
"fourier_transform(df=df, sampling_rate=1)",
"rolling_statistics(df=df)",
(
get_code(timeseries_analysis.ShortTimeFT.short_time_ft)
Expand Down