From 53e46feab1fb81cee4e656b2dfae5ba3e83d89bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Bel=C3=A1k?= Date: Wed, 9 Aug 2023 16:01:34 +0200 Subject: [PATCH] feat: Use `UNIQUE` data type in univariate analysis --- edvart/report_sections/univariate_analysis.py | 52 ++++++++++++------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/edvart/report_sections/univariate_analysis.py b/edvart/report_sections/univariate_analysis.py index d3ccc6b..f9b31c8 100644 --- a/edvart/report_sections/univariate_analysis.py +++ b/edvart/report_sections/univariate_analysis.py @@ -278,6 +278,8 @@ def univariate_analysis(df: pd.DataFrame, columns: Optional[List[str]] = None) - if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN): UnivariateAnalysis.top_most_frequent(df[col]) UnivariateAnalysis.bar_plot(df[col]) + elif data_type == DataType.UNIQUE: + display(Markdown("Each value in the column is unique.")) else: UnivariateAnalysis.numeric_statistics(df[col]) UnivariateAnalysis.histogram(df[col]) @@ -384,29 +386,37 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None: column_header = nbfv4.new_markdown_cell(f"## *{col} - {data_type_name}*") cells.append(column_header) if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN): - code = code_dedent( - f""" - top_most_frequent(df['{col}']) - bar_plot(df['{col}'])""" - ) - elif self.verbosity == 1: - code = code_dedent( - f""" - numeric_statistics(df['{col}']) - histogram(df['{col}'])""" + cell = nbfv4.new_code_cell( + code_dedent( + f""" + top_most_frequent(df['{col}']) + bar_plot(df['{col}'])""" + ) ) + elif data_type == DataType.UNIQUE: + cell = nbfv4.new_markdown_cell("Each value in the column is unique.") else: - code = code_dedent( - f""" - numeric_statistics( - df['{col}'], - descriptive_stats=default_descriptive_statistics(), - quantile_stats=default_quantile_statistics() + if self.verbosity == 1: + cell = nbfv4.new_code_cell( + code_dedent( + f""" + numeric_statistics(df['{col}']) + histogram(df['{col}'])""" ) - histogram(df['{col}'])""" - ) - code_cell = nbfv4.new_code_cell(code) - cells.append(code_cell) + ) + else: + cell = nbfv4.new_code_cell( + code_dedent( + f""" + numeric_statistics( + df['{col}'], + descriptive_stats=default_descriptive_statistics(), + quantile_stats=default_quantile_statistics() + ) + histogram(df['{col}'])""" + ) + ) + cells.append(cell) def show(self, df: pd.DataFrame) -> None: """Generates univariate analysis cell output in the calling notebook. @@ -431,6 +441,8 @@ def show(self, df: pd.DataFrame) -> None: if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN): UnivariateAnalysis.top_most_frequent(df[col]) UnivariateAnalysis.bar_plot(df[col]) + elif data_type == DataType.UNIQUE: + display(Markdown("Each value in the column is unique.")) else: UnivariateAnalysis.numeric_statistics(df[col]) UnivariateAnalysis.histogram(df[col])