Skip to content

Commit

Permalink
feat: Use UNIQUE data type in univariate analysis
Browse files: browse the repository at this point in the history
  • Loading branch information
mbelak-dtml committed Aug 10, 2023
1 parent e6f1b8e commit 136ebb6
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions edvart/report_sections/univariate_analysis.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -278,6 +278,8 @@ def univariate_analysis(df: pd.DataFrame, columns: Optional[List[str]] = None) -
if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN):
UnivariateAnalysis.top_most_frequent(df[col])
UnivariateAnalysis.bar_plot(df[col])
elif data_type == DataType.UNIQUE:
display(Markdown("Each value in the column is unique."))
else:
UnivariateAnalysis.numeric_statistics(df[col])
UnivariateAnalysis.histogram(df[col])
Expand Down Expand Up @@ -384,29 +386,37 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
column_header = nbfv4.new_markdown_cell(f"## *{col} - {data_type_name}*")
cells.append(column_header)
if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN):
code = code_dedent(
f"""
top_most_frequent(df['{col}'])
bar_plot(df['{col}'])"""
)
elif self.verbosity == 1:
code = code_dedent(
f"""
numeric_statistics(df['{col}'])
histogram(df['{col}'])"""
cell = nbfv4.new_code_cell(
code_dedent(
f"""
top_most_frequent(df['{col}'])
bar_plot(df['{col}'])"""
)
)
elif data_type == DataType.UNIQUE:
cell = nbfv4.new_markdown_cell("Each value in the column is unique.")
else:
code = code_dedent(
f"""
numeric_statistics(
df['{col}'],
descriptive_stats=default_descriptive_statistics(),
quantile_stats=default_quantile_statistics()
if self.verbosity == 1:
cell = nbfv4.new_code_cell(
code_dedent(
f"""
numeric_statistics(df['{col}'])
histogram(df['{col}'])"""
)
histogram(df['{col}'])"""
)
code_cell = nbfv4.new_code_cell(code)
cells.append(code_cell)
)
else:
cell = nbfv4.new_code_cell(
code_dedent(
f"""
numeric_statistics(
df['{col}'],
descriptive_stats=default_descriptive_statistics(),
quantile_stats=default_quantile_statistics()
)
histogram(df['{col}'])"""
)
)
cells.append(cell)

def show(self, df: pd.DataFrame) -> None:
"""Generates univariate analysis cell output in the calling notebook.
Expand All @@ -431,6 +441,8 @@ def show(self, df: pd.DataFrame) -> None:
if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN):
UnivariateAnalysis.top_most_frequent(df[col])
UnivariateAnalysis.bar_plot(df[col])
elif data_type == DataType.UNIQUE:
display(Markdown("Each value in the column is unique."))
else:
UnivariateAnalysis.numeric_statistics(df[col])
UnivariateAnalysis.histogram(df[col])
Expand Down

0 comments on commit 136ebb6

Please sign in to comment.