refactor: Use direct conversion to represent inferred data type.

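In `univariate_analysis.py`, replaces each pair of calls to `infer_data_type` (one with `string_representation=True`, one without it, i.e. `string_representation=False`) with a single call whose result is converted to its name via `str()`. In `dataset_overview.py`, the `string_representation=True` argument passed through `df.apply` is likewise replaced by wrapping the call in `str()`.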
datamole-ai · Aug 31, 2023 · 131f18c · 131f18c
1 parent 2ac7a3e
commit 131f18c
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 5 deletions.
diff --git a/edvart/report_sections/dataset_overview.py b/edvart/report_sections/dataset_overview.py
@@ -378,10 +378,9 @@ def data_types(df: pd.DataFrame, columns: Optional[List[str]] = None) -> None:
         if columns is not None:
             df = df[columns]
         dtypes = df.apply(
-            func=infer_data_type,
+            func=lambda x_: str(infer_data_type(x_)),
             axis=0,
             result_type="expand",
-            string_representation=True,
         )
 
         # Convert result to frame for viewing

diff --git a/edvart/report_sections/univariate_analysis.py b/edvart/report_sections/univariate_analysis.py
@@ -266,8 +266,8 @@ def univariate_analysis(df: pd.DataFrame, columns: Optional[List[str]] = None) -
                 display(Markdown(f"## *{col} - NULL*"))
                 display(Markdown("The column contains only null values."))
                 continue
-            data_type_name = infer_data_type(df[col], string_representation=True)
             data_type = infer_data_type(df[col])
+            data_type_name = str(data_type)
             display(Markdown(f"## *{col} - {data_type_name}*"))
             if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN):
                 UnivariateAnalysis.top_most_frequent(df[col])
@@ -375,8 +375,8 @@ def add_cells(self, cells: List[Dict[str, Any]]) -> None:
                     display(Markdown(f"## *{col} - NULL*"))
                     display(Markdown("The column contains only null values."))
                     continue
-                data_type_name = infer_data_type(self.df[col], string_representation=True)
                 data_type = infer_data_type(self.df[col])
+                data_type_name = str(data_type)
                 column_header = nbfv4.new_markdown_cell(f"## *{col} - {data_type_name}*")
                 cells.append(column_header)
                 if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN):
@@ -429,8 +429,8 @@ def show(self, df: pd.DataFrame) -> None:
                 display(Markdown(f"## *{col} - NULL*"))
                 display(Markdown("The column contains only null values."))
                 continue
-            data_type_name = infer_data_type(df[col], string_representation=True)
             data_type = infer_data_type(df[col])
+            data_type_name = str(data_type)
             display(Markdown(f"## *{col} - {data_type_name}*"))
             if data_type in (DataType.CATEGORICAL, DataType.BOOLEAN):
                 UnivariateAnalysis.top_most_frequent(df[col])