From a0c56ad1671bd4388356dd952b398efc31fd8796 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Mon, 27 Mar 2023 17:35:50 +0200 Subject: [PATCH] feat: remove `list_columns_with_XY` methods from `Table` (#100) Closes #94. ### Summary of Changes In the `Table` class: * Remove `list_columns_with_missing_values` * Remove `list_columns_with_non_numerical_values` * Mark `list_columns_with_numerical_values` as internal --- src/safeds/data/tabular/containers/_table.py | 40 ++----------------- .../test_list_columns_with_missing_values.py | 9 ----- ..._list_columns_with_non_numerical_values.py | 19 --------- ...test_list_columns_with_numerical_values.py | 4 +- 4 files changed, 6 insertions(+), 66 deletions(-) delete mode 100644 tests/safeds/data/tabular/containers/_table/test_list_columns_with_missing_values.py delete mode 100644 tests/safeds/data/tabular/containers/_table/test_list_columns_with_non_numerical_values.py diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 0e228ed93..fd62f988f 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -596,7 +596,7 @@ def drop_columns_with_non_numerical_values(self) -> Table: A table without the columns that contain non-numerical values. """ - return Table.from_columns(self.list_columns_with_numerical_values()) + return Table.from_columns(self._list_columns_with_numerical_values()) def drop_duplicate_rows(self) -> Table: """ @@ -637,7 +637,7 @@ def drop_rows_with_outliers(self) -> Table: result = self._data.copy(deep=True) table_without_nonnumericals = Table.from_columns( - self.list_columns_with_numerical_values() + self._list_columns_with_numerical_values() ) result = result[ @@ -662,7 +662,6 @@ def filter_rows(self, query: Callable[[Row], bool]) -> Table: table : Table A table containing only the rows filtered by the query. 
""" - rows: list[Row] = [row for row in self.to_rows() if query(row)] if len(rows) == 0: result_table = Table([], self._schema) @@ -936,7 +935,7 @@ def correlation_heatmap(self) -> None: """ Plot a correlation heatmap for all numerical columns of this `Table`. """ - only_numerical = Table.from_columns(self.list_columns_with_numerical_values()) + only_numerical = self.drop_columns_with_non_numerical_values() sns.heatmap( data=only_numerical._data.corr(), @@ -1100,38 +1099,7 @@ def _ipython_display_(self) -> DisplayHandle: ): return display(tmp) - def list_columns_with_missing_values(self) -> list[Column]: - """ - Return a list of all the columns that have at least one missing value. Returns an empty list if there are none. - - Returns - ------- - columns_with_missing_values: list[Column] - The list of columns with missing values. - """ - columns = self.to_columns() - columns_with_missing_values = [] - for column in columns: - if column.has_missing_values(): - columns_with_missing_values.append(column) - return columns_with_missing_values - - def list_columns_with_non_numerical_values(self) -> list[Column]: - """ - Return a list of columns only containing non-numerical values. - - Returns - ------- - cols : list[Column] - The list with only non-numerical columns. - """ - cols = [] - for column_name, data_type in self._schema._schema.items(): - if not data_type.is_numeric(): - cols.append(self.get_column(column_name)) - return cols - - def list_columns_with_numerical_values(self) -> list[Column]: + def _list_columns_with_numerical_values(self) -> list[Column]: """ Return a list of columns only containing numerical values. 
diff --git a/tests/safeds/data/tabular/containers/_table/test_list_columns_with_missing_values.py b/tests/safeds/data/tabular/containers/_table/test_list_columns_with_missing_values.py deleted file mode 100644 index a817741da..000000000 --- a/tests/safeds/data/tabular/containers/_table/test_list_columns_with_missing_values.py +++ /dev/null @@ -1,9 +0,0 @@ -import numpy as np -import pandas as pd -from safeds.data.tabular.containers import Table - - -def test_list_columns_with_missing_values() -> None: - table = Table(pd.DataFrame(data={"col1": ["col1_1", 2], "col2": [np.nan, 3]})) - columns_with_missing_values = table.list_columns_with_missing_values() - assert columns_with_missing_values == [table.get_column("col2")] diff --git a/tests/safeds/data/tabular/containers/_table/test_list_columns_with_non_numerical_values.py b/tests/safeds/data/tabular/containers/_table/test_list_columns_with_non_numerical_values.py deleted file mode 100644 index 722666b9c..000000000 --- a/tests/safeds/data/tabular/containers/_table/test_list_columns_with_non_numerical_values.py +++ /dev/null @@ -1,19 +0,0 @@ -import pandas as pd -from safeds.data.tabular.containers import Table - - -def test_list_columns_with_non_numerical_values_valid() -> None: - table = Table( - pd.DataFrame( - data={ - "col1": ["A", "B", "C", "A"], - "col2": ["Test1", "Test1", "Test3", "Test1"], - "col3": [1, 2, 3, 4], - "col4": [2, 3, 1, 4], - } - ) - ) - columns = table.list_columns_with_non_numerical_values() - assert columns[0] == table.get_column("col1") - assert columns[1] == table.get_column("col2") - assert len(columns) == 2 diff --git a/tests/safeds/data/tabular/containers/_table/test_list_columns_with_numerical_values.py b/tests/safeds/data/tabular/containers/_table/test_list_columns_with_numerical_values.py index 41d592b3b..bc5cba885 100644 --- a/tests/safeds/data/tabular/containers/_table/test_list_columns_with_numerical_values.py +++ 
b/tests/safeds/data/tabular/containers/_table/test_list_columns_with_numerical_values.py @@ -15,7 +15,7 @@ def test_list_columns_with_numerical_values_valid() -> None: } ) ) - columns = table.list_columns_with_numerical_values() + columns = table._list_columns_with_numerical_values() assert columns[0] == table.get_column("col3") assert columns[1] == table.get_column("col4") assert len(columns) == 2 @@ -25,6 +25,6 @@ def test_list_columns_with_numerical_values_invalid() -> None: table = Table( [], TableSchema({"col1": ColumnType.from_numpy_dtype(np.dtype(float))}) ) - columns = table.list_columns_with_numerical_values() + columns = table._list_columns_with_numerical_values() assert columns[0] == table.get_column("col1") assert len(columns) == 1