From 8f2c1b3edd9dd4ca5ac96680689ff2a9186c2c47 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Thu, 16 Nov 2023 21:14:58 +0100 Subject: [PATCH] perf: remove unneeded copy operations --- .../data/tabular/containers/_tagged_table.py | 24 ++++--------------- .../tabular/transformation/_discretizer.py | 2 +- .../data/tabular/transformation/_imputer.py | 2 +- .../tabular/transformation/_label_encoder.py | 4 ++-- .../transformation/_one_hot_encoder.py | 2 +- .../tabular/transformation/_range_scaler.py | 4 ++-- .../transformation/_standard_scaler.py | 4 ++-- src/safeds/ml/classical/_util_sklearn.py | 2 +- .../_table/_tagged_table/test_copy.py | 22 ----------------- 9 files changed, 14 insertions(+), 52 deletions(-) delete mode 100644 tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index cc045a7ec..23a665162 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -1,6 +1,5 @@ from __future__ import annotations -import copy from typing import TYPE_CHECKING from safeds.data.tabular.containers import Column, Row, Table @@ -193,21 +192,6 @@ def target(self) -> Column: """ return self._target - # ------------------------------------------------------------------------------------------------------------------ - # Helpers - # ------------------------------------------------------------------------------------------------------------------ - - def _copy(self) -> TaggedTable: - """ - Return a copy of this tagged table. - - Returns - ------- - table : TaggedTable - The copy of this tagged table. - """ - return copy.deepcopy(self) - # ------------------------------------------------------------------------------------------------------------------ # Specific methods from TaggedTable class: # ------------------------------------------------------------------------------------------------------------------ @@ -268,7 +252,7 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: super().add_columns(columns), target_name=self.target.name, feature_names=self.features.column_names - + [col.name for col in (columns.to_columns() if isinstance(columns, Table) else columns)], + + [col.name for col in (columns.to_columns() if isinstance(columns, Table) else columns)], ) # ------------------------------------------------------------------------------------------------------------------ @@ -702,8 +686,8 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag self.features.column_names if old_column_name not in self.features.column_names else self.features.column_names[: self.features.column_names.index(old_column_name)] - + [col.name for col in new_columns] - + self.features.column_names[self.features.column_names.index(old_column_name) + 1 :] + + [col.name for col in new_columns] + + self.features.column_names[self.features.column_names.index(old_column_name) + 1:] ), ) @@ -763,7 +747,7 @@ def slice_rows( def sort_columns( self, comparator: Callable[[Column, Column], int] = lambda col1, col2: (col1.name > col2.name) - - (col1.name < col2.name), + - (col1.name < col2.name), ) -> TaggedTable: """ Sort the columns of a `TaggedTable` with the given comparator and return a new `TaggedTable`. diff --git a/src/safeds/data/tabular/transformation/_discretizer.py b/src/safeds/data/tabular/transformation/_discretizer.py index 581130c55..3d887c7df 100644 --- a/src/safeds/data/tabular/transformation/_discretizer.py +++ b/src/safeds/data/tabular/transformation/_discretizer.py @@ -139,7 +139,7 @@ def transform(self, table: Table) -> Table: if not table.get_column(column).type.is_numeric(): raise NonNumericColumnError(f"{column} is of type {table.get_column(column).type}.") - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) return Table._from_pandas_dataframe(data) diff --git a/src/safeds/data/tabular/transformation/_imputer.py b/src/safeds/data/tabular/transformation/_imputer.py index 074d99637..8fb7b0235 100644 --- a/src/safeds/data/tabular/transformation/_imputer.py +++ b/src/safeds/data/tabular/transformation/_imputer.py @@ -210,7 +210,7 @@ def transform(self, table: Table) -> Table: if table.number_of_rows == 0: raise ValueError("The Imputer cannot transform the table because it contains 0 rows") - data = table._data.copy() + data = table._data.reset_index(drop=True) data[self._column_names] = pd.DataFrame( self._wrapped_transformer.transform(data[self._column_names]), columns=self._column_names, diff --git a/src/safeds/data/tabular/transformation/_label_encoder.py b/src/safeds/data/tabular/transformation/_label_encoder.py index 7b3351d7b..7ed6b5fd9 100644 --- a/src/safeds/data/tabular/transformation/_label_encoder.py +++ b/src/safeds/data/tabular/transformation/_label_encoder.py @@ -112,7 +112,7 @@ def transform(self, table: Table) -> Table: if table.number_of_rows == 0: raise ValueError("The LabelEncoder cannot transform the table because it contains 0 rows") - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) return Table._from_pandas_dataframe(data) @@ -171,7 +171,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: ), ) - data = transformed_table._data.copy() + data = transformed_table._data.reset_index(drop=True) data.columns = transformed_table.column_names data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names]) return Table._from_pandas_dataframe(data) diff --git a/src/safeds/data/tabular/transformation/_one_hot_encoder.py b/src/safeds/data/tabular/transformation/_one_hot_encoder.py index ba9cda823..8c0f44071 100644 --- a/src/safeds/data/tabular/transformation/_one_hot_encoder.py +++ b/src/safeds/data/tabular/transformation/_one_hot_encoder.py @@ -121,7 +121,7 @@ def fit(self, table: Table, column_names: list[str] | None) -> OneHotEncoder: stacklevel=2, ) - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names result = OneHotEncoder() diff --git a/src/safeds/data/tabular/transformation/_range_scaler.py b/src/safeds/data/tabular/transformation/_range_scaler.py index 5c4fd3935..066c25632 100644 --- a/src/safeds/data/tabular/transformation/_range_scaler.py +++ b/src/safeds/data/tabular/transformation/_range_scaler.py @@ -151,7 +151,7 @@ def transform(self, table: Table) -> Table: ), ) - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) return Table._from_pandas_dataframe(data) @@ -213,7 +213,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: ), ) - data = transformed_table._data.copy() + data = transformed_table._data.reset_index(drop=True) data.columns = transformed_table.column_names data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names]) return Table._from_pandas_dataframe(data) diff --git a/src/safeds/data/tabular/transformation/_standard_scaler.py b/src/safeds/data/tabular/transformation/_standard_scaler.py index 1153ef140..748209f9c 100644 --- a/src/safeds/data/tabular/transformation/_standard_scaler.py +++ b/src/safeds/data/tabular/transformation/_standard_scaler.py @@ -133,7 +133,7 @@ def transform(self, table: Table) -> Table: ), ) - data = table._data.copy() + data = table._data.reset_index(drop=True) data.columns = table.column_names data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names]) return Table._from_pandas_dataframe(data) @@ -195,7 +195,7 @@ def inverse_transform(self, transformed_table: Table) -> Table: ), ) - data = transformed_table._data.copy() + data = transformed_table._data.reset_index(drop=True) data.columns = transformed_table.column_names data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names]) return Table._from_pandas_dataframe(data) diff --git a/src/safeds/ml/classical/_util_sklearn.py b/src/safeds/ml/classical/_util_sklearn.py index 1d801d214..d09d10e3b 100644 --- a/src/safeds/ml/classical/_util_sklearn.py +++ b/src/safeds/ml/classical/_util_sklearn.py @@ -160,7 +160,7 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_ dataset_df = dataset.keep_only_columns(feature_names)._data dataset_df.columns = feature_names - result_set = dataset._data.copy(deep=True) + result_set = dataset._data.reset_index(drop=True) result_set.columns = dataset.column_names try: diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py deleted file mode 100644 index 8819aff05..000000000 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_copy.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest -from safeds.data.tabular.containers import TaggedTable - - -@pytest.mark.parametrize( - "tagged_table", - [ - TaggedTable({"a": [], "b": []}, target_name="b", feature_names=["a"]), - TaggedTable({"a": ["a", 3, 0.1], "b": [True, False, None]}, target_name="b", feature_names=["a"]), - TaggedTable( - {"a": ["a", 3, 0.1], "b": [True, False, None], "c": ["a", "b", "c"]}, - target_name="b", - feature_names=["a"], - ), - TaggedTable({"a": [], "b": [], "c": []}, target_name="b", feature_names=["a"]), - ], - ids=["empty-rows", "normal", "column_as_non_feature", "column_as_non_feature_with_empty_rows"], -) -def test_should_copy_tagged_table(tagged_table: TaggedTable) -> None: - copied = tagged_table._copy() - assert copied == tagged_table - assert copied is not tagged_table