From 7cafe4afc059191ccd5c46a1a9fef0ff9aade69f Mon Sep 17 00:00:00 2001 From: jxnior01 Date: Thu, 13 Jul 2023 14:24:36 +0200 Subject: [PATCH 1/9] docs, validations, raised exceptions and covered all in tests. One test still fails, will have to resolve that --- .../data/tabular/containers/_tagged_table.py | 184 ++++++++++++++++-- src/safeds/exceptions/__init__.py | 2 + src/safeds/exceptions/_data.py | 7 + .../_table/_tagged_table/test_add_column.py | 41 ++++ .../test_add_column_as_feature.py | 43 +++- .../_table/_tagged_table/test_add_columns.py | 43 +++- .../test_add_columns_as_features.py | 41 ++++ .../_table/_tagged_table/test_add_row.py | 26 +++ .../_table/_tagged_table/test_add_rows.py | 28 ++- .../_table/_tagged_table/test_init.py | 15 +- .../_tagged_table/test_keep_only_columns.py | 17 +- .../_tagged_table/test_remove_columns.py | 14 +- .../_tagged_table/test_rename_column.py | 53 ++++- .../_tagged_table/test_replace_column.py | 49 ++++- 14 files changed, 542 insertions(+), 21 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 64f8c86fb..59a4ca815 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,8 +3,19 @@ import copy from typing import TYPE_CHECKING +import pandas as pd + from safeds.data.tabular.containers import Column, Row, Table -from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError, UnknownColumnNameError +from safeds.exceptions import ( + ColumnIsTargetError, + ColumnLengthMismatchError, + ColumnSizeError, + DuplicateColumnNameError, + IllegalSchemaModificationError, + IndexOutOfBoundsError, + SchemaMismatchError, + UnknownColumnNameError, +) if TYPE_CHECKING: from collections.abc import Callable, Mapping, Sequence @@ -158,6 +169,17 @@ def __init__( if len(feature_names) == 0: raise ValueError("At least one feature column must be specified.") + # Validate column lengths + expected_length: int | None = None + for column_values in data.values(): + if expected_length is None: + expected_length = len(column_values) + elif len(column_values) != expected_length: + raise ColumnLengthMismatchError( + "\n".join( + f"{column_name}: {len(column_values)}" for column_name, column_values in data.items()) + ) + self._features: Table = _data.keep_only_columns(feature_names) self._target: Column = _data.get_column(target_name) @@ -167,10 +189,26 @@ def __init__( @property def features(self) -> Table: + """ + Get the feature columns of the tagged table. + + Returns + ------- + Table + The table containing the feature columns. + """ return self._features @property def target(self) -> Column: + """ + Get the target column of the tagged table. + + Returns + ------- + Column + The target column. + """ return self._target # ------------------------------------------------------------------------------------------------------------------ @@ -198,6 +236,11 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: This table is not modified. + Parameters + ---------- + column : Column + The column to be added. + Returns ------- result : TaggedTable @@ -208,8 +251,14 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: DuplicateColumnNameError If the new column already exists. ColumnSizeError - If the size of the column does not match the amount of rows. + If the size of the column does not match the number of rows. """ + if column.name in self.column_names: + raise DuplicateColumnNameError(f"Column '{column.name}' already exists.") + + if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0: + raise ColumnSizeError(str(self.number_of_rows), str(column.number_of_rows)) + return TaggedTable._from_table( super().add_column(column), target_name=self.target.name, @@ -222,6 +271,11 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: This table is not modified. + Parameters + ---------- + columns : list[Column] | Table + The columns to be added as features. + Returns ------- result : TaggedTable @@ -230,10 +284,20 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: Raises ------ DuplicateColumnNameError - If the new column already exists. + If any of the new feature columns already exist. ColumnSizeError - If the size of the column does not match the amount of rows. + If the size of any feature column does not match the number of rows. """ + if isinstance(columns, Table): + columns = columns.to_columns() + + for column in columns: + if column.name in self.column_names: + raise DuplicateColumnNameError(column.name) + + if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0: + raise ColumnSizeError(str(self.number_of_rows), str(column.number_of_rows)) + return TaggedTable._from_table( super().add_columns(columns), target_name=self.target.name, @@ -270,6 +334,11 @@ def add_column(self, column: Column) -> TaggedTable: This table is not modified. + Parameters + ---------- + column : Column + The column to be added. + Returns ------- result : TaggedTable @@ -282,6 +351,12 @@ def add_column(self, column: Column) -> TaggedTable: ColumnSizeError If the size of the column does not match the amount of rows. """ + if column.name in self.column_names: + raise DuplicateColumnNameError(f"Column '{column.name}' already exists.") + + if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0: + raise ColumnSizeError(str(self.number_of_rows), str(column.number_of_rows)) + return TaggedTable._from_table( super().add_column(column), target_name=self.target.name, @@ -311,6 +386,16 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. """ + if isinstance(columns, Table): + columns = columns.to_columns() + + for column in columns: + if column.name in self.column_names: + raise DuplicateColumnNameError(f"Column '{column.name}' already exists.") + + if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0: + raise ColumnSizeError(str(self.number_of_rows), str(column.number_of_rows)) + return TaggedTable._from_table( super().add_columns(columns), target_name=self.target.name, @@ -338,6 +423,16 @@ def add_row(self, row: Row) -> TaggedTable: SchemaMismatchError If the schema of the row does not match the table schema. """ + if self.number_of_rows == 0: + if self.number_of_columns == 0: + for column in row.column_names: + self._data[column] = Column(column, []) + self._schema = Table._from_pandas_dataframe(pd.DataFrame(columns=row.column_names))._schema + elif self.column_names != row.column_names: + raise SchemaMismatchError + elif self._schema != row.schema: + raise SchemaMismatchError + return TaggedTable._from_table(super().add_row(row), target_name=self.target.name) def add_rows(self, rows: list[Row] | Table) -> TaggedTable: @@ -361,6 +456,19 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable: SchemaMismatchError If the schema of on of the row does not match the table schema. """ + if isinstance(rows, Table): + rows = rows.to_rows() + for row in rows: + if self.number_of_rows == 0: + if self.number_of_columns == 0: + for column in row.column_names: + self._data[column] = Column(column, []) + self._schema = Table._from_pandas_dataframe(pd.DataFrame(columns=self.column_names))._schema + elif self.column_names != row.column_names: + raise SchemaMismatchError + elif self._schema != row.schema: + raise SchemaMismatchError + return TaggedTable._from_table(super().add_rows(rows), target_name=self.target.name) def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: @@ -408,6 +516,13 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: IllegalSchemaModificationError If none of the given columns is the target column or any of the feature columns. """ + invalid_columns = [] + for name in column_names: + if not self._schema.has_column(name): + invalid_columns.append(name) + if len(invalid_columns) != 0: + raise UnknownColumnNameError(invalid_columns) + if self.target.name not in column_names: raise IllegalSchemaModificationError("Must keep the target column.") if len(set(self.features.column_names).intersection(set(column_names))) == 0: @@ -446,6 +561,13 @@ def remove_columns(self, column_names: list[str]) -> TaggedTable: IllegalSchemaModificationError If the given columns contain all the feature columns. """ + invalid_columns = [] + for name in column_names: + if not self._schema.has_column(name): + invalid_columns.append(name) + if len(invalid_columns) != 0: + raise UnknownColumnNameError(invalid_columns) + if self.target.name in column_names: raise ColumnIsTargetError(self.target.name) if len(set(self.features.column_names) - set(column_names)) == 0: @@ -587,9 +709,9 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: Parameters ---------- old_name : str - The old name of the target column + The old name of the target column. new_name : str - The new name of the target column + The new name of the target column. Returns ------- @@ -603,6 +725,13 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: DuplicateColumnNameError If the specified new target column name already exists. """ + if old_name not in self._schema.column_names: + raise UnknownColumnNameError([old_name]) + if old_name == new_name: + return self + if new_name in self._schema.column_names: + raise DuplicateColumnNameError(new_name) + return TaggedTable._from_table( super().rename_column(old_name, new_name), target_name=new_name if self.target.name == old_name else self.target.name, @@ -648,6 +777,22 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag IllegalSchemaModificationError If the target column would be removed or replaced by more than one column. """ + if old_column_name not in self._schema.column_names: + raise UnknownColumnNameError([old_column_name]) + + columns = list[Column]() + for old_column in self.column_names: + if old_column == old_column_name: + for new_column in new_columns: + if new_column.name in self.column_names and new_column.name != old_column_name: + raise DuplicateColumnNameError(new_column.name) + + if self.number_of_rows != new_column.number_of_rows: + raise ColumnSizeError(str(self.number_of_rows), str(new_column.number_of_rows)) + columns.append(new_column) + else: + columns.append(self.get_column(old_column)) + if old_column_name == self.target.name: if len(new_columns) != 1: raise IllegalSchemaModificationError( @@ -668,7 +813,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag if old_column_name not in self.features.column_names else self.features.column_names[: self.features.column_names.index(old_column_name)] + [col.name for col in new_columns] - + self.features.column_names[self.features.column_names.index(old_column_name) + 1 :] + + self.features.column_names[self.features.column_names.index(old_column_name) + 1:] ), ) @@ -719,6 +864,17 @@ def slice_rows( IndexOutOfBoundsError If the index is out of bounds. """ + if start is None: + start = 0 + + if end is None: + end = self.number_of_rows + + if end < start: + raise IndexOutOfBoundsError(slice(start, end)) + if start < 0 or end < 0 or start > self.number_of_rows or end > self.number_of_rows: + raise IndexOutOfBoundsError(start if start < 0 or start > self.number_of_rows else end) + return TaggedTable._from_table( super().slice_rows(start, end, step), target_name=self.target.name, @@ -809,8 +965,12 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg UnknownColumnNameError If the column does not exist. """ - return TaggedTable._from_table( - super().transform_column(name, transformer), - target_name=self.target.name, - feature_names=self.features.column_names, - ) + if self.has_column(name): + items: list = [transformer(item) for item in self.to_rows()] + result: list[Column] = [Column(name, items)] + return TaggedTable._from_table( + super().replace_column(name, result), + target_name=self.target.name, + feature_names=self.features.column_names, + ) + raise UnknownColumnNameError([name]) diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 352334cf5..021736287 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -9,6 +9,7 @@ IndexOutOfBoundsError, MissingValuesColumnError, NonNumericColumnError, + SchemaMismatchError, TransformerNotFittedError, UnknownColumnNameError, ValueNotPresentWhenFittedError, @@ -42,6 +43,7 @@ "IndexOutOfBoundsError", "MissingValuesColumnError", "NonNumericColumnError", + "SchemaMismatchError", "TransformerNotFittedError", "UnknownColumnNameError", "ValueNotPresentWhenFittedError", diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index 2d2fb7880..f11c7a334 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -93,6 +93,13 @@ def __init__(self, expected_size: str, actual_size: str): super().__init__(f"Expected a column of size {expected_size} but got column of size {actual_size}.") +class SchemaMismatchError(Exception): + """Exception raised when schemas are unequal.""" + + def __init__(self) -> None: + super().__init__("Failed because at least two schemas didn't match.") + + class ColumnLengthMismatchError(Exception): """Exception raised when the lengths of two or more columns do not match.""" diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index 22bd87605..d03b83bac 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, TaggedTable +from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -32,3 +33,43 @@ ) def test_should_add_column(tagged_table: TaggedTable, column: Column, expected_tagged_table: TaggedTable) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_column(column), expected_tagged_table) + + +@pytest.mark.parametrize( + ("tagged_table", "column", "error_msg"), + [ + ( + TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), + Column("B", ["g", "h", "i"]), + r"Column 'B' already exists." + ) + ], + ids=["column_already_exists"], +) +def test_should_raise_duplicate_column_name_if_column_already_exists( + tagged_table: TaggedTable, + column: Column, + error_msg: str, +) -> None: + with pytest.raises(DuplicateColumnNameError, match=error_msg): + tagged_table.add_column_as_feature(column) + + +@pytest.mark.parametrize( + ("tagged_table", "column", "error_msg"), + [ + ( + TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), + Column("C", ["g", "h", "i", "j"]), + r"Expected a column of size 3 but got column of size 4." + ) + ], + ids=["column_is_oversize"], +) +def test_should_raise_column_size_error_if_column_is_oversize( + tagged_table: TaggedTable, + column: Column, + error_msg: str, +) -> None: + with pytest.raises(ColumnSizeError, match=error_msg): + tagged_table.add_column_as_feature(column) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py index 325df54db..96cfe1a6a 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, Table, TaggedTable +from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -29,9 +30,49 @@ ], ids=["new column as feature", "table contains a non feature/target column"], ) -def test_add_column_as_feature( +def test_should_add_column_as_feature( tagged_table: TaggedTable, column: Column, tagged_table_with_new_column: TaggedTable, ) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_column_as_feature(column), tagged_table_with_new_column) + + +@pytest.mark.parametrize( + ("tagged_table", "column", "error_msg"), + [ + ( + TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), + Column("A", [7, 8, 9]), + r"Column 'A' already exists." + ) + ], + ids=["column_already_exists"], +) +def test_should_raise_duplicate_column_name_if_column_already_exists( + tagged_table: TaggedTable, + column: Column, + error_msg: str, +) -> None: + with pytest.raises(DuplicateColumnNameError, match=error_msg): + tagged_table.add_column_as_feature(column) + + +@pytest.mark.parametrize( + ("tagged_table", "column", "error_msg"), + [ + ( + TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), + Column("C", [5, 7, 8, 9]), + r"Expected a column of size 3 but got column of size 4." + ) + ], + ids=["column_is_oversize"], +) +def test_should_raise_column_size_error_if_column_is_oversize( + tagged_table: TaggedTable, + column: Column, + error_msg: str, +) -> None: + with pytest.raises(ColumnSizeError, match=error_msg): + tagged_table.add_column_as_feature(column) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index 8773e3695..c6866cc5f 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -1,5 +1,6 @@ import pytest -from safeds.data.tabular.containers import Column, TaggedTable +from safeds.data.tabular.containers import Column, Table, TaggedTable +from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -37,3 +38,43 @@ def test_should_add_columns( expected_tagged_table: TaggedTable, ) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_columns(columns), expected_tagged_table) + + +@pytest.mark.parametrize( + ("tagged_table", "columns", "error_msg"), + [ + ( + TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), + [Column("B", ["g", "h", "i"]), Column("C", ["g", "h", "i"])], + r"Column 'B' already exists." + ) + ], + ids=["column_already_exists"], +) +def test_should_raise_duplicate_column_name_if_column_already_exists( + tagged_table: TaggedTable, + columns: list[Column] | Table, + error_msg: str, +) -> None: + with pytest.raises(DuplicateColumnNameError, match=error_msg): + tagged_table.add_columns(columns) + + +@pytest.mark.parametrize( + ("tagged_table", "columns", "error_msg"), + [ + ( + TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), + [Column("C", ["g", "h", "i", "j"]), Column("D", ["a", "c", "b", "c"])], + r"Expected a column of size 3 but got column of size 4." + ) + ], + ids=["columns_are_oversize"], +) +def test_should_raise_column_size_error_if_columns_are_oversize( + tagged_table: TaggedTable, + columns: list[Column] | Table, + error_msg: str, +) -> None: + with pytest.raises(ColumnSizeError, match=error_msg): + tagged_table.add_columns(columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py index f1e7716b8..a9f479a06 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns_as_features.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, Table, TaggedTable +from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -43,3 +44,43 @@ def test_add_columns_as_features( tagged_table_with_new_columns: TaggedTable, ) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_columns_as_features(columns), tagged_table_with_new_columns) + + +@pytest.mark.parametrize( + ("tagged_table", "columns", "error_msg"), + [ + ( + TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), + [Column("A", [7, 8, 9]), Column("D", [10, 11, 12])], + r"Column 'A' already exists.", + ), + ], + ids=["column_already_exist"], +) +def test_add_columns_raise_duplicate_column_name_if_column_already_exist( + tagged_table: TaggedTable, + columns: list[Column] | Table, + error_msg: str, +) -> None: + with pytest.raises(DuplicateColumnNameError, match=error_msg): + tagged_table.add_columns_as_features(columns) + + +@pytest.mark.parametrize( + ("tagged_table", "columns", "error_msg"), + [ + ( + TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), + [Column("C", [5, 7, 8, 9]), Column("D", [4, 10, 11, 12])], + r"Expected a column of size 3 but got column of size 4.", + ), + ], + ids=["columns_are_oversize"], +) +def test_should_raise_column_size_error_if_columns_are_oversize( + tagged_table: TaggedTable, + columns: list[Column] | Table, + error_msg: str, +) -> None: + with pytest.raises(ColumnSizeError, match=error_msg): + tagged_table.add_columns_as_features(columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 2badeec11..8f7d9cda9 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import Row, TaggedTable +from safeds.exceptions import SchemaMismatchError from tests.helpers import assert_that_tagged_tables_are_equal @@ -34,3 +35,28 @@ ) def test_should_add_row(table: TaggedTable, row: Row, expected: TaggedTable) -> None: assert_that_tagged_tables_are_equal(table.add_row(row), expected) + + +@pytest.mark.parametrize( + ("tagged_table", "row", "error_msg"), + [ + ( + TaggedTable({"feature": [1, 2, 3], "target": [4, 5, 6]}, "target", ["feature"]), + Row({"feature": "a", "target": 8}), + r"Failed because at least two schemas didn't match." + ), + ( + TaggedTable({"feature": [], "target": []}, "target", ["feature"]), + Row({"feat": None, "targ": None}), + r"Failed because at least two schemas didn't match." + ), + ], + ids=["invalid_schemas", "schemas_mismatch"], +) +def test_should_raise_an_error_if_row_schema_invalid( + tagged_table: TaggedTable, + row: Row, + error_msg: str, +) -> None: + with pytest.raises(SchemaMismatchError, match=error_msg): + tagged_table.add_row(row) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index da8c37a5a..0416b0e89 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -1,5 +1,6 @@ import pytest -from safeds.data.tabular.containers import Row, TaggedTable +from safeds.data.tabular.containers import Row, Table, TaggedTable +from safeds.exceptions import SchemaMismatchError from tests.helpers import assert_that_tagged_tables_are_equal @@ -37,3 +38,28 @@ ) def test_should_add_rows(table: TaggedTable, rows: list[Row], expected: TaggedTable) -> None: assert_that_tagged_tables_are_equal(table.add_rows(rows), expected) + + +@pytest.mark.parametrize( + ("tagged_table", "rows", "error_msg"), + [ + ( + TaggedTable({"feature": [1, 2, 3], "target": [4, 5, 6]}, "target", ["feature"]), + [Row({"feature": 7, "target": 8}), Row({"feature": "a", "target": 9})], + r"Failed because at least two schemas didn't match." + ), + ( + TaggedTable({"feature": [], "target": []}, "target", ["feature"]), + [Row({"feat": None, "targ": None}), Row({"targ": None, "feat": None})], + r"Failed because at least two schemas didn't match." + ), + ], + ids=["invalid_schemas", "schemas_mismatch"], +) +def test_should_raise_an_error_if_rows_schemas_are_invalid( + tagged_table: TaggedTable, + rows: list[Row] | Table, + error_msg: str, +) -> None: + with pytest.raises(SchemaMismatchError, match=error_msg): + tagged_table.add_rows(rows) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py index 723dfc990..0d409e474 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import UnknownColumnNameError +from safeds.exceptions import ColumnLengthMismatchError, UnknownColumnNameError @pytest.mark.parametrize( @@ -63,6 +63,18 @@ ValueError, r"At least one feature column must be specified.", ), + ( + { + "A": [1, 4], + "B": [2, 5], + "C": [3, 6], + "T": [0, 1, 2], + }, + "T", + ["A", "B"], + ColumnLengthMismatchError, + r"The length of at least one column differs: \nA: 2\nB: 2\nC: 2\nT: 3", + ), ], ids=[ "feature_does_not_exist", @@ -70,6 +82,7 @@ "target_and_feature_overlap", "features_are_empty-explicitly", "features_are_empty_implicitly", + "Columns do not match", ], ) def test_should_raise_error( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index 975dca9e9..3c0d78454 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import IllegalSchemaModificationError +from safeds.exceptions import IllegalSchemaModificationError, UnknownColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -129,3 +129,18 @@ def test_should_raise_illegal_schema_modification(table: TaggedTable, column_nam match=error_msg, ): table.keep_only_columns(column_names) + + +@pytest.mark.parametrize( + ("tagged_table", "error_msg"), + [ + ( + TaggedTable({"feature": [1], "target": [2]}, "target", ["feature"]), + r"Could not find column\(s\) 'feat'" + ), + ], + ids=["unknown_column"], +) +def test_should_raise_error_if_column_name_unknown(tagged_table: TaggedTable, error_msg: str) -> None: + with pytest.raises(UnknownColumnNameError, match=error_msg): + tagged_table.keep_only_columns(["feat"]) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 9e8435885..671206cd8 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError +from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError, UnknownColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -168,8 +168,18 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: IllegalSchemaModificationError, r"Illegal schema modification: You cannot remove every feature column.", ), + ( + TaggedTable._from_table( + Table({"feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}), + "target", + ["feat"], + ), + ["feat", "feet"], + UnknownColumnNameError, + r"Could not find column\(s\) 'feet'", + ), ], - ids=["remove_only_target", "remove_non_feat_and_target", "remove_all_features", "remove_non_feat_and_all_features"], + ids=["remove_only_target", "remove_non_feat_and_target", "remove_all_features", "remove_non_feat_and_all_features", "remove_unknown_column"], ) def test_should_raise_in_remove_columns( table: TaggedTable, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 051c7fb90..b305296f1 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -1,5 +1,6 @@ import pytest from safeds.data.tabular.containers import TaggedTable +from safeds.exceptions import DuplicateColumnNameError, UnknownColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -76,7 +77,7 @@ ], ids=["rename_feature_column", "rename_target_column", "rename_non_feature_column"], ) -def test_should_add_column( +def test_should_rename_column( original_table: TaggedTable, old_column_name: str, new_column_name: str, @@ -84,3 +85,53 @@ def test_should_add_column( ) -> None: new_table = original_table.rename_column(old_column_name, new_column_name) assert_that_tagged_tables_are_equal(new_table, result_table) + + +@pytest.mark.parametrize( + ("original_table", "old_column_name", "new_column_name", "result_table", "error_msg"), + [ + ( + TaggedTable({"feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}, "target", ["feat"]), + "feet", + "feature", + TaggedTable({"feature": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}, "target", ["feature"]), + r"Could not find column\(s\) 'feet'", + ), + ], + ids=["column_does_not_exist"], +) +def test_should_raise_if_old_column_does_not_exist( + original_table: TaggedTable, + old_column_name: str, + new_column_name: str, + result_table: TaggedTable, + error_msg: str, +) -> None: + with pytest.raises(UnknownColumnNameError, match=error_msg): + assert_that_tagged_tables_are_equal(original_table.rename_column(old_column_name, new_column_name), + result_table) + + +@pytest.mark.parametrize( + ("original_table", "old_column_name", "new_column_name", "result_table", "error_msg"), + [ + ( + TaggedTable({"feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}, "target", ["feat"]), + "feat", + "non-feat", + TaggedTable({"feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}, "target", ["feat"]), + r"Column 'non-feat' already exists.", + ), + ], + ids=["column_already_exists"], +) +def test_should_raise_if_new_column_exists_already( + original_table: TaggedTable, + old_column_name: str, + new_column_name: str, + result_table: TaggedTable, + error_msg: str, +) -> None: + with pytest.raises(DuplicateColumnNameError, match=error_msg): + assert_that_tagged_tables_are_equal(original_table.rename_column(old_column_name, new_column_name), + result_table) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 72b773adc..f68ca2402 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -1,6 +1,11 @@ import pytest from safeds.data.tabular.containers import Column, TaggedTable -from safeds.exceptions import IllegalSchemaModificationError +from safeds.exceptions import ( + ColumnSizeError, + DuplicateColumnNameError, + IllegalSchemaModificationError, + UnknownColumnNameError, +) from tests.helpers import assert_that_tagged_tables_are_equal @@ -177,3 +182,45 @@ def test_should_throw_illegal_schema_modification( match='Target column "target_old" can only be replaced by exactly one new column.', ): original_table.replace_column(column_name_to_be_replaced, new_columns) + + +@pytest.mark.parametrize( + ("old_column_name", "column", "error", "error_message"), + [ + ( + "D", + [Column("C", ["d", "e", "f"])], + UnknownColumnNameError, + r"Could not find column\(s\) 'D'", + ), + ( + "target", + [Column("non-feat", ["d", "e", "f"]), Column("D", [3, 2, 1])], + DuplicateColumnNameError, + r"Column 'non-feat' already exists.", + ), + ( + "target", + [Column("D", [7, 8]), Column("E", ["c", "b"])], + ColumnSizeError, + r"Expected a column of size 3 but got column of size 2.", + ), + ], + ids=["UnknownColumnNameError", "DuplicateColumnNameError", "ColumnSizeError"], +) +def test_should_raise_error( + old_column_name: str, + column: list[Column], + error: type[Exception], + error_message: str, +) -> None: + input_table: TaggedTable = TaggedTable( + { + "feat": [1, 2, 3], + "non-feat": [4, 5, 6], + "target": ["a", "b", "c"], + }, "target", ["feat"] + ) + + with pytest.raises(error, match=error_message): + input_table.replace_column(old_column_name, column) From 5c0617cbe9f03d4538b5a106333638563d77fc7f Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 13 Jul 2023 12:35:40 +0000 Subject: [PATCH 2/9] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 5 ++--- .../containers/_table/_tagged_table/test_add_column.py | 8 ++++---- .../_table/_tagged_table/test_add_column_as_feature.py | 8 ++++---- .../_table/_tagged_table/test_add_columns.py | 8 ++++---- .../containers/_table/_tagged_table/test_add_row.py | 4 ++-- .../containers/_table/_tagged_table/test_add_rows.py | 4 ++-- .../_table/_tagged_table/test_keep_only_columns.py | 5 +---- .../_table/_tagged_table/test_remove_columns.py | 8 +++++++- .../_table/_tagged_table/test_rename_column.py | 10 ++++++---- .../_table/_tagged_table/test_replace_column.py | 4 +++- 10 files changed, 35 insertions(+), 29 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 59a4ca815..dd6ecb2f4 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -176,8 +176,7 @@ def __init__( expected_length = len(column_values) elif len(column_values) != expected_length: raise ColumnLengthMismatchError( - "\n".join( - f"{column_name}: {len(column_values)}" for column_name, column_values in data.items()) + "\n".join(f"{column_name}: {len(column_values)}" for column_name, column_values in data.items()), ) self._features: Table = _data.keep_only_columns(feature_names) @@ -813,7 +812,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag if old_column_name not in self.features.column_names else self.features.column_names[: self.features.column_names.index(old_column_name)] + [col.name for col in new_columns] - + self.features.column_names[self.features.column_names.index(old_column_name) + 1:] + + self.features.column_names[self.features.column_names.index(old_column_name) + 1 :] ), ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index d03b83bac..db103ad0c 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -41,8 +41,8 @@ def test_should_add_column(tagged_table: TaggedTable, column: Column, expected_t ( TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), Column("B", ["g", "h", "i"]), - r"Column 'B' already exists." - ) + r"Column 'B' already exists.", + ), ], ids=["column_already_exists"], ) @@ -61,8 +61,8 @@ def test_should_raise_duplicate_column_name_if_column_already_exists( ( TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), Column("C", ["g", "h", "i", "j"]), - r"Expected a column of size 3 but got column of size 4." - ) + r"Expected a column of size 3 but got column of size 4.", + ), ], ids=["column_is_oversize"], ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py index 96cfe1a6a..97566e34b 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column_as_feature.py @@ -44,8 +44,8 @@ def test_should_add_column_as_feature( ( TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), Column("A", [7, 8, 9]), - r"Column 'A' already exists." - ) + r"Column 'A' already exists.", + ), ], ids=["column_already_exists"], ) @@ -64,8 +64,8 @@ def test_should_raise_duplicate_column_name_if_column_already_exists( ( TaggedTable({"A": [1, 2, 3], "B": [4, 5, 6]}, target_name="B", feature_names=["A"]), Column("C", [5, 7, 8, 9]), - r"Expected a column of size 3 but got column of size 4." - ) + r"Expected a column of size 3 but got column of size 4.", + ), ], ids=["column_is_oversize"], ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index c6866cc5f..a172768d2 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -46,8 +46,8 @@ def test_should_add_columns( ( TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), [Column("B", ["g", "h", "i"]), Column("C", ["g", "h", "i"])], - r"Column 'B' already exists." - ) + r"Column 'B' already exists.", + ), ], ids=["column_already_exists"], ) @@ -66,8 +66,8 @@ def test_should_raise_duplicate_column_name_if_column_already_exists( ( TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), [Column("C", ["g", "h", "i", "j"]), Column("D", ["a", "c", "b", "c"])], - r"Expected a column of size 3 but got column of size 4." - ) + r"Expected a column of size 3 but got column of size 4.", + ), ], ids=["columns_are_oversize"], ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 8f7d9cda9..9f399b884 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -43,12 +43,12 @@ def test_should_add_row(table: TaggedTable, row: Row, expected: TaggedTable) -> ( TaggedTable({"feature": [1, 2, 3], "target": [4, 5, 6]}, "target", ["feature"]), Row({"feature": "a", "target": 8}), - r"Failed because at least two schemas didn't match." + r"Failed because at least two schemas didn't match.", ), ( TaggedTable({"feature": [], "target": []}, "target", ["feature"]), Row({"feat": None, "targ": None}), - r"Failed because at least two schemas didn't match." + r"Failed because at least two schemas didn't match.", ), ], ids=["invalid_schemas", "schemas_mismatch"], diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index 0416b0e89..c333a083a 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -46,12 +46,12 @@ def test_should_add_rows(table: TaggedTable, rows: list[Row], expected: TaggedTa ( TaggedTable({"feature": [1, 2, 3], "target": [4, 5, 6]}, "target", ["feature"]), [Row({"feature": 7, "target": 8}), Row({"feature": "a", "target": 9})], - r"Failed because at least two schemas didn't match." + r"Failed because at least two schemas didn't match.", ), ( TaggedTable({"feature": [], "target": []}, "target", ["feature"]), [Row({"feat": None, "targ": None}), Row({"targ": None, "feat": None})], - r"Failed because at least two schemas didn't match." + r"Failed because at least two schemas didn't match.", ), ], ids=["invalid_schemas", "schemas_mismatch"], diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index 3c0d78454..2d4560f54 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -134,10 +134,7 @@ def test_should_raise_illegal_schema_modification(table: TaggedTable, column_nam @pytest.mark.parametrize( ("tagged_table", "error_msg"), [ - ( - TaggedTable({"feature": [1], "target": [2]}, "target", ["feature"]), - r"Could not find column\(s\) 'feat'" - ), + (TaggedTable({"feature": [1], "target": [2]}, "target", ["feature"]), r"Could not find column\(s\) 'feat'"), ], ids=["unknown_column"], ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 671206cd8..7d4f47f42 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -179,7 +179,13 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: r"Could not find column\(s\) 'feet'", ), ], - ids=["remove_only_target", "remove_non_feat_and_target", "remove_all_features", "remove_non_feat_and_all_features", "remove_unknown_column"], + ids=[ + "remove_only_target", + "remove_non_feat_and_target", + "remove_all_features", + "remove_non_feat_and_all_features", + "remove_unknown_column", + ], ) def test_should_raise_in_remove_columns( table: TaggedTable, diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index b305296f1..4d8f99b76 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -108,8 +108,9 @@ def test_should_raise_if_old_column_does_not_exist( error_msg: str, ) -> None: with pytest.raises(UnknownColumnNameError, match=error_msg): - assert_that_tagged_tables_are_equal(original_table.rename_column(old_column_name, new_column_name), - result_table) + assert_that_tagged_tables_are_equal( + original_table.rename_column(old_column_name, new_column_name), result_table, + ) @pytest.mark.parametrize( @@ -133,5 +134,6 @@ def test_should_raise_if_new_column_exists_already( error_msg: str, ) -> None: with pytest.raises(DuplicateColumnNameError, match=error_msg): - assert_that_tagged_tables_are_equal(original_table.rename_column(old_column_name, new_column_name), - result_table) + assert_that_tagged_tables_are_equal( + original_table.rename_column(old_column_name, new_column_name), result_table, + ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index f68ca2402..296a8b36e 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -219,7 +219,9 @@ def test_should_raise_error( "feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": ["a", "b", "c"], - }, "target", ["feat"] + }, + "target", + ["feat"], ) with pytest.raises(error, match=error_message): From be3521a2cd6f7b8adba45e4e46a9f28ac4196066 Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 13 Jul 2023 12:37:24 +0000 Subject: [PATCH 3/9] style: apply automated linter fixes --- .../containers/_table/_tagged_table/test_rename_column.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 4d8f99b76..3446a764a 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -109,7 +109,8 @@ def test_should_raise_if_old_column_does_not_exist( ) -> None: with pytest.raises(UnknownColumnNameError, match=error_msg): assert_that_tagged_tables_are_equal( - original_table.rename_column(old_column_name, new_column_name), result_table, + original_table.rename_column(old_column_name, new_column_name), + result_table, ) @@ -135,5 +136,6 @@ def test_should_raise_if_new_column_exists_already( ) -> None: with pytest.raises(DuplicateColumnNameError, match=error_msg): assert_that_tagged_tables_are_equal( - original_table.rename_column(old_column_name, new_column_name), result_table, + original_table.rename_column(old_column_name, new_column_name), + result_table, ) From f7a6d2e6e44f891e7ac5426fe41484fcb8df36f9 Mon Sep 17 00:00:00 2001 From: jxnior01 Date: Thu, 13 Jul 2023 17:30:41 +0200 Subject: [PATCH 4/9] removed unnecessary Exceptions in code implementation and tests --- .../data/tabular/containers/_tagged_table.py | 92 ++----------------- .../_table/_tagged_table/test_add_column.py | 41 --------- .../_table/_tagged_table/test_add_columns.py | 43 +-------- .../_table/_tagged_table/test_init.py | 15 +-- .../_tagged_table/test_keep_only_columns.py | 14 +-- .../_tagged_table/test_remove_columns.py | 13 +-- .../_tagged_table/test_rename_column.py | 55 ----------- .../_tagged_table/test_replace_column.py | 51 +--------- .../{test_rename.py => test_rename_column.py} | 0 9 files changed, 11 insertions(+), 313 deletions(-) rename tests/safeds/data/tabular/containers/_table/{test_rename.py => test_rename_column.py} (100%) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index dd6ecb2f4..bd132da73 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -8,11 +8,9 @@ from safeds.data.tabular.containers import Column, Row, Table from safeds.exceptions import ( ColumnIsTargetError, - ColumnLengthMismatchError, ColumnSizeError, DuplicateColumnNameError, IllegalSchemaModificationError, - IndexOutOfBoundsError, SchemaMismatchError, UnknownColumnNameError, ) @@ -169,16 +167,6 @@ def __init__( if len(feature_names) == 0: raise ValueError("At least one feature column must be specified.") - # Validate column lengths - expected_length: int | None = None - for column_values in data.values(): - if expected_length is None: - expected_length = len(column_values) - elif len(column_values) != expected_length: - raise ColumnLengthMismatchError( - "\n".join(f"{column_name}: {len(column_values)}" for column_name, column_values in data.items()), - ) - self._features: Table = _data.keep_only_columns(feature_names) self._target: Column = _data.get_column(target_name) @@ -350,12 +338,6 @@ def add_column(self, column: Column) -> TaggedTable: ColumnSizeError If the size of the column does not match the amount of rows. """ - if column.name in self.column_names: - raise DuplicateColumnNameError(f"Column '{column.name}' already exists.") - - if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0: - raise ColumnSizeError(str(self.number_of_rows), str(column.number_of_rows)) - return TaggedTable._from_table( super().add_column(column), target_name=self.target.name, @@ -385,16 +367,6 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. """ - if isinstance(columns, Table): - columns = columns.to_columns() - - for column in columns: - if column.name in self.column_names: - raise DuplicateColumnNameError(f"Column '{column.name}' already exists.") - - if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0: - raise ColumnSizeError(str(self.number_of_rows), str(column.number_of_rows)) - return TaggedTable._from_table( super().add_columns(columns), target_name=self.target.name, @@ -515,13 +487,6 @@ def keep_only_columns(self, column_names: list[str]) -> TaggedTable: IllegalSchemaModificationError If none of the given columns is the target column or any of the feature columns. """ - invalid_columns = [] - for name in column_names: - if not self._schema.has_column(name): - invalid_columns.append(name) - if len(invalid_columns) != 0: - raise UnknownColumnNameError(invalid_columns) - if self.target.name not in column_names: raise IllegalSchemaModificationError("Must keep the target column.") if len(set(self.features.column_names).intersection(set(column_names))) == 0: @@ -560,13 +525,6 @@ def remove_columns(self, column_names: list[str]) -> TaggedTable: IllegalSchemaModificationError If the given columns contain all the feature columns. """ - invalid_columns = [] - for name in column_names: - if not self._schema.has_column(name): - invalid_columns.append(name) - if len(invalid_columns) != 0: - raise UnknownColumnNameError(invalid_columns) - if self.target.name in column_names: raise ColumnIsTargetError(self.target.name) if len(set(self.features.column_names) - set(column_names)) == 0: @@ -724,13 +682,6 @@ def rename_column(self, old_name: str, new_name: str) -> TaggedTable: DuplicateColumnNameError If the specified new target column name already exists. """ - if old_name not in self._schema.column_names: - raise UnknownColumnNameError([old_name]) - if old_name == new_name: - return self - if new_name in self._schema.column_names: - raise DuplicateColumnNameError(new_name) - return TaggedTable._from_table( super().rename_column(old_name, new_name), target_name=new_name if self.target.name == old_name else self.target.name, @@ -776,22 +727,6 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag IllegalSchemaModificationError If the target column would be removed or replaced by more than one column. """ - if old_column_name not in self._schema.column_names: - raise UnknownColumnNameError([old_column_name]) - - columns = list[Column]() - for old_column in self.column_names: - if old_column == old_column_name: - for new_column in new_columns: - if new_column.name in self.column_names and new_column.name != old_column_name: - raise DuplicateColumnNameError(new_column.name) - - if self.number_of_rows != new_column.number_of_rows: - raise ColumnSizeError(str(self.number_of_rows), str(new_column.number_of_rows)) - columns.append(new_column) - else: - columns.append(self.get_column(old_column)) - if old_column_name == self.target.name: if len(new_columns) != 1: raise IllegalSchemaModificationError( @@ -812,7 +747,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag if old_column_name not in self.features.column_names else self.features.column_names[: self.features.column_names.index(old_column_name)] + [col.name for col in new_columns] - + self.features.column_names[self.features.column_names.index(old_column_name) + 1 :] + + self.features.column_names[self.features.column_names.index(old_column_name) + 1:] ), ) @@ -863,17 +798,6 @@ def slice_rows( IndexOutOfBoundsError If the index is out of bounds. """ - if start is None: - start = 0 - - if end is None: - end = self.number_of_rows - - if end < start: - raise IndexOutOfBoundsError(slice(start, end)) - if start < 0 or end < 0 or start > self.number_of_rows or end > self.number_of_rows: - raise IndexOutOfBoundsError(start if start < 0 or start > self.number_of_rows else end) - return TaggedTable._from_table( super().slice_rows(start, end, step), target_name=self.target.name, @@ -964,12 +888,8 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg UnknownColumnNameError If the column does not exist. """ - if self.has_column(name): - items: list = [transformer(item) for item in self.to_rows()] - result: list[Column] = [Column(name, items)] - return TaggedTable._from_table( - super().replace_column(name, result), - target_name=self.target.name, - feature_names=self.features.column_names, - ) - raise UnknownColumnNameError([name]) + return TaggedTable._from_table( + super().transform_column(name, transformer), + target_name=self.target.name, + feature_names=self.features.column_names, + ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py index db103ad0c..22bd87605 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_column.py @@ -1,6 +1,5 @@ import pytest from safeds.data.tabular.containers import Column, TaggedTable -from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -33,43 +32,3 @@ ) def test_should_add_column(tagged_table: TaggedTable, column: Column, expected_tagged_table: TaggedTable) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_column(column), expected_tagged_table) - - -@pytest.mark.parametrize( - ("tagged_table", "column", "error_msg"), - [ - ( - TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), - Column("B", ["g", "h", "i"]), - r"Column 'B' already exists.", - ), - ], - ids=["column_already_exists"], -) -def test_should_raise_duplicate_column_name_if_column_already_exists( - tagged_table: TaggedTable, - column: Column, - error_msg: str, -) -> None: - with pytest.raises(DuplicateColumnNameError, match=error_msg): - tagged_table.add_column_as_feature(column) - - -@pytest.mark.parametrize( - ("tagged_table", "column", "error_msg"), - [ - ( - TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), - Column("C", ["g", "h", "i", "j"]), - r"Expected a column of size 3 but got column of size 4.", - ), - ], - ids=["column_is_oversize"], -) -def test_should_raise_column_size_error_if_column_is_oversize( - tagged_table: TaggedTable, - column: Column, - error_msg: str, -) -> None: - with pytest.raises(ColumnSizeError, match=error_msg): - tagged_table.add_column_as_feature(column) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py index a172768d2..8773e3695 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_columns.py @@ -1,6 +1,5 @@ import pytest -from safeds.data.tabular.containers import Column, Table, TaggedTable -from safeds.exceptions import ColumnSizeError, DuplicateColumnNameError +from safeds.data.tabular.containers import Column, TaggedTable from tests.helpers import assert_that_tagged_tables_are_equal @@ -38,43 +37,3 @@ def test_should_add_columns( expected_tagged_table: TaggedTable, ) -> None: assert_that_tagged_tables_are_equal(tagged_table.add_columns(columns), expected_tagged_table) - - -@pytest.mark.parametrize( - ("tagged_table", "columns", "error_msg"), - [ - ( - TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), - [Column("B", ["g", "h", "i"]), Column("C", ["g", "h", "i"])], - r"Column 'B' already exists.", - ), - ], - ids=["column_already_exists"], -) -def test_should_raise_duplicate_column_name_if_column_already_exists( - tagged_table: TaggedTable, - columns: list[Column] | Table, - error_msg: str, -) -> None: - with pytest.raises(DuplicateColumnNameError, match=error_msg): - tagged_table.add_columns(columns) - - -@pytest.mark.parametrize( - ("tagged_table", "columns", "error_msg"), - [ - ( - TaggedTable({"A": ["a", "b", "c"], "B": ["d", "e", "f"]}, target_name="B", feature_names=["A"]), - [Column("C", ["g", "h", "i", "j"]), Column("D", ["a", "c", "b", "c"])], - r"Expected a column of size 3 but got column of size 4.", - ), - ], - ids=["columns_are_oversize"], -) -def test_should_raise_column_size_error_if_columns_are_oversize( - tagged_table: TaggedTable, - columns: list[Column] | Table, - error_msg: str, -) -> None: - with pytest.raises(ColumnSizeError, match=error_msg): - tagged_table.add_columns(columns) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py index 0d409e474..723dfc990 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_init.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ColumnLengthMismatchError, UnknownColumnNameError +from safeds.exceptions import UnknownColumnNameError @pytest.mark.parametrize( @@ -63,18 +63,6 @@ ValueError, r"At least one feature column must be specified.", ), - ( - { - "A": [1, 4], - "B": [2, 5], - "C": [3, 6], - "T": [0, 1, 2], - }, - "T", - ["A", "B"], - ColumnLengthMismatchError, - r"The length of at least one column differs: \nA: 2\nB: 2\nC: 2\nT: 3", - ), ], ids=[ "feature_does_not_exist", @@ -82,7 +70,6 @@ "target_and_feature_overlap", "features_are_empty-explicitly", "features_are_empty_implicitly", - "Columns do not match", ], ) def test_should_raise_error( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py index 2d4560f54..975dca9e9 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_keep_only_columns.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import IllegalSchemaModificationError, UnknownColumnNameError +from safeds.exceptions import IllegalSchemaModificationError from tests.helpers import assert_that_tagged_tables_are_equal @@ -129,15 +129,3 @@ def test_should_raise_illegal_schema_modification(table: TaggedTable, column_nam match=error_msg, ): table.keep_only_columns(column_names) - - -@pytest.mark.parametrize( - ("tagged_table", "error_msg"), - [ - (TaggedTable({"feature": [1], "target": [2]}, "target", ["feature"]), r"Could not find column\(s\) 'feat'"), - ], - ids=["unknown_column"], -) -def test_should_raise_error_if_column_name_unknown(tagged_table: TaggedTable, error_msg: str) -> None: - with pytest.raises(UnknownColumnNameError, match=error_msg): - tagged_table.keep_only_columns(["feat"]) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py index 7d4f47f42..519e570f8 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_remove_columns.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Table, TaggedTable -from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError, UnknownColumnNameError +from safeds.exceptions import ColumnIsTargetError, IllegalSchemaModificationError from tests.helpers import assert_that_tagged_tables_are_equal @@ -168,23 +168,12 @@ def test_should_remove_columns(table: TaggedTable, columns: list[str], expected: IllegalSchemaModificationError, r"Illegal schema modification: You cannot remove every feature column.", ), - ( - TaggedTable._from_table( - Table({"feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}), - "target", - ["feat"], - ), - ["feat", "feet"], - UnknownColumnNameError, - r"Could not find column\(s\) 'feet'", - ), ], ids=[ "remove_only_target", "remove_non_feat_and_target", "remove_all_features", "remove_non_feat_and_all_features", - "remove_unknown_column", ], ) def test_should_raise_in_remove_columns( diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py index 3446a764a..75ee76e34 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_rename_column.py @@ -1,6 +1,5 @@ import pytest from safeds.data.tabular.containers import TaggedTable -from safeds.exceptions import DuplicateColumnNameError, UnknownColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -85,57 +84,3 @@ def test_should_rename_column( ) -> None: new_table = original_table.rename_column(old_column_name, new_column_name) assert_that_tagged_tables_are_equal(new_table, result_table) - - -@pytest.mark.parametrize( - ("original_table", "old_column_name", "new_column_name", "result_table", "error_msg"), - [ - ( - TaggedTable({"feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}, "target", ["feat"]), - "feet", - "feature", - TaggedTable({"feature": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}, "target", ["feature"]), - r"Could not find column\(s\) 'feet'", - ), - ], - ids=["column_does_not_exist"], -) -def test_should_raise_if_old_column_does_not_exist( - original_table: TaggedTable, - old_column_name: str, - new_column_name: str, - result_table: TaggedTable, - error_msg: str, -) -> None: - with pytest.raises(UnknownColumnNameError, match=error_msg): - assert_that_tagged_tables_are_equal( - original_table.rename_column(old_column_name, new_column_name), - result_table, - ) - - -@pytest.mark.parametrize( - ("original_table", "old_column_name", "new_column_name", "result_table", "error_msg"), - [ - ( - TaggedTable({"feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}, "target", ["feat"]), - "feat", - "non-feat", - TaggedTable({"feat": [1, 2, 3], "non-feat": [4, 5, 6], "target": [7, 8, 9]}, "target", ["feat"]), - r"Column 'non-feat' already exists.", - ), - ], - ids=["column_already_exists"], -) -def test_should_raise_if_new_column_exists_already( - original_table: TaggedTable, - old_column_name: str, - new_column_name: str, - result_table: TaggedTable, - error_msg: str, -) -> None: - with pytest.raises(DuplicateColumnNameError, match=error_msg): - assert_that_tagged_tables_are_equal( - original_table.rename_column(old_column_name, new_column_name), - result_table, - ) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py index 296a8b36e..72b773adc 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_replace_column.py @@ -1,11 +1,6 @@ import pytest from safeds.data.tabular.containers import Column, TaggedTable -from safeds.exceptions import ( - ColumnSizeError, - DuplicateColumnNameError, - IllegalSchemaModificationError, - UnknownColumnNameError, -) +from safeds.exceptions import IllegalSchemaModificationError from tests.helpers import assert_that_tagged_tables_are_equal @@ -182,47 +177,3 @@ def test_should_throw_illegal_schema_modification( match='Target column "target_old" can only be replaced by exactly one new column.', ): original_table.replace_column(column_name_to_be_replaced, new_columns) - - -@pytest.mark.parametrize( - ("old_column_name", "column", "error", "error_message"), - [ - ( - "D", - [Column("C", ["d", "e", "f"])], - UnknownColumnNameError, - r"Could not find column\(s\) 'D'", - ), - ( - "target", - [Column("non-feat", ["d", "e", "f"]), Column("D", [3, 2, 1])], - DuplicateColumnNameError, - r"Column 'non-feat' already exists.", - ), - ( - "target", - [Column("D", [7, 8]), Column("E", ["c", "b"])], - ColumnSizeError, - r"Expected a column of size 3 but got column of size 2.", - ), - ], - ids=["UnknownColumnNameError", "DuplicateColumnNameError", "ColumnSizeError"], -) -def test_should_raise_error( - old_column_name: str, - column: list[Column], - error: type[Exception], - error_message: str, -) -> None: - input_table: TaggedTable = TaggedTable( - { - "feat": [1, 2, 3], - "non-feat": [4, 5, 6], - "target": ["a", "b", "c"], - }, - "target", - ["feat"], - ) - - with pytest.raises(error, match=error_message): - input_table.replace_column(old_column_name, column) diff --git a/tests/safeds/data/tabular/containers/_table/test_rename.py b/tests/safeds/data/tabular/containers/_table/test_rename_column.py similarity index 100% rename from tests/safeds/data/tabular/containers/_table/test_rename.py rename to tests/safeds/data/tabular/containers/_table/test_rename_column.py From 9e09ae761ac6a126d687eb7208276f747fb393cc Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 13 Jul 2023 15:32:32 +0000 Subject: [PATCH 5/9] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_tagged_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index bd132da73..2a14cd1d5 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -747,7 +747,7 @@ def replace_column(self, old_column_name: str, new_columns: list[Column]) -> Tag if old_column_name not in self.features.column_names else self.features.column_names[: self.features.column_names.index(old_column_name)] + [col.name for col in new_columns] - + self.features.column_names[self.features.column_names.index(old_column_name) + 1:] + + self.features.column_names[self.features.column_names.index(old_column_name) + 1 :] ), ) From 90d948ce1a0be6e4cc7884939020c05d28b764b1 Mon Sep 17 00:00:00 2001 From: jxnior01 Date: Thu, 13 Jul 2023 18:42:39 +0200 Subject: [PATCH 6/9] code coverage --- .../_table/_tagged_table/test_add_row.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 9f399b884..12afad0cf 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -60,3 +60,22 @@ def test_should_raise_an_error_if_row_schema_invalid( ) -> None: with pytest.raises(SchemaMismatchError, match=error_msg): tagged_table.add_row(row) + + +@pytest.mark.parametrize( + ("tagged_table", "row", "expected_table"), + [ + ( + TaggedTable({"feature": [], "target": []}, "target"), + Row({"feature": 2, "target": 5}), + TaggedTable({"feature": [2], "target": [5]}, "target"), + ), + ], + ids=["empty_feature_column"], +) +def test_should_add_row_to_empty_table( + tagged_table: TaggedTable, + row: Row, + expected_table: TaggedTable, +) -> None: + assert_that_tagged_tables_are_equal(tagged_table.add_row(row), expected_table) From fa8c47fa4fb8769fb29677f83d7c663ca2de5081 Mon Sep 17 00:00:00 2001 From: jxnior01 Date: Thu, 13 Jul 2023 18:51:17 +0200 Subject: [PATCH 7/9] code coverage --- src/safeds/data/tabular/containers/_tagged_table.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 2a14cd1d5..b9cb33958 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -427,8 +427,6 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable: SchemaMismatchError If the schema of on of the row does not match the table schema. """ - if isinstance(rows, Table): - rows = rows.to_rows() for row in rows: if self.number_of_rows == 0: if self.number_of_columns == 0: From d3699227f246e6764f9c5b9f1fc055847847ce17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Gr=C3=A9us?= Date: Thu, 13 Jul 2023 18:59:13 +0200 Subject: [PATCH 8/9] refactor: Removed unnecessary errors in `TaggedTable` as they are already raised in `Table` docs: Improved some docstrings in `Table` and `TaggedTable` refactor: Fixed the order of column names for `UnknownColumnError` in `Table.add_row` and `Table.add_rows` and improved consistency feat: Removed `SchemaMismatchError` as we don't need it --- src/safeds/data/tabular/containers/_table.py | 13 ++-- .../data/tabular/containers/_tagged_table.py | 62 +++---------------- src/safeds/exceptions/__init__.py | 2 - src/safeds/exceptions/_data.py | 7 --- .../_table/_tagged_table/test_add_row.py | 13 ++-- .../_table/_tagged_table/test_add_rows.py | 13 ++-- .../tabular/containers/_table/test_add_row.py | 7 ++- .../containers/_table/test_add_rows.py | 9 ++- 8 files changed, 36 insertions(+), 90 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index ae26a62bc..2e0e875da 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -833,7 +833,7 @@ def add_column(self, column: Column) -> Table: DuplicateColumnNameError If the new column already exists. ColumnSizeError - If the size of the column does not match the amount of rows. + If the size of the column does not match the number of rows. Examples -------- @@ -874,10 +874,10 @@ def add_columns(self, columns: list[Column] | Table) -> Table: Raises ------ - ColumnSizeError - If at least one of the column sizes from the provided column list does not match the table. DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. + ColumnSizeError + If at least one of the column sizes from the provided column list does not match the table. Examples -------- @@ -945,7 +945,7 @@ def add_row(self, row: Row) -> Table: if self.number_of_columns == 0: return Table.from_rows([row]) if len(set(self.column_names) - set(row.column_names)) > 0: - raise UnknownColumnNameError(list(set(self.column_names) - set(row.column_names))) + raise UnknownColumnNameError(sorted(set(self.column_names) - set(row.column_names), key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,)) if result.number_of_rows == 0: int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64 | np.int32), row.column_names)) @@ -998,16 +998,15 @@ def add_rows(self, rows: list[Row] | Table) -> Table: """ if isinstance(rows, Table): rows = rows.to_rows() - result = self._copy() if len(rows) == 0: return self._copy() different_column_names = set() for row in rows: - different_column_names.update(set(rows[0].column_names) - set(row.column_names)) + different_column_names.update(set(self.column_names) - set(row.column_names)) if len(different_column_names) > 0: - raise UnknownColumnNameError(list(different_column_names)) + raise UnknownColumnNameError(sorted(different_column_names, key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,)) result = self._copy() diff --git a/src/safeds/data/tabular/containers/_tagged_table.py b/src/safeds/data/tabular/containers/_tagged_table.py index 2a14cd1d5..2f02d1ca6 100644 --- a/src/safeds/data/tabular/containers/_tagged_table.py +++ b/src/safeds/data/tabular/containers/_tagged_table.py @@ -3,15 +3,10 @@ import copy from typing import TYPE_CHECKING -import pandas as pd - from safeds.data.tabular.containers import Column, Row, Table from safeds.exceptions import ( ColumnIsTargetError, - ColumnSizeError, - DuplicateColumnNameError, IllegalSchemaModificationError, - SchemaMismatchError, UnknownColumnNameError, ) @@ -240,12 +235,6 @@ def add_column_as_feature(self, column: Column) -> TaggedTable: ColumnSizeError If the size of the column does not match the number of rows. """ - if column.name in self.column_names: - raise DuplicateColumnNameError(f"Column '{column.name}' already exists.") - - if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0: - raise ColumnSizeError(str(self.number_of_rows), str(column.number_of_rows)) - return TaggedTable._from_table( super().add_column(column), target_name=self.target.name, @@ -275,16 +264,6 @@ def add_columns_as_features(self, columns: list[Column] | Table) -> TaggedTable: ColumnSizeError If the size of any feature column does not match the number of rows. """ - if isinstance(columns, Table): - columns = columns.to_columns() - - for column in columns: - if column.name in self.column_names: - raise DuplicateColumnNameError(column.name) - - if column.number_of_rows != self.number_of_rows and self.number_of_columns != 0: - raise ColumnSizeError(str(self.number_of_rows), str(column.number_of_rows)) - return TaggedTable._from_table( super().add_columns(columns), target_name=self.target.name, @@ -336,7 +315,7 @@ def add_column(self, column: Column) -> TaggedTable: DuplicateColumnNameError If the new column already exists. ColumnSizeError - If the size of the column does not match the amount of rows. + If the size of the column does not match the number of rows. """ return TaggedTable._from_table( super().add_column(column), @@ -362,10 +341,10 @@ def add_columns(self, columns: list[Column] | Table) -> TaggedTable: Raises ------ - ColumnSizeError - If at least one of the column sizes from the provided column list does not match the table. DuplicateColumnNameError If at least one column name from the provided column list already exists in the table. + ColumnSizeError + If at least one of the column sizes from the provided column list does not match the table. """ return TaggedTable._from_table( super().add_columns(columns), @@ -391,19 +370,9 @@ def add_row(self, row: Row) -> TaggedTable: Raises ------ - SchemaMismatchError - If the schema of the row does not match the table schema. - """ - if self.number_of_rows == 0: - if self.number_of_columns == 0: - for column in row.column_names: - self._data[column] = Column(column, []) - self._schema = Table._from_pandas_dataframe(pd.DataFrame(columns=row.column_names))._schema - elif self.column_names != row.column_names: - raise SchemaMismatchError - elif self._schema != row.schema: - raise SchemaMismatchError - + UnknownColumnNameError + If the row has different column names than the table. + """ return TaggedTable._from_table(super().add_row(row), target_name=self.target.name) def add_rows(self, rows: list[Row] | Table) -> TaggedTable: @@ -424,22 +393,9 @@ def add_rows(self, rows: list[Row] | Table) -> TaggedTable: Raises ------ - SchemaMismatchError - If the schema of on of the row does not match the table schema. - """ - if isinstance(rows, Table): - rows = rows.to_rows() - for row in rows: - if self.number_of_rows == 0: - if self.number_of_columns == 0: - for column in row.column_names: - self._data[column] = Column(column, []) - self._schema = Table._from_pandas_dataframe(pd.DataFrame(columns=self.column_names))._schema - elif self.column_names != row.column_names: - raise SchemaMismatchError - elif self._schema != row.schema: - raise SchemaMismatchError - + UnknownColumnNameError + If at least one of the rows have different column names than the table. + """ return TaggedTable._from_table(super().add_rows(rows), target_name=self.target.name) def filter_rows(self, query: Callable[[Row], bool]) -> TaggedTable: diff --git a/src/safeds/exceptions/__init__.py b/src/safeds/exceptions/__init__.py index 021736287..352334cf5 100644 --- a/src/safeds/exceptions/__init__.py +++ b/src/safeds/exceptions/__init__.py @@ -9,7 +9,6 @@ IndexOutOfBoundsError, MissingValuesColumnError, NonNumericColumnError, - SchemaMismatchError, TransformerNotFittedError, UnknownColumnNameError, ValueNotPresentWhenFittedError, @@ -43,7 +42,6 @@ "IndexOutOfBoundsError", "MissingValuesColumnError", "NonNumericColumnError", - "SchemaMismatchError", "TransformerNotFittedError", "UnknownColumnNameError", "ValueNotPresentWhenFittedError", diff --git a/src/safeds/exceptions/_data.py b/src/safeds/exceptions/_data.py index f11c7a334..2d2fb7880 100644 --- a/src/safeds/exceptions/_data.py +++ b/src/safeds/exceptions/_data.py @@ -93,13 +93,6 @@ def __init__(self, expected_size: str, actual_size: str): super().__init__(f"Expected a column of size {expected_size} but got column of size {actual_size}.") -class SchemaMismatchError(Exception): - """Exception raised when schemas are unequal.""" - - def __init__(self) -> None: - super().__init__("Failed because at least two schemas didn't match.") - - class ColumnLengthMismatchError(Exception): """Exception raised when the lengths of two or more columns do not match.""" diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py index 9f399b884..f4eff7403 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_row.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Row, TaggedTable -from safeds.exceptions import SchemaMismatchError +from safeds.exceptions import UnknownColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -40,23 +40,18 @@ def test_should_add_row(table: TaggedTable, row: Row, expected: TaggedTable) -> @pytest.mark.parametrize( ("tagged_table", "row", "error_msg"), [ - ( - TaggedTable({"feature": [1, 2, 3], "target": [4, 5, 6]}, "target", ["feature"]), - Row({"feature": "a", "target": 8}), - r"Failed because at least two schemas didn't match.", - ), ( TaggedTable({"feature": [], "target": []}, "target", ["feature"]), Row({"feat": None, "targ": None}), - r"Failed because at least two schemas didn't match.", + r"Could not find column\(s\) 'feature, target'", ), ], - ids=["invalid_schemas", "schemas_mismatch"], + ids=["columns_missing"], ) def test_should_raise_an_error_if_row_schema_invalid( tagged_table: TaggedTable, row: Row, error_msg: str, ) -> None: - with pytest.raises(SchemaMismatchError, match=error_msg): + with pytest.raises(UnknownColumnNameError, match=error_msg): tagged_table.add_row(row) diff --git a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py index c333a083a..31b614776 100644 --- a/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/_tagged_table/test_add_rows.py @@ -1,6 +1,6 @@ import pytest from safeds.data.tabular.containers import Row, Table, TaggedTable -from safeds.exceptions import SchemaMismatchError +from safeds.exceptions import UnknownColumnNameError from tests.helpers import assert_that_tagged_tables_are_equal @@ -43,23 +43,18 @@ def test_should_add_rows(table: TaggedTable, rows: list[Row], expected: TaggedTa @pytest.mark.parametrize( ("tagged_table", "rows", "error_msg"), [ - ( - TaggedTable({"feature": [1, 2, 3], "target": [4, 5, 6]}, "target", ["feature"]), - [Row({"feature": 7, "target": 8}), Row({"feature": "a", "target": 9})], - r"Failed because at least two schemas didn't match.", - ), ( TaggedTable({"feature": [], "target": []}, "target", ["feature"]), [Row({"feat": None, "targ": None}), Row({"targ": None, "feat": None})], - r"Failed because at least two schemas didn't match.", + r"Could not find column\(s\) 'feature, target'", ), ], - ids=["invalid_schemas", "schemas_mismatch"], + ids=["columns_missing"], ) def test_should_raise_an_error_if_rows_schemas_are_invalid( tagged_table: TaggedTable, rows: list[Row] | Table, error_msg: str, ) -> None: - with pytest.raises(SchemaMismatchError, match=error_msg): + with pytest.raises(UnknownColumnNameError, match=error_msg): tagged_table.add_rows(rows) diff --git a/tests/safeds/data/tabular/containers/_table/test_add_row.py b/tests/safeds/data/tabular/containers/_table/test_add_row.py index d64e47f41..ab5f81367 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_row.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_row.py @@ -62,8 +62,13 @@ def test_should_add_row(table: Table, row: Row, expected: Table, expected_schema Row({"col1": 5, "col3": "Hallo"}), r"Could not find column\(s\) 'col2'", ), + ( + Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), + Row({"col3": 5, "col4": "Hallo"}), + r"Could not find column\(s\) 'col1, col2'", + ), ], - ids=["unknown column col2 in row"], + ids=["unknown column col2 in row", "multiple columns missing"], ) def test_should_raise_error_if_row_column_names_invalid(table: Table, row: Row, expected_error_msg: str) -> None: with raises(UnknownColumnNameError, match=expected_error_msg): diff --git a/tests/safeds/data/tabular/containers/_table/test_add_rows.py b/tests/safeds/data/tabular/containers/_table/test_add_rows.py index b5cd8742a..76b1f345f 100644 --- a/tests/safeds/data/tabular/containers/_table/test_add_rows.py +++ b/tests/safeds/data/tabular/containers/_table/test_add_rows.py @@ -73,10 +73,15 @@ def test_should_add_rows_from_table(table1: Table, table2: Table, expected: Tabl ( Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), [Row({"col1": 2, "col3": 4}), Row({"col1": 5, "col2": "Hallo"})], - r"Could not find column\(s\) 'col3'", + r"Could not find column\(s\) 'col2'", + ), + ( + Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), + [Row({"col1": 2, "col3": 4}), Row({"colA": 5, "col2": "Hallo"})], + r"Could not find column\(s\) 'col1, col2'", ), ], - ids=["column names do not match"], + ids=["column names do not match", "multiple columns missing"], ) def test_should_raise_error_if_row_column_names_invalid(table: Table, rows: list[Row], expected_error_msg: str) -> None: with pytest.raises(UnknownColumnNameError, match=expected_error_msg): From 911721dfa048c3f8d9d2168891f151f63110522b Mon Sep 17 00:00:00 2001 From: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> Date: Thu, 13 Jul 2023 17:02:26 +0000 Subject: [PATCH 9/9] style: apply automated linter fixes --- src/safeds/data/tabular/containers/_table.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py index 2e0e875da..0c9c1ee0d 100644 --- a/src/safeds/data/tabular/containers/_table.py +++ b/src/safeds/data/tabular/containers/_table.py @@ -945,7 +945,12 @@ def add_row(self, row: Row) -> Table: if self.number_of_columns == 0: return Table.from_rows([row]) if len(set(self.column_names) - set(row.column_names)) > 0: - raise UnknownColumnNameError(sorted(set(self.column_names) - set(row.column_names), key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,)) + raise UnknownColumnNameError( + sorted( + set(self.column_names) - set(row.column_names), + key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__, + ), + ) if result.number_of_rows == 0: int_columns = list(filter(lambda name: isinstance(row[name], int | np.int64 | np.int32), row.column_names)) @@ -1006,7 +1011,12 @@ def add_rows(self, rows: list[Row] | Table) -> Table: for row in rows: different_column_names.update(set(self.column_names) - set(row.column_names)) if len(different_column_names) > 0: - raise UnknownColumnNameError(sorted(different_column_names, key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__,)) + raise UnknownColumnNameError( + sorted( + different_column_names, + key={val: ix for ix, val in enumerate(self.column_names)}.__getitem__, + ), + ) result = self._copy()