From 74eea1f995d03732d14da16d4393e1d61ad33625 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Tue, 21 Mar 2023 22:32:56 +0100 Subject: [PATCH] feat: make `Column` and `Row` iterable (#55) Closes #47. ### Summary of Changes * Add `__iter__` method to `Column` and `Row` to iterate over the values: * Iterating over a `Column` returns the values. * Iterating over a `Row` returns the column names, as specified in the [documentation of `__iter__`](https://docs.python.org/3/reference/datamodel.html#object.__iter__). * Add `__len__` method to `Column` and `Row` to compute their length. * Change superclasses of exceptions as needed for [`__getitem__`](https://docs.python.org/3/reference/datamodel.html#object.__getitem__): * Change superclass of `IndexOutOfBoundsError` to `IndexError`. * Change superclass of `UnknownColumnNameError` to `KeyError`. --------- Co-authored-by: lars-reimann --- src/safeds/data/tabular/_column.py | 58 ++++++++++--------- src/safeds/data/tabular/_row.py | 17 ++++++ src/safeds/exceptions/_data_exceptions.py | 4 +- .../data/tabular/_column/test_column.py | 2 +- .../safeds/data/tabular/_column/test_count.py | 7 +-- .../safeds/data/tabular/_column/test_iter.py | 6 ++ tests/safeds/data/tabular/_column/test_len.py | 6 ++ tests/safeds/data/tabular/_row/test_count.py | 12 ++++ tests/safeds/data/tabular/_row/test_iter.py | 12 ++++ tests/safeds/data/tabular/_row/test_len.py | 12 ++++ .../{_column => _table}/test_from_columns.py | 0 .../{_row => _table}/test_from_rows.py | 0 .../{_column => _table}/test_to_columns.py | 0 .../tabular/{_row => _table}/test_to_rows.py | 0 14 files changed, 102 insertions(+), 34 deletions(-) create mode 100644 tests/safeds/data/tabular/_column/test_iter.py create mode 100644 tests/safeds/data/tabular/_column/test_len.py create mode 100644 tests/safeds/data/tabular/_row/test_count.py create mode 100644 tests/safeds/data/tabular/_row/test_iter.py create mode 100644 tests/safeds/data/tabular/_row/test_len.py rename 
tests/safeds/data/tabular/{_column => _table}/test_from_columns.py (100%) rename tests/safeds/data/tabular/{_row => _table}/test_from_rows.py (100%) rename tests/safeds/data/tabular/{_column => _table}/test_to_columns.py (100%) rename tests/safeds/data/tabular/{_row => _table}/test_to_rows.py (100%) diff --git a/src/safeds/data/tabular/_column.py b/src/safeds/data/tabular/_column.py index 3d9c7ac85..a0f90d8d1 100644 --- a/src/safeds/data/tabular/_column.py +++ b/src/safeds/data/tabular/_column.py @@ -2,7 +2,7 @@ import typing from numbers import Number -from typing import Any, Callable +from typing import Any, Callable, Iterator import numpy as np import pandas as pd @@ -34,6 +34,10 @@ def name(self) -> str: """ return self._name + @property + def statistics(self) -> ColumnStatistics: + return ColumnStatistics(self) + @property def type(self) -> ColumnType: """ @@ -46,9 +50,35 @@ def type(self) -> ColumnType: """ return self._type + def __eq__(self, other: object) -> bool: + if not isinstance(other, Column): + return NotImplemented + if self is other: + return True + return self._data.equals(other._data) and self.name == other.name + def __getitem__(self, index: int) -> Any: return self.get_value(index) + def __hash__(self) -> int: + return hash(self._data) + + def __iter__(self) -> Iterator[Any]: + return iter(self._data) + + def __len__(self) -> int: + return len(self._data) + + def __repr__(self) -> str: + tmp = self._data.to_frame() + tmp.columns = [self.name] + return tmp.__repr__() + + def __str__(self) -> str: + tmp = self._data.to_frame() + tmp.columns = [self.name] + return tmp.__str__() + def get_value(self, index: int) -> Any: """ Return column value at specified index, starting at 0. @@ -73,10 +103,6 @@ def get_value(self, index: int) -> Any: return self._data[index] - @property - def statistics(self) -> ColumnStatistics: - return ColumnStatistics(self) - def count(self) -> int: """ Return the number of elements in the column. 
@@ -223,26 +249,6 @@ def get_unique_values(self) -> list[typing.Any]: """ return list(self._data.unique()) - def __eq__(self, other: object) -> bool: - if not isinstance(other, Column): - return NotImplemented - if self is other: - return True - return self._data.equals(other._data) and self.name == other.name - - def __hash__(self) -> int: - return hash(self._data) - - def __str__(self) -> str: - tmp = self._data.to_frame() - tmp.columns = [self.name] - return tmp.__str__() - - def __repr__(self) -> str: - tmp = self._data.to_frame() - tmp.columns = [self.name] - return tmp.__repr__() - def _ipython_display_(self) -> DisplayHandle: """ Return a display object for the column to be used in Jupyter Notebooks. @@ -378,7 +384,6 @@ def sum(self) -> float: return self._column._data.sum() def variance(self) -> float: - """ Return the variance of the column. The column has to be numerical. @@ -401,7 +406,6 @@ def variance(self) -> float: return self._column._data.var() def standard_deviation(self) -> float: - """ Return the standard deviation of the column. The column has to be numerical. diff --git a/src/safeds/data/tabular/_row.py b/src/safeds/data/tabular/_row.py index a7951357c..9dbbeec8c 100644 --- a/src/safeds/data/tabular/_row.py +++ b/src/safeds/data/tabular/_row.py @@ -16,6 +16,12 @@ def __init__(self, data: typing.Iterable, schema: TableSchema): def __getitem__(self, column_name: str) -> Any: return self.get_value(column_name) + def __iter__(self) -> typing.Iterator[Any]: + return iter(self.get_column_names()) + + def __len__(self) -> int: + return len(self._data) + def get_value(self, column_name: str) -> Any: """ Return the value of a specified column. @@ -34,6 +40,17 @@ def get_value(self, column_name: str) -> Any: raise UnknownColumnNameError([column_name]) return self._data[self.schema._get_column_index_by_name(column_name)] + def count(self) -> int: + """ + Return the number of columns in this row. 
+ + Returns + ------- + count : int + The number of columns. + """ + return len(self._data) + def has_column(self, column_name: str) -> bool: """ Return whether the row contains a given column. diff --git a/src/safeds/exceptions/_data_exceptions.py b/src/safeds/exceptions/_data_exceptions.py index 42f62fd63..a2b1916a5 100644 --- a/src/safeds/exceptions/_data_exceptions.py +++ b/src/safeds/exceptions/_data_exceptions.py @@ -1,4 +1,4 @@ -class UnknownColumnNameError(Exception): +class UnknownColumnNameError(KeyError): """ Exception raised for trying to access an invalid column name. @@ -37,7 +37,7 @@ def __init__(self, column_name: str): super().__init__(f"Column '{column_name}' already exists.") -class IndexOutOfBoundsError(Exception): +class IndexOutOfBoundsError(IndexError): """ Exception raised for trying to access an element by an index that does not exist in the underlying data. diff --git a/tests/safeds/data/tabular/_column/test_column.py b/tests/safeds/data/tabular/_column/test_column.py index 4c8811705..fe9ef660d 100644 --- a/tests/safeds/data/tabular/_column/test_column.py +++ b/tests/safeds/data/tabular/_column/test_column.py @@ -9,7 +9,7 @@ def test_from_columns() -> None: assert column1._type == column2._type -def negative_test_from_columns() -> None: +def test_from_columns_negative() -> None: column1 = Column(pd.Series([1, 4]), "A") column2 = Column(pd.Series(["2", "5"]), "B") diff --git a/tests/safeds/data/tabular/_column/test_count.py b/tests/safeds/data/tabular/_column/test_count.py index 2828b65de..bbaadcd29 100644 --- a/tests/safeds/data/tabular/_column/test_count.py +++ b/tests/safeds/data/tabular/_column/test_count.py @@ -1,7 +1,6 @@ -import pandas as pd -from safeds.data.tabular import Table +from safeds.data.tabular import Column def test_count_valid() -> None: - table = Table(pd.DataFrame(data={"col1": [1, 2, 3, 4, 5], "col2": [2, 3, 4, 5, 6]})) - assert table.get_column("col1").count() == 5 + column = Column([1, 2, 3, 4, 5], "col1") + assert 
column.count() == 5 diff --git a/tests/safeds/data/tabular/_column/test_iter.py b/tests/safeds/data/tabular/_column/test_iter.py new file mode 100644 index 000000000..7a3276d2f --- /dev/null +++ b/tests/safeds/data/tabular/_column/test_iter.py @@ -0,0 +1,6 @@ +from safeds.data.tabular import Column + + +def test_iter() -> None: + column = Column([0, "1"], "testColumn") + assert list(column) == [0, "1"] diff --git a/tests/safeds/data/tabular/_column/test_len.py b/tests/safeds/data/tabular/_column/test_len.py new file mode 100644 index 000000000..1a43a0429 --- /dev/null +++ b/tests/safeds/data/tabular/_column/test_len.py @@ -0,0 +1,6 @@ +from safeds.data.tabular import Column + + +def test_count_valid() -> None: + column = Column([1, 2, 3, 4, 5], "col1") + assert len(column) == 5 diff --git a/tests/safeds/data/tabular/_row/test_count.py b/tests/safeds/data/tabular/_row/test_count.py new file mode 100644 index 000000000..6950b7e48 --- /dev/null +++ b/tests/safeds/data/tabular/_row/test_count.py @@ -0,0 +1,12 @@ +from safeds.data.tabular import Row +from safeds.data.tabular.typing import IntColumnType, StringColumnType, TableSchema + + +def test_count() -> None: + row = Row( + [0, "1"], + TableSchema( + {"testColumn1": IntColumnType(), "testColumn2": StringColumnType()} + ), + ) + assert row.count() == 2 diff --git a/tests/safeds/data/tabular/_row/test_iter.py b/tests/safeds/data/tabular/_row/test_iter.py new file mode 100644 index 000000000..8449fe2f5 --- /dev/null +++ b/tests/safeds/data/tabular/_row/test_iter.py @@ -0,0 +1,12 @@ +from safeds.data.tabular import Row +from safeds.data.tabular.typing import IntColumnType, StringColumnType, TableSchema + + +def test_iter() -> None: + row = Row( + [0, "1"], + TableSchema( + {"testColumn1": IntColumnType(), "testColumn2": StringColumnType()} + ), + ) + assert list(row) == ["testColumn1", "testColumn2"] diff --git a/tests/safeds/data/tabular/_row/test_len.py b/tests/safeds/data/tabular/_row/test_len.py new file mode 100644 
index 000000000..3b1f6945b --- /dev/null +++ b/tests/safeds/data/tabular/_row/test_len.py @@ -0,0 +1,12 @@ +from safeds.data.tabular import Row +from safeds.data.tabular.typing import IntColumnType, StringColumnType, TableSchema + + +def test_count() -> None: + row = Row( + [0, "1"], + TableSchema( + {"testColumn1": IntColumnType(), "testColumn2": StringColumnType()} + ), + ) + assert len(row) == 2 diff --git a/tests/safeds/data/tabular/_column/test_from_columns.py b/tests/safeds/data/tabular/_table/test_from_columns.py similarity index 100% rename from tests/safeds/data/tabular/_column/test_from_columns.py rename to tests/safeds/data/tabular/_table/test_from_columns.py diff --git a/tests/safeds/data/tabular/_row/test_from_rows.py b/tests/safeds/data/tabular/_table/test_from_rows.py similarity index 100% rename from tests/safeds/data/tabular/_row/test_from_rows.py rename to tests/safeds/data/tabular/_table/test_from_rows.py diff --git a/tests/safeds/data/tabular/_column/test_to_columns.py b/tests/safeds/data/tabular/_table/test_to_columns.py similarity index 100% rename from tests/safeds/data/tabular/_column/test_to_columns.py rename to tests/safeds/data/tabular/_table/test_to_columns.py diff --git a/tests/safeds/data/tabular/_row/test_to_rows.py b/tests/safeds/data/tabular/_table/test_to_rows.py similarity index 100% rename from tests/safeds/data/tabular/_row/test_to_rows.py rename to tests/safeds/data/tabular/_table/test_to_rows.py