Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: make Column a subclass of Sequence #245

Merged
merged 11 commits into from
Apr 22, 2023
1 change: 1 addition & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
exclude_lines =
pragma: no cover
if\s+(typing\.)?TYPE_CHECKING:
\.\.\.
2 changes: 2 additions & 0 deletions .github/linters/.ruff.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ ignore = [
"FBT002",
# builtin-attribute-shadowing (not an issue)
"A003",
# implicit-return (can add a return even though all cases are covered)
"RET503",
# superfluous-else-return (sometimes it's more readable)
"RET505",
# superfluous-else-raise (sometimes it's more readable)
Expand Down
124 changes: 101 additions & 23 deletions src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

import io
from collections.abc import Sequence
from numbers import Number
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, TypeVar, overload

import matplotlib.pyplot as plt
import numpy as np
Expand All @@ -20,32 +21,90 @@
from safeds.data.tabular.typing import ColumnType

if TYPE_CHECKING:
from collections.abc import Callable, Iterable, Iterator
from collections.abc import Callable, Iterator

_T = TypeVar("_T")

class Column:

class Column(Sequence[_T]):
"""
A column is a named collection of values.

Parameters
----------
name : str
The name of the column.
data : Iterable
data : Sequence[_T]
The data.
type_ : Optional[ColumnType]
The type of the column. If not specified, the type will be inferred from the data.

Examples
--------
>>> from safeds.data.tabular.containers import Column
>>> column = Column("test", [1, 2, 3])
"""

# ------------------------------------------------------------------------------------------------------------------
# Creation
# ------------------------------------------------------------------------------------------------------------------

@staticmethod
def _from_pandas_series(data: pd.Series, type_: ColumnType | None = None) -> Column:
    """
    Build a column directly on top of an existing `pandas.Series`.

    The regular constructor is bypassed: the series is stored as-is and its name is used as the column name.

    Parameters
    ----------
    data : pd.Series
        The series that backs the column.
    type_ : ColumnType | None
        The column type. If None, the type is derived from the dtype of `data`.

    Returns
    -------
    column : Column
        The newly created column.

    Examples
    --------
    >>> import pandas as pd
    >>> from safeds.data.tabular.containers import Column
    >>> column = Column._from_pandas_series(pd.Series([1, 2, 3], name="test"))
    """
    if type_ is None:
        # noinspection PyProtectedMember
        type_ = ColumnType._from_numpy_data_type(data.dtype)

    # Allocate the instance without running __init__, then fill in its fields by hand.
    column = object.__new__(Column)
    column._name = data.name
    column._data = data
    column._type = type_
    return column

# ------------------------------------------------------------------------------------------------------------------
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __init__(self, name: str, data: Iterable, type_: ColumnType | None = None) -> None:
def __init__(self, name: str, data: Sequence[_T]) -> None:
"""
Create a column.

Parameters
----------
name : str
The name of the column.
data : Sequence[_T]
The data.

Examples
--------
>>> from safeds.data.tabular.containers import Column
>>> column = Column("test", [1, 2, 3])
"""
self._name: str = name
self._data: pd.Series = data if isinstance(data, pd.Series) else pd.Series(data)
# noinspection PyProtectedMember
self._type: ColumnType = type_ if type_ is not None else ColumnType._from_numpy_data_type(self._data.dtype)
self._type: ColumnType = ColumnType._from_numpy_data_type(self._data.dtype)

def __contains__(self, item: Any) -> bool:
return item in self._data

def __eq__(self, other: object) -> bool:
if not isinstance(other, Column):
Expand All @@ -54,10 +113,29 @@ def __eq__(self, other: object) -> bool:
return True
return self.name == other.name and self._data.equals(other._data)

def __getitem__(self, index: int) -> Any:
return self.get_value(index)

def __iter__(self) -> Iterator[Any]:
@overload
def __getitem__(self, index: int) -> _T:
...

@overload
def __getitem__(self, index: slice) -> Column[_T]:
...

def __getitem__(self, index: int | slice) -> _T | Column[_T]:
if isinstance(index, int):
if index < 0 or index >= self._data.size:
raise IndexOutOfBoundsError(index)
return self._data[index]

if isinstance(index, slice):
if index.start < 0 or index.start > self._data.size:
raise IndexOutOfBoundsError(index)
if index.stop < 0 or index.stop > self._data.size:
raise IndexOutOfBoundsError(index)
data = self._data[index].reset_index(drop=True).rename(self.name)
return Column._from_pandas_series(data, self._type)

def __iter__(self) -> Iterator[_T]:
return iter(self._data)

def __len__(self) -> int:
Expand Down Expand Up @@ -117,18 +195,18 @@ def type(self) -> ColumnType:
# Getters
# ------------------------------------------------------------------------------------------------------------------

def get_unique_values(self) -> list[Any]:
def get_unique_values(self) -> list[_T]:
"""
Return a list of all unique values in the column.

Returns
-------
unique_values : list[any]
unique_values : list[_T]
List of unique values in the column.
"""
return list(self._data.unique())

def get_value(self, index: int) -> Any:
def get_value(self, index: int) -> _T:
"""
Return column value at specified index, starting at 0.

Expand Down Expand Up @@ -156,13 +234,13 @@ def get_value(self, index: int) -> Any:
# Information
# ------------------------------------------------------------------------------------------------------------------

def all(self, predicate: Callable[[Any], bool]) -> bool:
def all(self, predicate: Callable[[_T], bool]) -> bool:
"""
Check if all values have a given property.

Parameters
----------
predicate : Callable[[Any], bool])
predicate : Callable[[_T], bool]
Callable that is used to find matches.

Returns
Expand All @@ -173,13 +251,13 @@ def all(self, predicate: Callable[[Any], bool]) -> bool:
"""
return all(predicate(value) for value in self._data)

def any(self, predicate: Callable[[Any], bool]) -> bool:
def any(self, predicate: Callable[[_T], bool]) -> bool:
"""
Check if any value has a given property.

Parameters
----------
predicate : Callable[[Any], bool])
predicate : Callable[[_T], bool]
Callable that is used to find matches.

Returns
Expand All @@ -190,13 +268,13 @@ def any(self, predicate: Callable[[Any], bool]) -> bool:
"""
return any(predicate(value) for value in self._data)

def none(self, predicate: Callable[[Any], bool]) -> bool:
def none(self, predicate: Callable[[_T], bool]) -> bool:
"""
Check if no value has a given property.

Parameters
----------
predicate : Callable[[Any], bool])
predicate : Callable[[_T], bool]
Callable that is used to find matches.

Returns
Expand Down Expand Up @@ -236,7 +314,7 @@ def rename(self, new_name: str) -> Column:
column : Column
A new column with the new name.
"""
return Column(new_name, self._data, self._type)
return Column._from_pandas_series(self._data.rename(new_name), self._type)

# ------------------------------------------------------------------------------------------------------------------
# Statistics
Expand Down Expand Up @@ -375,7 +453,7 @@ def missing_value_ratio(self) -> float:
raise ColumnSizeError("> 0", "0")
return self._count_missing_values() / self._data.size

def mode(self) -> Any:
def mode(self) -> list[_T]:
"""
Return the mode of the column.

Expand Down
16 changes: 7 additions & 9 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,15 +338,13 @@ def get_column(self, column_name: str) -> Column:
UnknownColumnNameError
If the specified target column name does not exist.
"""
if self._schema.has_column(column_name):
output_column = Column(
column_name,
self._data.iloc[:, [self._schema._get_column_index(column_name)]].squeeze(),
self._schema.get_column_type(column_name),
)
return output_column
if not self.has_column(column_name):
raise UnknownColumnNameError([column_name])

raise UnknownColumnNameError([column_name])
return Column._from_pandas_series(
self._data[column_name],
self.get_column_type(column_name),
)

def has_column(self, column_name: str) -> bool:
"""
Expand Down Expand Up @@ -980,7 +978,7 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tabl
"""
if self.has_column(name):
items: list = [transformer(item) for item in self.to_rows()]
result: Column = Column(name, pd.Series(items))
result: Column = Column(name, items)
return self.replace_column(name, result)
raise UnknownColumnNameError([name])

Expand Down
4 changes: 2 additions & 2 deletions src/safeds/data/tabular/exceptions/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ class IndexOutOfBoundsError(IndexError):

Parameters
----------
index : int
index : int | slice
The wrongly used index.
"""

def __init__(self, index: int):
def __init__(self, index: int | slice):
super().__init__(f"There is no element at index '{index}'.")


Expand Down
28 changes: 0 additions & 28 deletions src/safeds/data/tabular/typing/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,31 +239,3 @@ def _repr_markdown_(self) -> str:
lines = (f"| {name} | {type_} |" for name, type_ in self._schema.items())
joined = "\n".join(lines)
return f"| Column Name | Column Type |\n| --- | --- |\n{joined}"

# ------------------------------------------------------------------------------------------------------------------
# Other
# ------------------------------------------------------------------------------------------------------------------

def _get_column_index(self, column_name: str) -> int:
"""
Return the index of the column with specified column name.

Parameters
----------
column_name : str
The name of the column.

Returns
-------
index : int
The index of the column.

Raises
------
ColumnNameError
If the specified column name does not exist.
"""
if not self.has_column(column_name):
raise UnknownColumnNameError([column_name])

return list(self._schema.keys()).index(column_name)
19 changes: 0 additions & 19 deletions tests/safeds/data/tabular/containers/_column/test_getitem.py

This file was deleted.

Loading