Skip to content

Commit

Permalink
feat: improve ColumnTypes (#132)
Browse files Browse the repository at this point in the history
Closes #113.

### Summary of Changes

* Rename subclasses:
  | old | new |
  |--------|--------|
  | IntColumnType | Integer |
  | BooleanColumnType | Boolean |
  | FloatColumnType | RealNumber |
  | StringColumnType | String |
* Add new subclass `Anything` for columns that may contain values of any type
* Add parameter `is_nullable` to all types to indicate whether null values
are allowed
* Add tests

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
lars-reimann and github-actions[bot] authored Mar 31, 2023
1 parent e8cf7a6 commit 1786a87
Show file tree
Hide file tree
Showing 21 changed files with 383 additions and 126 deletions.
26 changes: 6 additions & 20 deletions src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,11 @@ class Column:
The type of the column. If not specified, the type will be inferred from the data.
"""

def __init__(
self, name: str, data: Iterable, type_: Optional[ColumnType] = None
) -> None:
def __init__(self, name: str, data: Iterable, type_: Optional[ColumnType] = None) -> None:
self._name: str = name
self._data: pd.Series = data if isinstance(data, pd.Series) else pd.Series(data)
self._type: ColumnType = (
type_
if type_ is not None
else ColumnType.from_numpy_dtype(self._data.dtype)
)
# noinspection PyProtectedMember
self._type: ColumnType = type_ if type_ is not None else ColumnType._from_numpy_dtype(self._data.dtype)

@property
def name(self) -> str:
Expand Down Expand Up @@ -239,10 +234,7 @@ def has_missing_values(self) -> bool:
missing_values_exist : bool
True if missing values exist.
"""
return self.any(
lambda value: value is None
or (isinstance(value, Number) and np.isnan(value))
)
return self.any(lambda value: value is None or (isinstance(value, Number) and np.isnan(value)))

def correlation_with(self, other_column: Column) -> float:
"""
Expand Down Expand Up @@ -293,9 +285,7 @@ def _ipython_display_(self) -> DisplayHandle:
tmp = self._data.to_frame()
tmp.columns = [self.name]

with pd.option_context(
"display.max_rows", tmp.shape[0], "display.max_columns", tmp.shape[1]
):
with pd.option_context("display.max_rows", tmp.shape[0], "display.max_columns", tmp.shape[1]):
return display(tmp)

def maximum(self) -> float:
Expand Down Expand Up @@ -493,11 +483,7 @@ def boxplot(self) -> None:
If the column contains non-numerical data or complex data.
"""
for data in self._data:
if (
not isinstance(data, int)
and not isinstance(data, float)
and not isinstance(data, complex)
):
if not isinstance(data, int) and not isinstance(data, float) and not isinstance(data, complex):
raise NonNumericColumnError(self.name)
if isinstance(data, complex):
raise TypeError(
Expand Down
9 changes: 1 addition & 8 deletions src/safeds/data/tabular/typing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,2 @@
from ._column_type import (
BooleanColumnType,
ColumnType,
FloatColumnType,
IntColumnType,
OptionalColumnType,
StringColumnType,
)
from ._column_type import Anything, Boolean, ColumnType, Integer, RealNumber, String
from ._table_schema import TableSchema
256 changes: 223 additions & 33 deletions src/safeds/data/tabular/typing/_column_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,40 @@

class ColumnType(ABC):
"""
Base type for Column, stored in TableSchema.
Abstract base class for column types.
"""

@abstractmethod
def is_nullable(self) -> bool:
"""
Return whether the given column type is nullable.
Returns
-------
is_nullable : bool
True if the column is nullable.
"""

@abstractmethod
def is_numeric(self) -> bool:
"""
Return whether the given column type is numeric.
Returns
-------
is_numeric : bool
True if the column is numeric.
"""
return False

@staticmethod
def from_numpy_dtype(_type: np.dtype) -> ColumnType:
def _from_numpy_dtype(dtype: np.dtype) -> ColumnType:
"""
Return the column type for a given numpy dtype.
Parameters
----------
_type : numpy.dtype
dtype : numpy.dtype
The numpy dtype.
Returns
-------
Expand All @@ -38,62 +51,239 @@ def from_numpy_dtype(_type: np.dtype) -> ColumnType:
Raises
-------
TypeError
If an unexpected column type is parsed.
If the given dtype is not supported.
"""
if _type.kind in ("u", "i"):
return IntColumnType()
if _type.kind == "b":
return BooleanColumnType()
if _type.kind == "f":
return FloatColumnType()
if _type.kind in ("S", "U", "O"):
return StringColumnType()
if dtype.kind in ("u", "i"):
return Integer()
if dtype.kind == "b":
return Boolean()
if dtype.kind == "f":
return RealNumber()
if dtype.kind in ("S", "U", "O", "M", "m"):
return String()
raise TypeError("Unexpected column type")


@dataclass
class IntColumnType(ColumnType):
def is_numeric(self) -> bool:
return True
class Anything(ColumnType):
"""
Type for a column that contains anything.
Parameters
----------
is_nullable : bool
Whether the type also allows null values.
"""

_is_nullable: bool

def __init__(self, is_nullable: bool = False):
self._is_nullable = is_nullable

def __repr__(self) -> str:
return "int"
result = "Anything"
if self._is_nullable:
result += "?"
return result

def is_nullable(self) -> bool:
"""
Return whether the given column type is nullable.
Returns
-------
is_nullable : bool
True if the column is nullable.
"""
return self._is_nullable

@dataclass
class BooleanColumnType(ColumnType):
def is_numeric(self) -> bool:
"""
Return whether the given column type is numeric.
Returns
-------
is_numeric : bool
True if the column is numeric.
"""
return False


@dataclass
class Boolean(ColumnType):
"""
Type for a column that only contains booleans.
Parameters
----------
is_nullable : bool
Whether the type also allows null values.
"""

_is_nullable: bool

def __init__(self, is_nullable: bool = False):
self._is_nullable = is_nullable

def __repr__(self) -> str:
return "bool"
result = "Boolean"
if self._is_nullable:
result += "?"
return result

def is_nullable(self) -> bool:
"""
Return whether the given column type is nullable.
Returns
-------
is_nullable : bool
True if the column is nullable.
"""
return self._is_nullable

def is_numeric(self) -> bool:
"""
Return whether the given column type is numeric.
Returns
-------
is_numeric : bool
True if the column is numeric.
"""
return False


@dataclass
class FloatColumnType(ColumnType):
class RealNumber(ColumnType):
    """
    Column type for columns whose values are all real numbers.

    Parameters
    ----------
    is_nullable : bool
        Whether null values are permitted in addition to real numbers.
    """

    _is_nullable: bool

    def __init__(self, is_nullable: bool = False):
        self._is_nullable = is_nullable

    def __repr__(self) -> str:
        # A trailing "?" marks the nullable variant of the type.
        suffix = "?" if self._is_nullable else ""
        return f"RealNumber{suffix}"

    def is_nullable(self) -> bool:
        """
        Tell whether this column type permits null values.

        Returns
        -------
        is_nullable : bool
            True if the column is nullable.
        """
        return self._is_nullable

    def is_numeric(self) -> bool:
        """
        Tell whether this column type is numeric.

        Returns
        -------
        is_numeric : bool
            True if the column is numeric.
        """
        return True


@dataclass
class Integer(ColumnType):
"""
Type for a column that only contains integers.
Parameters
----------
is_nullable : bool
Whether the type also allows null values.
"""

_is_nullable: bool

def __init__(self, is_nullable: bool = False):
self._is_nullable = is_nullable

def __repr__(self) -> str:
return "float"
result = "Integer"
if self._is_nullable:
result += "?"
return result

def is_nullable(self) -> bool:
"""
Return whether the given column type is nullable.
Returns
-------
is_nullable : bool
True if the column is nullable.
"""
return self._is_nullable

@dataclass
class StringColumnType(ColumnType):
def is_numeric(self) -> bool:
return False
"""
Return whether the given column type is numeric.
def __repr__(self) -> str:
return "string"
Returns
-------
is_numeric : bool
True if the column is numeric.
"""
return True


@dataclass
class OptionalColumnType(ColumnType):
_type: ColumnType
class String(ColumnType):
"""
Type for a column that only contains strings.
def is_numeric(self) -> bool:
return self._type.is_numeric()
Parameters
----------
is_nullable : bool
Whether the type also allows null values.
"""

_is_nullable: bool

def __init__(self, is_nullable: bool = False):
self._is_nullable = is_nullable

def __repr__(self) -> str:
return f"optional({self._type.__repr__()})"
result = "String"
if self._is_nullable:
result += "?"
return result

def is_nullable(self) -> bool:
"""
Return whether the given column type is nullable.
Returns
-------
is_nullable : bool
True if the column is nullable.
"""
return self._is_nullable

def is_numeric(self) -> bool:
"""
Return whether the given column type is numeric.
Returns
-------
is_numeric : bool
True if the column is numeric.
"""
return False
3 changes: 2 additions & 1 deletion src/safeds/data/tabular/typing/_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ def _from_dataframe(dataframe: pd.DataFrame) -> TableSchema:
"""

names = dataframe.columns
types = (ColumnType.from_numpy_dtype(dtype) for dtype in dataframe.dtypes)
# noinspection PyProtectedMember
types = (ColumnType._from_numpy_dtype(dtype) for dtype in dataframe.dtypes)

return TableSchema(dict(zip(names, types)))

Expand Down
Loading

0 comments on commit 1786a87

Please sign in to comment.