Skip to content

Commit

Permalink
feat: usable constructor for Table (#294)
Browse files Browse the repository at this point in the history
Closes #266.

### Summary of Changes

The constructor for `Table` now accepts a dictionary that maps column names to
column values instead of a `pandas.DataFrame`, so we no longer need to "hide" it
from users. Internally, we can now use `Table._from_pandas_dataframe` to create a
`Table` from a `pandas.DataFrame`.

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
lars-reimann and megalinter-bot authored May 6, 2023
1 parent dcf2e6c commit 56a1fc4
Show file tree
Hide file tree
Showing 83 changed files with 443 additions and 412 deletions.
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/containers/_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def _from_pandas_dataframe(data: pd.DataFrame, schema: Schema | None = None) ->
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __init__(self, data: Mapping[str, Any] | None = None):
def __init__(self, data: Mapping[str, Any] | None = None) -> None:
"""
Create a row from a mapping of column names to column values.
Expand Down
133 changes: 67 additions & 66 deletions src/safeds/data/tabular/containers/_table.py

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions src/safeds/data/tabular/containers/_tagged_table.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from collections.abc import Iterable
import pandas as pd

from safeds.data.tabular.containers import Column, Table
from safeds.data.tabular.typing import Schema
Expand All @@ -24,14 +24,16 @@ class TaggedTable(Table):
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

# noinspection PyMissingConstructor
def __init__(
self,
data: Iterable,
data: pd.DataFrame,
schema: Schema,
target_name: str,
feature_names: list[str] | None = None,
schema: Schema | None = None,
):
super().__init__(data, schema)
self._data = data
self._schema = schema

# If no feature names are specified, use all columns except the target column
if feature_names is None:
Expand Down
2 changes: 0 additions & 2 deletions src/safeds/data/tabular/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
ColumnSizeError,
DuplicateColumnNameError,
IndexOutOfBoundsError,
MissingDataError,
NonNumericColumnError,
SchemaMismatchError,
TransformerNotFittedError,
Expand All @@ -17,7 +16,6 @@
"ColumnSizeError",
"DuplicateColumnNameError",
"IndexOutOfBoundsError",
"MissingDataError",
"NonNumericColumnError",
"SchemaMismatchError",
"TransformerNotFittedError",
Expand Down
7 changes: 0 additions & 7 deletions src/safeds/data/tabular/exceptions/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,6 @@ def __init__(self, column_info: str):
super().__init__(f"The length of at least one column differs: \n{column_info}")


class MissingDataError(Exception):
"""Exception raised if a function is not given enough data to succeed."""

def __init__(self, missing_data_info: str):
super().__init__(f"The function is missing data: \n{missing_data_info}")


class TransformerNotFittedError(Exception):
"""Raised when a transformer is used before fitting it."""

Expand Down
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/transformation/_imputer.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def transform(self, table: Table) -> Table:
self._wrapped_transformer.transform(data[self._column_names]),
columns=self._column_names,
)
return Table(data, table.schema)
return Table._from_pandas_dataframe(data, table.schema)

def is_fitted(self) -> bool:
"""
Expand Down
4 changes: 2 additions & 2 deletions src/safeds/data/tabular/transformation/_label_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def transform(self, table: Table) -> Table:
data = table._data.copy()
data.columns = table.column_names
data[self._column_names] = self._wrapped_transformer.transform(data[self._column_names])
return Table(data)
return Table._from_pandas_dataframe(data)

def inverse_transform(self, transformed_table: Table) -> Table:
"""
Expand Down Expand Up @@ -114,7 +114,7 @@ def inverse_transform(self, transformed_table: Table) -> Table:
data = transformed_table._data.copy()
data.columns = transformed_table.column_names
data[self._column_names] = self._wrapped_transformer.inverse_transform(data[self._column_names])
return Table(data)
return Table._from_pandas_dataframe(data)

def is_fitted(self) -> bool:
"""
Expand Down
4 changes: 2 additions & 2 deletions src/safeds/data/tabular/transformation/_one_hot_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def transform(self, table: Table) -> Table:

unchanged = original.drop(self._column_names.keys(), axis=1)

res = Table(pd.concat([unchanged, one_hot_encoded], axis=1))
res = Table._from_pandas_dataframe(pd.concat([unchanged, one_hot_encoded], axis=1))
column_names = []

for name in table.column_names:
Expand Down Expand Up @@ -150,7 +150,7 @@ def inverse_transform(self, transformed_table: Table) -> Table:
)
unchanged = data.drop(self._wrapped_transformer.get_feature_names_out(), axis=1)

res = Table(pd.concat([unchanged, decoded], axis=1))
res = Table._from_pandas_dataframe(pd.concat([unchanged, decoded], axis=1))
column_names = [
(
name
Expand Down
18 changes: 18 additions & 0 deletions src/safeds/data/tabular/transformation/_table_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,24 @@ def fit_and_transform(self, table: Table, column_names: list[str] | None = None)
class InvertibleTableTransformer(TableTransformer):
"""A `TableTransformer` that can also undo the learned transformation after it has been applied."""

@abstractmethod
def fit(self, table: Table, column_names: list[str] | None) -> InvertibleTableTransformer:
"""
Learn a transformation for a set of columns in a table.
Parameters
----------
table : Table
The table used to fit the transformer.
column_names : Optional[list[str]]
The list of columns from the table used to fit the transformer. If `None`, all columns are used.
Returns
-------
fitted_transformer : InvertibleTableTransformer
The fitted transformer.
"""

@abstractmethod
def inverse_transform(self, transformed_table: Table) -> Table:
"""
Expand Down
5 changes: 4 additions & 1 deletion src/safeds/ml/classical/_util_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ def predict(model: Any, dataset: Table, feature_names: list[str] | None, target_
warnings.filterwarnings("ignore", message="X does not have valid feature names")
predicted_target_vector = model.predict(dataset_df.values)
result_set[target_name] = predicted_target_vector
return Table(result_set).tag_columns(target_name=target_name, feature_names=feature_names)
return Table._from_pandas_dataframe(result_set).tag_columns(
target_name=target_name,
feature_names=feature_names,
)
except ValueError as exception:
raise PredictionError(str(exception)) from exception
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@


def test_should_match_snapshot() -> None:
table = Table.from_dict({"A": [1, 2, 3]})
table = Table({"A": [1, 2, 3]})
table.get_column("A").plot_boxplot()
current = table.get_column("A").plot_boxplot()
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_boxplot.png"))
assert snapshot._image.tobytes() == current._image.tobytes()


def test_should_raise_if_column_contains_non_numerical_values() -> None:
table = Table.from_dict({"A": [1, 2, "A"]})
table = Table({"A": [1, 2, "A"]})
with pytest.raises(NonNumericColumnError):
table.get_column("A").plot_boxplot()
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@


def test_should_match_snapshot_numeric() -> None:
table = Table.from_dict({"A": [1, 2, 3]})
table = Table({"A": [1, 2, 3]})
current = table.get_column("A").plot_histogram()
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_histogram_numeric.png"))
assert snapshot._image.tobytes() == current._image.tobytes()


def test_should_match_snapshot_str() -> None:
table = Table.from_dict({"A": ["A", "B", "Apple"]})
table = Table({"A": ["A", "B", "Apple"]})
current = table.get_column("A").plot_histogram()
snapshot = Image.from_png_file(resolve_resource_path("./image/snapshot_histogram_str.png"))
assert snapshot._image.tobytes() == current._image.tobytes()
12 changes: 6 additions & 6 deletions tests/safeds/data/tabular/containers/_table/test_add_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@
("table1", "column", "expected"),
[
(
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Column("col3", ["a", "b", "c"]),
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": ["a", "b", "c"]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": ["a", "b", "c"]}),
),
(
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Column("col3", [0, -1, -2]),
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2]}),
),
],
ids=["String", "Integer"],
Expand All @@ -25,12 +25,12 @@ def test_should_add_column(table1: Table, column: Column, expected: Table) -> No


def test_should_raise_error_if_column_name_exists() -> None:
table1 = Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]})
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
with pytest.raises(DuplicateColumnNameError):
table1.add_column(Column("col1", ["a", "b", "c"]))


def test_should_raise_error_if_column_size_invalid() -> None:
table1 = Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]})
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
with pytest.raises(ColumnSizeError):
table1.add_column(Column("col3", ["a", "b", "c", "d"]))
14 changes: 7 additions & 7 deletions tests/safeds/data/tabular/containers/_table/test_add_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
("table1", "columns", "expected"),
[
(
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
[Column("col3", [0, -1, -2]), Column("col4", ["a", "b", "c"])],
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
),
],
ids=["add 2 columns"],
Expand All @@ -23,9 +23,9 @@ def test_should_add_columns(table1: Table, columns: list[Column], expected: Tabl
("table1", "table2", "expected"),
[
(
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table.from_dict({"col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table({"col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4], "col3": [0, -1, -2], "col4": ["a", "b", "c"]}),
),
],
ids=["add a table with 2 columns"],
Expand All @@ -39,7 +39,7 @@ def test_should_add_columns_from_table(table1: Table, table2: Table, expected: T
("table", "columns"),
[
(
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
[Column("col3", ["a", "b", "c", "d"]), Column("col4", ["e", "f", "g", "h"])],
),
],
Expand All @@ -54,7 +54,7 @@ def test_should_raise_error_if_column_size_invalid(table: Table, columns: list[C
("table", "columns"),
[
(
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
[Column("col2", ["a", "b", "c"]), Column("col3", [2, 3, 4])],
),
],
Expand Down
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_table/test_add_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
@pytest.mark.parametrize(
("table", "row"),
[
(Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Row({"col1": 5, "col2": 6})),
(Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}), Row({"col1": 5, "col2": 6})),
],
ids=["added row"],
)
Expand All @@ -19,7 +19,7 @@ def test_should_add_row(table: Table, row: Row) -> None:


def test_should_raise_error_if_row_schema_invalid() -> None:
table1 = Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]})
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
row = Row({"col1": 5, "col2": "Hallo"})
with raises(SchemaMismatchError):
table1.add_row(row)
12 changes: 6 additions & 6 deletions tests/safeds/data/tabular/containers/_table/test_add_rows.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
("table1", "rows", "table2"),
[
(
Table.from_dict({"col1": ["a", "b", "c"], "col2": [1, 2, 4]}),
Table({"col1": ["a", "b", "c"], "col2": [1, 2, 4]}),
[Row({"col1": "d", "col2": 6}), Row({"col1": "e", "col2": 8})],
Table.from_dict({"col1": ["a", "b", "c", "d", "e"], "col2": [1, 2, 4, 6, 8]}),
Table({"col1": ["a", "b", "c", "d", "e"], "col2": [1, 2, 4, 6, 8]}),
),
],
ids=["Rows with string and integer values"],
Expand All @@ -23,9 +23,9 @@ def test_should_add_rows(table1: Table, rows: list[Row], table2: Table) -> None:
("table1", "table2", "expected"),
[
(
Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table.from_dict({"col1": [5, 7], "col2": [6, 8]}),
Table.from_dict({"col1": [1, 2, 1, 5, 7], "col2": [1, 2, 4, 6, 8]}),
Table({"col1": [1, 2, 1], "col2": [1, 2, 4]}),
Table({"col1": [5, 7], "col2": [6, 8]}),
Table({"col1": [1, 2, 1, 5, 7], "col2": [1, 2, 4, 6, 8]}),
),
],
ids=["Rows from table"],
Expand All @@ -36,7 +36,7 @@ def test_should_add_rows_from_table(table1: Table, table2: Table, expected: Tabl


def test_should_raise_error_if_row_schema_invalid() -> None:
table1 = Table.from_dict({"col1": [1, 2, 1], "col2": [1, 2, 4]})
table1 = Table({"col1": [1, 2, 1], "col2": [1, 2, 4]})
row = [Row({"col1": 2, "col2": 4}), Row({"col1": 5, "col2": "Hallo"})]
with pytest.raises(SchemaMismatchError):
table1.add_rows(row)
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
@pytest.mark.parametrize(
("table", "expected"),
[
(Table.from_dict({"col1": [1], "col2": [1]}), ["col1", "col2"]),
(Table([]), []),
(Table({"col1": [1], "col2": [1]}), ["col1", "col2"]),
(Table(), []),
],
ids=["Integer", "empty"],
)
Expand Down
8 changes: 4 additions & 4 deletions tests/safeds/data/tabular/containers/_table/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
@pytest.mark.parametrize(
"table",
[
Table([]),
Table.from_dict({"a": [1, 2], "b": [3, 4]}),
Table(),
Table({"a": [1, 2], "b": [3, 4]}),
],
ids=[
"empty",
Expand All @@ -16,12 +16,12 @@
)
def test_should_restore_table_from_exchange_object(table: Table) -> None:
exchange_object = table.__dataframe__()
restored = Table(from_dataframe(exchange_object))
restored = Table._from_pandas_dataframe(from_dataframe(exchange_object))

assert restored == table


def test_should_raise_error_if_allow_copy_is_false() -> None:
table = Table.from_dict({})
table = Table()
with pytest.raises(NotImplementedError, match="`allow_copy` must be True"):
table.__dataframe__(allow_copy=False)
16 changes: 8 additions & 8 deletions tests/safeds/data/tabular/containers/_table/test_eq.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
@pytest.mark.parametrize(
("table1", "table2", "expected"),
[
(Table({}), Table({}), True),
(Table.from_dict({"col1": [1]}), Table.from_dict({"col1": [1]}), True),
(Table.from_dict({"col1": [1]}), Table.from_dict({"col2": [1]}), False),
(Table.from_dict({"col1": [1, 2, 3]}), Table.from_dict({"col1": [1, 1, 3]}), False),
(Table.from_dict({"col1": [1, 2, 3]}), Table.from_dict({"col1": ["1", "2", "3"]}), False),
(Table(), Table(), True),
(Table({"col1": [1]}), Table({"col1": [1]}), True),
(Table({"col1": [1]}), Table({"col2": [1]}), False),
(Table({"col1": [1, 2, 3]}), Table({"col1": [1, 1, 3]}), False),
(Table({"col1": [1, 2, 3]}), Table({"col1": ["1", "2", "3"]}), False),
],
ids=[
"empty Table",
Expand All @@ -27,7 +27,7 @@ def test_should_return_whether_two_tables_are_equal(table1: Table, table2: Table

@pytest.mark.parametrize(
"table",
[Table.from_dict({}), Table.from_dict({"col1": [1]})],
[Table(), Table({"col1": [1]})],
ids=[
"empty",
"non-empty",
Expand All @@ -40,8 +40,8 @@ def test_should_return_true_if_objects_are_identical(table: Table) -> None:
@pytest.mark.parametrize(
("table", "other"),
[
(Table.from_dict({"col1": [1]}), None),
(Table.from_dict({"col1": [1]}), Row()),
(Table({"col1": [1]}), None),
(Table({"col1": [1]}), Row()),
],
ids=[
"Table vs. None",
Expand Down
Loading

0 comments on commit 56a1fc4

Please sign in to comment.