Skip to content

Commit

Permalink
feat: convert Schema to dict and format it nicely in a notebook (#…
Browse files Browse the repository at this point in the history
…244)

Closes #151.

### Summary of Changes

* Add method `to_dict` to `Schema` to convert a `Schema` to a `dict`
* Nicely format `Schema` in Jupyter notebook
* Add examples to docstrings

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
lars-reimann and megalinter-bot authored Apr 22, 2023
1 parent 683c279 commit ad1cac5
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 1 deletion.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
filterwarnings = [
"ignore:Deprecated call to `pkg_resources.declare_namespace",
"ignore:Jupyter is migrating its paths to use standard platformdirs"
]

Expand Down
114 changes: 114 additions & 0 deletions src/safeds/data/tabular/typing/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,19 @@ class Schema:
----------
schema : dict[str, ColumnType]
Map from column names to data types.
Examples
--------
>>> from safeds.data.tabular.typing import Integer, Schema, String
>>> schema = Schema({"A": Integer(), "B": String()})
"""

_schema: dict[str, ColumnType]

# ------------------------------------------------------------------------------------------------------------------
# Creation
# ------------------------------------------------------------------------------------------------------------------

@staticmethod
def _from_pandas_dataframe(dataframe: pd.DataFrame) -> Schema:
"""
Expand All @@ -44,6 +53,10 @@ def _from_pandas_dataframe(dataframe: pd.DataFrame) -> Schema:

return Schema(dict(zip(names, types, strict=True)))

# ------------------------------------------------------------------------------------------------------------------
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __init__(self, schema: dict[str, ColumnType]) -> None:
    """
    Create a schema from a mapping of column names to column types.

    Parameters
    ----------
    schema : dict[str, ColumnType]
        Map from column names to data types.
    """
    # Store a shallow copy so that later changes to the caller's dict do not affect this schema.
    self._schema = dict(schema)  # Defensive copy

Expand All @@ -55,11 +68,35 @@ def __hash__(self) -> int:
-------
hash : int
The hash value.
Examples
--------
>>> from safeds.data.tabular.typing import Integer, Schema, String
>>> schema = Schema({"A": Integer(), "B": String()})
>>> hash_value = hash(schema)
"""
column_names = self._schema.keys()
column_types = map(repr, self._schema.values())
return hash(tuple(zip(column_names, column_types, strict=True)))

def __repr__(self) -> str:
"""
Return an unambiguous string representation of this row.
Returns
-------
representation : str
The string representation.
Examples
--------
>>> from safeds.data.tabular.typing import Integer, Schema, String
>>> schema = Schema({"A": Integer()})
>>> repr(schema)
"Schema({'A': Integer})"
"""
return f"Schema({str(self)})"

def __str__(self) -> str:
"""
Return a user-friendly string representation of the schema.
Expand All @@ -68,6 +105,13 @@ def __str__(self) -> str:
-------
string : str
The string representation.
Examples
--------
>>> from safeds.data.tabular.typing import Integer, Schema, String
>>> schema = Schema({"A": Integer()})
>>> str(schema)
"{'A': Integer}"
"""
match len(self._schema):
case 0:
Expand All @@ -88,6 +132,13 @@ def column_names(self) -> list[str]:
-------
column_names : list[str]
The column names.
Examples
--------
>>> from safeds.data.tabular.typing import Integer, Schema, String
>>> schema = Schema({"A": Integer(), "B": String()})
>>> schema.column_names
['A', 'B']
"""
return list(self._schema.keys())

Expand All @@ -104,6 +155,16 @@ def has_column(self, column_name: str) -> bool:
-------
contains : bool
True if the schema contains the column.
Examples
--------
>>> from safeds.data.tabular.typing import Integer, Schema, String
>>> schema = Schema({"A": Integer(), "B": String()})
>>> schema.has_column("A")
True
>>> schema.has_column("C")
False
"""
return column_name in self._schema

Expand All @@ -125,11 +186,64 @@ def get_column_type(self, column_name: str) -> ColumnType:
------
UnknownColumnNameError
If the specified column name does not exist.
Examples
--------
>>> from safeds.data.tabular.typing import Integer, Schema, String
>>> schema = Schema({"A": Integer(), "B": String()})
>>> schema.get_column_type("A")
Integer
"""
if not self.has_column(column_name):
raise UnknownColumnNameError([column_name])
return self._schema[column_name]

# ------------------------------------------------------------------------------------------------------------------
# Conversion
# ------------------------------------------------------------------------------------------------------------------

def to_dict(self) -> dict[str, ColumnType]:
    """
    Convert the schema to a dictionary from column names to column types.

    The result is a shallow copy of the internal mapping, so adding or removing entries in it does not change the
    schema.

    Returns
    -------
    data : dict[str, ColumnType]
        Dictionary representation of the schema.

    Examples
    --------
    >>> from safeds.data.tabular.typing import Integer, Schema, String
    >>> schema = Schema({"A": Integer(), "B": String()})
    >>> schema.to_dict()
    {'A': Integer, 'B': String}
    """
    # Hand out a copy so callers never hold a reference to the internal dict.
    column_types = self._schema.copy()
    return column_types

# ------------------------------------------------------------------------------------------------------------------
# IPython Integration
# ------------------------------------------------------------------------------------------------------------------

def _repr_markdown_(self) -> str:
"""
Return a Markdown representation of the schema.
Returns
-------
markdown : str
The Markdown representation.
"""
if len(self._schema) == 0:
return "Empty Schema"

lines = (f"| {name} | {type_} |" for name, type_ in self._schema.items())
joined = "\n".join(lines)
return f"| Column Name | Column Type |\n| --- | --- |\n{joined}"

# ------------------------------------------------------------------------------------------------------------------
# Other
# ------------------------------------------------------------------------------------------------------------------

def _get_column_index(self, column_name: str) -> int:
"""
Return the index of the column with specified column name.
Expand Down
59 changes: 58 additions & 1 deletion tests/safeds/data/tabular/typing/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,24 @@ def test_should_create_schema_from_pandas_dataframe(self, dataframe: pd.DataFram
assert Schema._from_pandas_dataframe(dataframe) == expected


class TestRepr:
    """Check the output of `repr` for schemas with zero, one, and several columns."""

    @pytest.mark.parametrize(
        ("schema", "expected"),
        [
            pytest.param(Schema({}), "Schema({})", id="empty"),
            pytest.param(Schema({"A": Integer()}), "Schema({'A': Integer})", id="single column"),
            pytest.param(
                Schema({"A": Integer(), "B": String()}),
                "Schema({\n 'A': Integer,\n 'B': String\n})",
                id="multiple columns",
            ),
        ],
    )
    def test_should_create_a_string_representation(self, schema: Schema, expected: str) -> None:
        actual = repr(schema)
        assert actual == expected


class TestStr:
@pytest.mark.parametrize(
("schema", "expected"),
Expand All @@ -62,7 +80,7 @@ class TestStr:
"multiple columns",
],
)
def test_should_create_a_printable_representation(self, schema: Schema, expected: str) -> None:
def test_should_create_a_string_representation(self, schema: Schema, expected: str) -> None:
assert str(schema) == expected


Expand Down Expand Up @@ -213,3 +231,42 @@ def test_should_raise_if_column_does_not_exist(self) -> None:
schema = Schema({"A": Integer()})
with pytest.raises(UnknownColumnNameError):
schema._get_column_index("B")


class TestToDict:
    """Check that `Schema.to_dict` returns the mapping the schema was created from."""

    @pytest.mark.parametrize(
        ("schema", "expected"),
        [
            (Schema({}), {}),
            (Schema({"A": Integer()}), {"A": Integer()}),
            (Schema({"A": Integer(), "B": String()}), {"A": Integer(), "B": String()}),
        ],
        ids=[
            "empty",
            "single column",
            "multiple columns",
        ],
    )
    # `expected` is a dictionary of column types, not a string. The value type is kept as `object` so the
    # annotation only uses names that are certainly available in this module.
    def test_should_return_dict_for_schema(self, schema: Schema, expected: dict[str, object]) -> None:
        assert schema.to_dict() == expected


class TestReprMarkdown:
    """Check the Markdown rendering of schemas with zero, one, and several columns."""

    @pytest.mark.parametrize(
        ("schema", "expected"),
        [
            pytest.param(Schema({}), "Empty Schema", id="empty"),
            pytest.param(
                Schema({"A": Integer()}),
                "| Column Name | Column Type |\n| --- | --- |\n| A | Integer |",
                id="single column",
            ),
            pytest.param(
                Schema({"A": Integer(), "B": String()}),
                "| Column Name | Column Type |\n| --- | --- |\n| A | Integer |\n| B | String |",
                id="multiple columns",
            ),
        ],
    )
    def test_should_create_a_string_representation(self, schema: Schema, expected: str) -> None:
        markdown = schema._repr_markdown_()
        assert markdown == expected

0 comments on commit ad1cac5

Please sign in to comment.