feat: convert Schema to dict and format it nicely in a notebook (#244)

Closes #151.

### Summary of Changes

* Add method `to_dict` to `Schema` to convert a `Schema` to a `dict`
* Nicely format `Schema` in Jupyter notebook
* Add examples to docstrings

---------

Co-authored-by: megalinter-bot <[email protected]>
Safe-DS · Apr 22, 2023 · ad1cac5 · ad1cac5
1 parent 683c279
commit ad1cac5
Show file tree

Hide file tree

Showing 3 changed files with 173 additions and 1 deletion.
diff --git a/pyproject.toml b/pyproject.toml
@@ -45,6 +45,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.pytest.ini_options]
 filterwarnings = [
+    "ignore:Deprecated call to `pkg_resources.declare_namespace",
     "ignore:Jupyter is migrating its paths to use standard platformdirs"
 ]
 

diff --git a/src/safeds/data/tabular/typing/_schema.py b/src/safeds/data/tabular/typing/_schema.py
@@ -19,10 +19,19 @@ class Schema:
     ----------
     schema : dict[str, ColumnType]
         Map from column names to data types.
+
+    Examples
+    --------
+    >>> from safeds.data.tabular.typing import Integer, Schema, String
+    >>> schema = Schema({"A": Integer(), "B": String()})
     """
 
     _schema: dict[str, ColumnType]
 
+    # ------------------------------------------------------------------------------------------------------------------
+    # Creation
+    # ------------------------------------------------------------------------------------------------------------------
+
     @staticmethod
     def _from_pandas_dataframe(dataframe: pd.DataFrame) -> Schema:
         """
@@ -44,6 +53,10 @@ def _from_pandas_dataframe(dataframe: pd.DataFrame) -> Schema:
 
         return Schema(dict(zip(names, types, strict=True)))
 
+    # ------------------------------------------------------------------------------------------------------------------
+    # Dunder methods
+    # ------------------------------------------------------------------------------------------------------------------
+
     def __init__(self, schema: dict[str, ColumnType]):
         self._schema = dict(schema)  # Defensive copy
 
@@ -55,11 +68,35 @@ def __hash__(self) -> int:
         -------
         hash : int
             The hash value.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.typing import Integer, Schema, String
+        >>> schema = Schema({"A": Integer(), "B": String()})
+        >>> hash_value = hash(schema)
         """
         column_names = self._schema.keys()
         column_types = map(repr, self._schema.values())
         return hash(tuple(zip(column_names, column_types, strict=True)))
 
+    def __repr__(self) -> str:
+        """
+        Return an unambiguous string representation of this schema.
+
+        Returns
+        -------
+        representation : str
+            The string representation.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.typing import Integer, Schema, String
+        >>> schema = Schema({"A": Integer()})
+        >>> repr(schema)
+        "Schema({'A': Integer})"
+        """
+        return f"Schema({str(self)})"
+
     def __str__(self) -> str:
         """
         Return a user-friendly string representation of the schema.
@@ -68,6 +105,13 @@ def __str__(self) -> str:
         -------
         string : str
             The string representation.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.typing import Integer, Schema, String
+        >>> schema = Schema({"A": Integer()})
+        >>> str(schema)
+        "{'A': Integer}"
         """
         match len(self._schema):
             case 0:
@@ -88,6 +132,13 @@ def column_names(self) -> list[str]:
         -------
         column_names : list[str]
             The column names.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.typing import Integer, Schema, String
+        >>> schema = Schema({"A": Integer(), "B": String()})
+        >>> schema.column_names
+        ['A', 'B']
         """
         return list(self._schema.keys())
 
@@ -104,6 +155,16 @@ def has_column(self, column_name: str) -> bool:
         -------
         contains : bool
             True if the schema contains the column.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.typing import Integer, Schema, String
+        >>> schema = Schema({"A": Integer(), "B": String()})
+        >>> schema.has_column("A")
+        True
+
+        >>> schema.has_column("C")
+        False
         """
         return column_name in self._schema
 
@@ -125,11 +186,64 @@ def get_column_type(self, column_name: str) -> ColumnType:
         ------
         ColumnNameError
             If the specified column name does not exist.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.typing import Integer, Schema, String
+        >>> schema = Schema({"A": Integer(), "B": String()})
+        >>> schema.get_column_type("A")
+        Integer
         """
         if not self.has_column(column_name):
             raise UnknownColumnNameError([column_name])
         return self._schema[column_name]
 
+    # ------------------------------------------------------------------------------------------------------------------
+    # Conversion
+    # ------------------------------------------------------------------------------------------------------------------
+
+    def to_dict(self) -> dict[str, ColumnType]:
+        """
+        Return a dictionary that maps column names to column types.
+
+        Returns
+        -------
+        data : dict[str, ColumnType]
+            Dictionary representation of the schema.
+
+        Examples
+        --------
+        >>> from safeds.data.tabular.typing import Integer, Schema, String
+        >>> schema = Schema({"A": Integer(), "B": String()})
+        >>> schema.to_dict()
+        {'A': Integer, 'B': String}
+        """
+        return dict(self._schema)  # defensive copy
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # IPython Integration
+    # ------------------------------------------------------------------------------------------------------------------
+
+    def _repr_markdown_(self) -> str:
+        """
+        Return a Markdown representation of the schema.
+
+        Returns
+        -------
+        markdown : str
+            The Markdown representation.
+        """
+        if len(self._schema) == 0:
+            return "Empty Schema"
+
+        lines = (f"| {name} | {type_} |" for name, type_ in self._schema.items())
+        joined = "\n".join(lines)
+        return f"| Column Name | Column Type |\n| --- | --- |\n{joined}"
+
+    # ------------------------------------------------------------------------------------------------------------------
+    # Other
+    # ------------------------------------------------------------------------------------------------------------------
+
     def _get_column_index(self, column_name: str) -> int:
         """
          Return the index of the column with specified column name.

diff --git a/tests/safeds/data/tabular/typing/test_schema.py b/tests/safeds/data/tabular/typing/test_schema.py
@@ -48,6 +48,24 @@ def test_should_create_schema_from_pandas_dataframe(self, dataframe: pd.DataFram
         assert Schema._from_pandas_dataframe(dataframe) == expected
 
 
+class TestRepr:
+    @pytest.mark.parametrize(
+        ("schema", "expected"),
+        [
+            (Schema({}), "Schema({})"),
+            (Schema({"A": Integer()}), "Schema({'A': Integer})"),
+            (Schema({"A": Integer(), "B": String()}), "Schema({\n    'A': Integer,\n    'B': String\n})"),
+        ],
+        ids=[
+            "empty",
+            "single column",
+            "multiple columns",
+        ],
+    )
+    def test_should_create_a_string_representation(self, schema: Schema, expected: str) -> None:
+        assert repr(schema) == expected
+
+
 class TestStr:
     @pytest.mark.parametrize(
         ("schema", "expected"),
@@ -62,7 +80,7 @@ class TestStr:
             "multiple columns",
         ],
     )
-    def test_should_create_a_printable_representation(self, schema: Schema, expected: str) -> None:
+    def test_should_create_a_string_representation(self, schema: Schema, expected: str) -> None:
         assert str(schema) == expected
 
 
@@ -213,3 +231,42 @@ def test_should_raise_if_column_does_not_exist(self) -> None:
         schema = Schema({"A": Integer()})
         with pytest.raises(UnknownColumnNameError):
             schema._get_column_index("B")
+
+
+class TestToDict:
+    @pytest.mark.parametrize(
+        ("schema", "expected"),
+        [
+            (Schema({}), {}),
+            (Schema({"A": Integer()}), {"A": Integer()}),
+            (Schema({"A": Integer(), "B": String()}), {"A": Integer(), "B": String()}),
+        ],
+        ids=[
+            "empty",
+            "single column",
+            "multiple columns",
+        ],
+    )
+    def test_should_return_dict_for_schema(self, schema: Schema, expected: str) -> None:
+        assert schema.to_dict() == expected
+
+
+class TestReprMarkdown:
+    @pytest.mark.parametrize(
+        ("schema", "expected"),
+        [
+            (Schema({}), "Empty Schema"),
+            (Schema({"A": Integer()}), "| Column Name | Column Type |\n| --- | --- |\n| A | Integer |"),
+            (
+                Schema({"A": Integer(), "B": String()}),
+                "| Column Name | Column Type |\n| --- | --- |\n| A | Integer |\n| B | String |",
+            ),
+        ],
+        ids=[
+            "empty",
+            "single column",
+            "multiple columns",
+        ],
+    )
+    def test_should_create_a_string_representation(self, schema: Schema, expected: str) -> None:
+        assert schema._repr_markdown_() == expected