join together duplicate entries in the text repr (#7225)

* group the variable names of common indexes * implement option 1: separate with just newlines * implement option 2: list-like presentation of the indexes * implement option 3: light box components in front of the coord names * implement option 4: light box components after the coord names * fix the repr of `Indexes` * only add combine markers for xarray indexes * select option 3 Support for the box components should be common enough that we can use it (and the worst that can happen is that those characters are displayed with a replacement glyph – usually a diamond with a question mark) * improve typing Co-authored-by: Illviljan <[email protected]> * whats-new.rst * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * simplify the visual hint construction * improve the existing index repr test * check that the hints are constructed correctly * convert the names to tuples * revert the typing of `index` * more conversion to tuple * type hint the instance variable * whats-new entry * don't type-check the monkeypatching * adjust the typing, again * use a subclass instead of monkeypatching and type-ignores --------- Co-authored-by: Illviljan <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Deepak Cherian <[email protected]>
pydata · Jul 20, 2023 · 6b1ff6d · 6b1ff6d
1 parent 1d8e92b
commit 6b1ff6d
Show file tree

Hide file tree

Showing 4 changed files with 95 additions and 31 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -21,6 +21,9 @@ v2023.07.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
+
+- Visually group together coordinates with the same indexes in the index section of the text repr (:pull:`7225`).
+  By `Justus Magin <https://github.com/keewis>`_.
 - Allow creating Xarray objects where a multidimensional variable shares its name
   with a dimension. Examples include output from finite volume models like FVCOM.
   (:issue:`2233`, :pull:`7989`)

diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
@@ -424,21 +424,37 @@ def inline_index_repr(index, max_width=None):
 
 
 def summarize_index(
-    name: Hashable, index, col_width: int, max_width: int | None = None
-):
+    names: tuple[Hashable, ...],
+    index,
+    col_width: int,
+    max_width: int | None = None,
+) -> str:
     if max_width is None:
         max_width = OPTIONS["display_width"]
 
-    preformatted = pretty_print(f"    {name} ", col_width)
+    def prefixes(length: int) -> list[str]:
+        if length in (0, 1):
+            return [" "]
+
+        return ["┌"] + ["│"] * max(length - 2, 0) + ["└"]
 
-    index_width = max_width - len(preformatted)
+    preformatted = [
+        pretty_print(f"  {prefix} {name}", col_width)
+        for prefix, name in zip(prefixes(len(names)), names)
+    ]
+
+    head, *tail = preformatted
+    index_width = max_width - len(head)
     repr_ = inline_index_repr(index, max_width=index_width)
-    return preformatted + repr_
+    return "\n".join([head + repr_] + [line.rstrip() for line in tail])
 
 
-def nondefault_indexes(indexes):
+def filter_nondefault_indexes(indexes, filter_indexes: bool):
     from xarray.core.indexes import PandasIndex, PandasMultiIndex
 
+    if not filter_indexes:
+        return indexes
+
     default_indexes = (PandasIndex, PandasMultiIndex)
 
     return {
@@ -448,7 +464,9 @@ def nondefault_indexes(indexes):
     }
 
 
-def indexes_repr(indexes, col_width=None, max_rows=None):
+def indexes_repr(indexes, max_rows: int | None = None) -> str:
+    col_width = _calculate_col_width(chain.from_iterable(indexes))
+
     return _mapping_repr(
         indexes,
         "Indexes",
@@ -599,6 +617,12 @@ def short_data_repr(array):
         return f"[{array.size} values with dtype={array.dtype}]"
 
 
+def _get_indexes_dict(indexes):
+    return {
+        tuple(index_vars.keys()): idx for idx, index_vars in indexes.group_by_index()
+    }
+
+
 @recursive_repr("<recursive array>")
 def array_repr(arr):
     from xarray.core.variable import Variable
@@ -643,15 +667,13 @@ def array_repr(arr):
         display_default_indexes = _get_boolean_with_default(
             "display_default_indexes", False
         )
-        if display_default_indexes:
-            xindexes = arr.xindexes
-        else:
-            xindexes = nondefault_indexes(arr.xindexes)
+
+        xindexes = filter_nondefault_indexes(
+            _get_indexes_dict(arr.xindexes), not display_default_indexes
+        )
 
         if xindexes:
-            summary.append(
-                indexes_repr(xindexes, col_width=col_width, max_rows=max_rows)
-            )
+            summary.append(indexes_repr(xindexes, max_rows=max_rows))
 
     if arr.attrs:
         summary.append(attrs_repr(arr.attrs, max_rows=max_rows))
@@ -682,12 +704,11 @@ def dataset_repr(ds):
     display_default_indexes = _get_boolean_with_default(
         "display_default_indexes", False
     )
-    if display_default_indexes:
-        xindexes = ds.xindexes
-    else:
-        xindexes = nondefault_indexes(ds.xindexes)
+    xindexes = filter_nondefault_indexes(
+        _get_indexes_dict(ds.xindexes), not display_default_indexes
+    )
     if xindexes:
-        summary.append(indexes_repr(xindexes, col_width=col_width, max_rows=max_rows))
+        summary.append(indexes_repr(xindexes, max_rows=max_rows))
 
     if ds.attrs:
         summary.append(attrs_repr(ds.attrs, max_rows=max_rows))

diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
@@ -1621,7 +1621,8 @@ def __getitem__(self, key) -> T_PandasOrXarrayIndex:
         return self._indexes[key]
 
     def __repr__(self):
-        return formatting.indexes_repr(self)
+        indexes = formatting._get_indexes_dict(self)
+        return formatting.indexes_repr(indexes)
 
 
 def default_indexes(

diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py
@@ -218,31 +218,70 @@ def test_attribute_repr(self) -> None:
         assert "\n" not in newlines
         assert "\t" not in tabs
 
-    def test_index_repr(self):
+    def test_index_repr(self) -> None:
         from xarray.core.indexes import Index
 
         class CustomIndex(Index):
-            def __init__(self, names):
+            names: tuple[str, ...]
+
+            def __init__(self, names: tuple[str, ...]):
                 self.names = names
 
             def __repr__(self):
                 return f"CustomIndex(coords={self.names})"
 
-        coord_names = ["x", "y"]
+        coord_names = ("x", "y")
         index = CustomIndex(coord_names)
-        name = "x"
+        names = ("x",)
 
-        normal = formatting.summarize_index(name, index, col_width=20)
-        assert name in normal
+        normal = formatting.summarize_index(names, index, col_width=20)
+        assert names[0] in normal
+        assert len(normal.splitlines()) == len(names)
         assert "CustomIndex" in normal
 
-        CustomIndex._repr_inline_ = (
-            lambda self, max_width: f"CustomIndex[{', '.join(self.names)}]"
-        )
-        inline = formatting.summarize_index(name, index, col_width=20)
-        assert name in inline
+        class IndexWithInlineRepr(CustomIndex):
+            def _repr_inline_(self, max_width: int):
+                return f"CustomIndex[{', '.join(self.names)}]"
+
+        index = IndexWithInlineRepr(coord_names)
+        inline = formatting.summarize_index(names, index, col_width=20)
+        assert names[0] in inline
         assert index._repr_inline_(max_width=40) in inline
 
+    @pytest.mark.parametrize(
+        "names",
+        (
+            ("x",),
+            ("x", "y"),
+            ("x", "y", "z"),
+            ("x", "y", "z", "a"),
+        ),
+    )
+    def test_index_repr_grouping(self, names) -> None:
+        from xarray.core.indexes import Index
+
+        class CustomIndex(Index):
+            def __init__(self, names):
+                self.names = names
+
+            def __repr__(self):
+                return f"CustomIndex(coords={self.names})"
+
+        index = CustomIndex(names)
+
+        normal = formatting.summarize_index(names, index, col_width=20)
+        assert all(name in normal for name in names)
+        assert len(normal.splitlines()) == len(names)
+        assert "CustomIndex" in normal
+
+        hint_chars = [line[2] for line in normal.splitlines()]
+
+        if len(names) <= 1:
+            assert hint_chars == [" "]
+        else:
+            assert hint_chars[0] == "┌" and hint_chars[-1] == "└"
+            assert len(names) == 2 or hint_chars[1:-1] == ["│"] * (len(names) - 2)
+
     def test_diff_array_repr(self) -> None:
         da_a = xr.DataArray(
             np.array([[1, 2, 3], [4, 5, 6]], dtype="int64"),