From 793f7fd8df31ff91f7125c60bd8244bf989c703c Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Thu, 26 Mar 2020 21:19:19 -0400 Subject: [PATCH 1/2] limit repr of arrays containing long strings --- xarray/core/formatting.py | 34 ++++++++++++++++++++++++--------- xarray/tests/test_formatting.py | 25 +++++++++++++++++------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 89246ff228d..534d253ecc8 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -4,6 +4,7 @@ import functools from datetime import datetime, timedelta from itertools import zip_longest +from typing import Hashable import numpy as np import pandas as pd @@ -14,7 +15,7 @@ from .pycompat import dask_array_type, sparse_array_type -def pretty_print(x, numchars): +def pretty_print(x, numchars: int): """Given an object `x`, call `str(x)` and format the returned string so that it is numchars long, padding with trailing spaces or truncating with ellipses as necessary @@ -163,7 +164,7 @@ def format_items(x): return formatted -def format_array_flat(array, max_width): +def format_array_flat(array, max_width: int): """Return a formatted string for as many items in the flattened version of array that will fit within max_width characters. 
""" @@ -198,11 +199,20 @@ def format_array_flat(array, max_width): num_back = count - num_front # note that num_back is 0 <--> array.size is 0 or 1 # <--> relevant_back_items is [] - pprint_str = ( - " ".join(relevant_front_items[:num_front]) - + padding - + " ".join(relevant_back_items[-num_back:]) + pprint_str = "".join( + [ + " ".join(relevant_front_items[:num_front]), + padding, + " ".join(relevant_back_items[-num_back:]), + ] ) + + # As a final check, if it's still too long even with the limit in values, + # replace the end with an ellipsis + # NB: this will still return a full 3-character ellipsis when max_width < 3 + if len(pprint_str) > max_width: + pprint_str = pprint_str[: max(max_width - 3, 0)] + "..." + return pprint_str @@ -258,10 +268,16 @@ def inline_variable_array_repr(var, max_width): return "..." -def summarize_variable(name, var, col_width, marker=" ", max_width=None): +def summarize_variable( + name: Hashable, var, col_width: int, marker: str = " ", max_width: int = None +): """Summarize a variable in one line, e.g., for the Dataset.__repr__.""" if max_width is None: - max_width = OPTIONS["display_width"] + max_width_options = OPTIONS["display_width"] + if not isinstance(max_width_options, int): + raise TypeError(f"`max_width` value of `{max_width}` is not a valid int") + else: + max_width = max_width_options first_col = pretty_print(f" {marker} {name} ", col_width) if var.dims: dims_str = "({}) ".format(", ".join(map(str, var.dims))) @@ -295,7 +311,7 @@ def summarize_datavar(name, var, col_width): return summarize_variable(name, var.variable, col_width) -def summarize_coord(name, var, col_width): +def summarize_coord(name: Hashable, var, col_width: int): is_index = name in var.dims marker = "*" if is_index else " " if is_index: diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 61ecf46b79b..6881c0bc0ff 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -115,7 +115,7 @@ def 
test_format_items(self): def test_format_array_flat(self): actual = formatting.format_array_flat(np.arange(100), 2) - expected = "0 ... 99" + expected = "..." assert expected == actual actual = formatting.format_array_flat(np.arange(100), 9) @@ -134,11 +134,13 @@ def test_format_array_flat(self): expected = "0 1 2 ... 98 99" assert expected == actual + # NB: Probably not ideal; an alternative would be cutting after the + # first ellipsis actual = formatting.format_array_flat(np.arange(100.0), 11) - expected = "0.0 ... 99.0" + expected = "0.0 ... ..." assert expected == actual - actual = formatting.format_array_flat(np.arange(100.0), 1) + actual = formatting.format_array_flat(np.arange(100.0), 12) expected = "0.0 ... 99.0" assert expected == actual @@ -154,16 +156,25 @@ def test_format_array_flat(self): expected = "" assert expected == actual - actual = formatting.format_array_flat(np.arange(1), 0) + actual = formatting.format_array_flat(np.arange(1), 1) expected = "0" assert expected == actual - actual = formatting.format_array_flat(np.arange(2), 0) + actual = formatting.format_array_flat(np.arange(2), 3) expected = "0 1" assert expected == actual - actual = formatting.format_array_flat(np.arange(4), 0) - expected = "0 ... 3" + actual = formatting.format_array_flat(np.arange(4), 7) + expected = "0 1 2 3" + assert expected == actual + + actual = formatting.format_array_flat(np.arange(5), 7) + expected = "0 ... 4" + assert expected == actual + + long_str = [" ".join(["hello world" for _ in range(100)])] + actual = formatting.format_array_flat(np.asarray([long_str]), 21) + expected = "'hello world hello..." 
assert expected == actual def test_pretty_print(self): From 443215d95ae84e949c907335a08b0e9e648eaa92 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 27 Mar 2020 22:20:25 -0400 Subject: [PATCH 2/2] whatsnew --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 14941228c88..a138dee4128 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -35,6 +35,9 @@ New Features :py:func:`combine_by_coords` and :py:func:`combine_nested` using combine_attrs keyword argument. (:issue:`3865`, :pull:`3877`) By `John Omotani <https://github.com/johnomotani>`_ +- Limited the length of array items with long string reprs to a + reasonable width (:pull:`3900`) + By `Maximilian Roos <https://github.com/max-sixty>`_ Bug fixes