From e7c666c58671bd1ed0390fd96f0a52efc4179bc6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 24 Jan 2019 15:17:30 +0100 Subject: [PATCH] BUG (output formatting): use fixed width for truncation column instead of inferring from last column --- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/io/formats/format.py | 12 ++------ pandas/tests/io/formats/test_format.py | 41 ++++++++++++++++++-------- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 3dd345890881c..78bc4cf751c4f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1752,6 +1752,7 @@ I/O - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) - Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a Dataframe in Python 3 from fixed format written in Python 2 (:issue:`24510`) - Bug in :func:`DataFrame.to_string()` and more generally in the floating ``repr`` formatter. Zeros were not trimmed if ``inf`` was present in a columns while it was the case with NA values. Zeros are now trimmed as in the presence of NA (:issue:`24861`). +- Bug in the ``repr`` when truncating the number of columns and having a wide last column (:issue:`24849`). 
Plotting ^^^^^^^^ diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2c1fcab1ebde9..62fa04e784072 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -435,9 +435,6 @@ def _chk_truncate(self): """ from pandas.core.reshape.concat import concat - # Column of which first element is used to determine width of a dot col - self.tr_size_col = -1 - # Cut the data to the information actually printed max_cols = self.max_cols max_rows = self.max_rows @@ -556,10 +553,7 @@ def _to_str_columns(self): if truncate_h: col_num = self.tr_col_num - # infer from column header - col_width = self.adj.len(strcols[self.tr_size_col][0]) - strcols.insert(self.tr_col_num + 1, ['...'.center(col_width)] * - (len(str_index))) + strcols.insert(self.tr_col_num + 1, [' ...'] * (len(str_index))) if truncate_v: n_header_rows = len(str_index) - len(frame) row_num = self.tr_row_num @@ -577,8 +571,8 @@ def _to_str_columns(self): if ix == 0: dot_mode = 'left' elif is_dot_col: - cwidth = self.adj.len(strcols[self.tr_size_col][0]) - dot_mode = 'center' + cwidth = 4 + dot_mode = 'right' else: dot_mode = 'right' dot_str = self.adj.justify([my_str], cwidth, mode=dot_mode)[0] diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 31ab1e050d95c..5d922ccaf1fd5 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -345,6 +345,15 @@ def test_repr_truncates_terminal_size_full(self, monkeypatch): lambda: terminal_size) assert "..." not in str(df) + def test_repr_truncation_column_size(self): + # dataframe with last column very wide -> check it is not used to + # determine size of truncation (...) column + df = pd.DataFrame({'a': [108480, 30830], 'b': [12345, 12345], + 'c': [12345, 12345], 'd': [12345, 12345], + 'e': ['a' * 50] * 2}) + assert "..." in str(df) + assert " ... 
" not in str(df) + def test_repr_max_columns_max_rows(self): term_width, term_height = get_terminal_size() if term_width < 10 or term_height < 10: @@ -543,7 +552,7 @@ def test_to_string_with_formatters_unicode(self): formatters={u('c/\u03c3'): lambda x: '{x}'.format(x=x)}) assert result == u(' c/\u03c3\n') + '0 1\n1 2\n2 3' - def test_east_asian_unicode_frame(self): + def test_east_asian_unicode_false(self): if PY3: _rep = repr else: @@ -643,17 +652,23 @@ def test_east_asian_unicode_frame(self): u'ああああ': [u'さ', u'し', u'す', u'せ']}, columns=['a', 'b', 'c', u'ああああ']) - expected = (u" a ... ああああ\n0 あああああ ... さ\n" - u".. ... ... ...\n3 えええ ... せ\n" + expected = (u" a ... ああああ\n0 あああああ ... さ\n" + u".. ... ... ...\n3 えええ ... せ\n" u"\n[4 rows x 4 columns]") assert _rep(df) == expected df.index = [u'あああ', u'いいいい', u'う', 'aaa'] - expected = (u" a ... ああああ\nあああ あああああ ... さ\n" - u".. ... ... ...\naaa えええ ... せ\n" + expected = (u" a ... ああああ\nあああ あああああ ... さ\n" + u".. ... ... ...\naaa えええ ... せ\n" u"\n[4 rows x 4 columns]") assert _rep(df) == expected + def test_east_asian_unicode_true(self): + if PY3: + _rep = repr + else: + _rep = unicode # noqa + # Emable Unicode option ----------------------------------------- with option_context('display.unicode.east_asian_width', True): @@ -757,18 +772,18 @@ def test_east_asian_unicode_frame(self): u'ああああ': [u'さ', u'し', u'す', u'せ']}, columns=['a', 'b', 'c', u'ああああ']) - expected = (u" a ... ああああ\n" - u"0 あああああ ... さ\n" - u".. ... ... ...\n" - u"3 えええ ... せ\n" + expected = (u" a ... ああああ\n" + u"0 あああああ ... さ\n" + u".. ... ... ...\n" + u"3 えええ ... せ\n" u"\n[4 rows x 4 columns]") assert _rep(df) == expected df.index = [u'あああ', u'いいいい', u'う', 'aaa'] - expected = (u" a ... ああああ\n" - u"あああ あああああ ... さ\n" - u"... ... ... ...\n" - u"aaa えええ ... せ\n" + expected = (u" a ... ああああ\n" + u"あああ あああああ ... さ\n" + u"... ... ... ...\n" + u"aaa えええ ... せ\n" u"\n[4 rows x 4 columns]") assert _rep(df) == expected