From 5bcd2a77e89f47765992363c4296f2b2aff5c558 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Sat, 25 Aug 2018 14:10:05 -0500 Subject: [PATCH] more accurate line width estimate for e.g. taking into account full width asian characters, accented latin characters --- AUTHORS | 1 + CHANGELOG | 8 +++ py/_io/terminalwriter.py | 43 ++++++++++++--- testing/io_/test_terminalwriter_linewidth.py | 56 ++++++++++++++++++++ 4 files changed, 102 insertions(+), 6 deletions(-) create mode 100644 testing/io_/test_terminalwriter_linewidth.py diff --git a/AUTHORS b/AUTHORS index 8c0cf9b7..9c5dda9c 100644 --- a/AUTHORS +++ b/AUTHORS @@ -22,3 +22,4 @@ Jan Balster Grig Gheorghiu Bob Ippolito Christian Tismer +Wim Glenn diff --git a/CHANGELOG b/CHANGELOG index a17cdb59..c78f4131 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +1.5.5 (unreleased) +================== + +- add ``TerminalWriter.width_of_current_line`` (i18n version of + ``TerminalWriter.chars_on_current_line``), a read-only property + that tracks how wide the current line is, attempting to take + into account international characters in the calculation. + 1.5.4 (2018-06-27) ================== diff --git a/py/_io/terminalwriter.py b/py/_io/terminalwriter.py index 74d31259..a6bea323 100644 --- a/py/_io/terminalwriter.py +++ b/py/_io/terminalwriter.py @@ -5,7 +5,7 @@ """ -import sys, os +import sys, os, unicodedata import py py3k = sys.version_info[0] >= 3 from py.builtin import text, bytes @@ -53,6 +53,21 @@ def get_terminal_width(): terminal_width = get_terminal_width() +char_width = { + 'A': 1, # "Ambiguous" + 'F': 2, # Fullwidth + 'H': 1, # Halfwidth + 'N': 1, # Neutral + 'Na': 1, # Narrow + 'W': 2, # Wide +} + + +def get_line_width(text): + text = unicodedata.normalize('NFC', text) + return sum(char_width.get(unicodedata.east_asian_width(c), 1) for c in text) + + # XXX unify with _escaped func below def ansi_print(text, esc, file=None, newline=True, flush=False): if file is None: @@ -140,6 +155,7 @@ def __init__(self, file=None, stringio=False, encoding=None): self.hasmarkup = should_do_markup(file) self._lastlen = 0 self._chars_on_current_line = 0 + self._width_of_current_line = 0 @property def fullwidth(self): @@ -164,6 +180,16 @@ def chars_on_current_line(self): """ return self._chars_on_current_line + @property + def width_of_current_line(self): + """Return an estimate of the width so far in the current line. + + .. versionadded:: 1.5.5 + + :rtype: int + """ + return self._width_of_current_line + def _escaped(self, text, esc): if esc and self.hasmarkup: text = (''.join(['\x1b[%sm' % cod for cod in esc]) + @@ -223,12 +249,17 @@ def write(self, msg, **kw): markupmsg = msg write_out(self._file, markupmsg) - def _update_chars_on_current_line(self, text): - fields = text.rsplit('\n', 1) - if '\n' in text: - self._chars_on_current_line = len(fields[-1]) + def _update_chars_on_current_line(self, text_or_bytes): + newline = b'\n' if isinstance(text_or_bytes, bytes) else '\n' + current_line = text_or_bytes.rsplit(newline, 1)[-1] + if isinstance(current_line, bytes): + current_line = current_line.decode('utf-8', errors='replace') + if newline in text_or_bytes: + self._chars_on_current_line = len(current_line) + self._width_of_current_line = get_line_width(current_line) else: - self._chars_on_current_line += len(fields[-1]) + self._chars_on_current_line += len(current_line) + self._width_of_current_line += get_line_width(current_line) def line(self, s='', **kw): self.write(s, **kw) diff --git a/testing/io_/test_terminalwriter_linewidth.py b/testing/io_/test_terminalwriter_linewidth.py new file mode 100644 index 00000000..e6d84fbf --- /dev/null +++ b/testing/io_/test_terminalwriter_linewidth.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from py._io.terminalwriter import TerminalWriter + + +def test_terminal_writer_line_width_init(): + tw = TerminalWriter() + assert tw.chars_on_current_line == 0 + assert tw.width_of_current_line == 0 + + +def test_terminal_writer_line_width_update(): + tw = TerminalWriter() + tw.write('hello world') + assert tw.chars_on_current_line == 11 + assert tw.width_of_current_line == 11 + + +def test_terminal_writer_line_width_update_with_newline(): + tw = TerminalWriter() + tw.write('hello\nworld') + assert tw.chars_on_current_line == 5 + assert tw.width_of_current_line == 5 + + +def test_terminal_writer_line_width_update_with_wide_text(): + tw = TerminalWriter() + tw.write('乇乂ㄒ尺卂 ㄒ卄丨匚匚') + assert tw.chars_on_current_line == 11 + assert tw.width_of_current_line == 21 # 5*2 + 1 + 5*2 + + +def test_terminal_writer_line_width_update_with_wide_bytes(): + tw = TerminalWriter() + tw.write('乇乂ㄒ尺卂 ㄒ卄丨匚匚'.encode('utf-8')) + assert tw.chars_on_current_line == 11 + assert tw.width_of_current_line == 21 + + +def test_terminal_writer_line_width_composed(): + tw = TerminalWriter() + text = 'café food' + assert len(text) == 9 + tw.write(text) + assert tw.chars_on_current_line == 9 + assert tw.width_of_current_line == 9 + + +def test_terminal_writer_line_width_combining(): + tw = TerminalWriter() + text = 'café food' + assert len(text) == 10 + tw.write(text) + assert tw.chars_on_current_line == 10 + assert tw.width_of_current_line == 9