From 619e3bc21f2a3900ef170c34f4e0b8f2856bc023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C5=BD=C3=A1ra?= Date: Mon, 7 Oct 2024 22:33:42 +0200 Subject: [PATCH 1/2] lame attempt at hyphenation when nbsp is present --- weasyprint/text/line_break.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/weasyprint/text/line_break.py b/weasyprint/text/line_break.py index b3e8495b4..0416d7c53 100644 --- a/weasyprint/text/line_break.py +++ b/weasyprint/text/line_break.py @@ -10,6 +10,8 @@ from .fonts import font_features, get_font_description +NBSP = '\u00a0' + def line_size(line, style): """Get logical width and height of the given ``line``. @@ -400,7 +402,7 @@ def split_first_line(text, style, context, max_width, justification_spacing, dictionary = pyphen.Pyphen(lang=lang, left=left, right=right) context.dictionaries[dictionary_key] = dictionary dictionary_iterations = [ - start for start, end in dictionary.iterate(next_word)] + start for start, end in dictionary.iterate(next_word) if start[-1] != NBSP] else: dictionary_iterations = [] @@ -518,6 +520,8 @@ def get_next_word_boundaries(text, lang): return None bytestring, log_attrs = get_log_attrs(text, lang) for i, attr in enumerate(log_attrs): + follows_nbsp = (i < len(text) and text[i] == NBSP) + if follows_nbsp: continue if attr.is_word_end: word_end = i break From 2ae30bc339cf193e54b46eee74260372f65d0c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20=C5=BD=C3=A1ra?= Date: Fri, 25 Oct 2024 00:02:00 +0200 Subject: [PATCH 2/2] ruff fixes --- weasyprint/text/line_break.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/weasyprint/text/line_break.py b/weasyprint/text/line_break.py index 0416d7c53..b0c7cafb3 100644 --- a/weasyprint/text/line_break.py +++ b/weasyprint/text/line_break.py @@ -9,7 +9,6 @@ from .ffi import FROM_UNITS, TO_UNITS, ffi, gobject, pango, pangoft2, unicode_to_char_p from .fonts import font_features, get_font_description - NBSP = '\u00a0' def line_size(line, style): @@ -521,7 +520,8 @@ def get_next_word_boundaries(text, lang): bytestring, log_attrs = get_log_attrs(text, lang) for i, attr in enumerate(log_attrs): follows_nbsp = (i < len(text) and text[i] == NBSP) - if follows_nbsp: continue + if follows_nbsp: + continue if attr.is_word_end: word_end = i break