Skip to content

Commit

Permalink
Don’t assume that lines break after spaces
Browse files Browse the repository at this point in the history
Fix #1817.
  • Loading branch information
liZe committed Jul 11, 2024
1 parent 126abd3 commit 071e733
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 16 deletions.
16 changes: 16 additions & 0 deletions tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,22 @@ def test_overflow_wrap_trailing_space(wrap, text, body_width, expected_width):
assert td.width == expected_width


def test_overflow_wrap_no_break_on_space():
    # Regression test for https://github.com/Kozea/WeasyPrint/issues/1817
    # The text ".jpg, .png" is laid out in an 11px-wide body with
    # "overflow-wrap: anywhere"; it is expected to produce exactly two line
    # boxes, holding ".jpg," and ".png" respectively.
    (page,) = render_pages('''
<style>
body {width: 11px; font-family: weasyprint; font-size: 2px;
overflow-wrap: anywhere}
</style>.jpg, .png''')
    (html,) = page.children
    (body,) = html.children
    first_line, second_line = body.children

    # Each line box must contain a single text box.
    (first_text,) = first_line.children
    assert first_text.text == '.jpg,'
    (second_text,) = second_line.children
    assert second_text.text == '.png'


def test_line_break_before_trailing_space():
# Test regression: https://github.com/Kozea/WeasyPrint/issues/1852
page, = render_pages('''
Expand Down
31 changes: 15 additions & 16 deletions weasyprint/text/line_break.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,6 @@ def split_first_line(text, style, context, max_width, justification_spacing,
first_line_text = text.encode()[:resume_index].decode()
first_line_fits = (
first_line_width <= max_width or
' ' in first_line_text.strip() or
can_break_text(first_line_text.strip(), style['lang']))
if first_line_fits:
# The first line fits but may have been cut too early by Pango
Expand All @@ -327,9 +326,10 @@ def split_first_line(text, style, context, max_width, justification_spacing,
first_line_text = ''
second_line_text = text

next_word = second_line_text.split(' ', 1)[0]
break_point = next_break_point(second_line_text, style['lang'])
next_word = second_line_text[:break_point].rstrip(' ')
if next_word:
if space_collapse:
if space_collapse and second_line_text[break_point or -1] == ' ':
# next_word might fit without a space afterwards
# only try when space collapsing is allowed
new_first_line_text = first_line_text + next_word
Expand Down Expand Up @@ -362,7 +362,7 @@ def split_first_line(text, style, context, max_width, justification_spacing,

auto_hyphenation = manual_hyphenation = False
if hyphens != 'none':
manual_hyphenation = soft_hyphen in first_line_text + next_word
manual_hyphenation = soft_hyphen in first_line_text + second_line_text
if hyphens == 'auto' and lang:
next_word_boundaries = get_next_word_boundaries(second_line_text, lang)
if next_word_boundaries:
Expand Down Expand Up @@ -392,18 +392,11 @@ def split_first_line(text, style, context, max_width, justification_spacing,
# hyphen and add the missing hyphen
if first_line_text.endswith(soft_hyphen):
# The first line has been split on a soft hyphen
if ' ' in first_line_text:
first_line_text, next_word = first_line_text.rsplit(' ', 1)
next_word = f' {next_word}'
layout.set_text(first_line_text)
first_line, _ = layout.get_first_line()
resume_index = len((f'{first_line_text} ').encode())
else:
first_line_text, next_word = '', first_line_text
first_line_text, second_line_text = '', first_line_text
soft_hyphen_indexes = [
match.start() for match in re.finditer(soft_hyphen, next_word)]
match.start() for match in re.finditer(soft_hyphen, second_line_text)]
soft_hyphen_indexes.reverse()
dictionary_iterations = [next_word[:i+1] for i in soft_hyphen_indexes]
dictionary_iterations = [second_line_text[:i+1] for i in soft_hyphen_indexes]
start_word = 0
elif auto_hyphenation:
dictionary_key = (lang, left, right, total)
Expand Down Expand Up @@ -513,12 +506,18 @@ def get_log_attrs(text, lang):
return bytestring, log_attrs


def next_break_point(text, lang):
    """Return the offset of the first line-break opportunity inside ``text``.

    The log attributes given by ``get_log_attrs(text, lang)`` are scanned from
    index 1 up to, but not including, index ``len(text)``: the entries at
    index 0 and at index ``len(text)`` and beyond are never considered.

    The returned value is the zero-based offset in that scanned range: a
    result of ``i`` means that ``log_attrs[i + 1]`` is the first scanned
    entry whose ``is_line_break`` flag is set.

    ``None`` is returned when ``text`` is falsy or shorter than two
    characters, or when no scanned entry has the flag set.

    Note that ``0`` is a valid result and is falsy: compare the result with
    ``None`` instead of relying on its truthiness.

    """
    # NOTE(review): presumably log_attrs holds one entry per character
    # boundary (len(text) + 1 entries) -- confirm against get_log_attrs.
    if not text or len(text) < 2:
        # Too short to contain an inner break opportunity.
        return None
    _, log_attrs = get_log_attrs(text, lang)
    for offset, attr in enumerate(log_attrs[1:len(text)]):
        if attr.is_line_break:
            return offset
    return None


def can_break_text(text, lang):
    """Tell whether ``next_break_point`` finds a break point in ``text``."""
    break_point = next_break_point(text, lang)
    # A break point of 0 is falsy but valid, hence the identity check.
    return break_point is not None


def get_next_word_boundaries(text, lang):
Expand Down

0 comments on commit 071e733

Please sign in to comment.