From 562a2499a12014eca20647a6a017a16d697c0adc Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Tue, 8 Feb 2022 12:16:10 +0100 Subject: [PATCH] Fix line break when breaks occur between a nbsp and an inline block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit: - fixes the trailing space detection, by handling all trailing spacing characters that could be ignored by Pango’s line break algorithm; - tries harder to break waiting children when a line break occurs in an inline block that can’t be separated from the previous one. Fix #1562. --- tests/test_text.py | 20 +++++++ weasyprint/layout/inline.py | 109 +++++++++++++++++++----------------- 2 files changed, 79 insertions(+), 50 deletions(-) diff --git a/tests/test_text.py b/tests/test_text.py index 99ffbc072..3d9b3c3e7 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -72,6 +72,26 @@ def test_line_breaking_rtl(): 'ايبسوم دولا') +@assert_no_logs +def test_line_breaking_nbsp(): + # Test regression: https://github.com/Kozea/WeasyPrint/issues/1561 + page, = render_pages(''' + + a b c d ef + ''') + html, = page.children + body, = html.children + line_1, line_2 = body.children + assert line_1.children[0].text == 'a ' + assert line_1.children[1].children[0].text == 'b' + assert line_1.children[2].text == ' c' + assert line_2.children[0].text == 'd\xa0' + assert line_2.children[1].children[0].text == 'ef' + + @assert_no_logs def test_text_dimension(): string = 'This is a text for test. This is a test for text.py' diff --git a/weasyprint/layout/inline.py b/weasyprint/layout/inline.py index 12f6b7322..ba7e40c7f 100644 --- a/weasyprint/layout/inline.py +++ b/weasyprint/layout/inline.py @@ -609,59 +609,68 @@ def _break_waiting_children(context, box, max_x, bottom_space, waiting_children_copy = waiting_children.copy() while waiting_children_copy: child_index, child = waiting_children_copy.pop() - if child.is_in_normal_flow() and can_break_inside(child): - # Break the waiting child at its last possible breaking point. - # TODO: The dirty solution chosen here is to decrease the - # actual size by 1 and render the waiting child again with this - # constraint. We may find a better way. - max_x = child.position_x + child.margin_width() - 1 + if not child.is_in_normal_flow() or not can_break_inside(child): + continue + + # Break the waiting child at its last possible breaking point. + # TODO: The dirty solution chosen here is to decrease the + # actual size by 1 and render the waiting child again with this + # constraint. We may find a better way. + max_x = child.position_x + child.margin_width() - 1 + while max_x > child.position_x: new_child, child_resume_at, _, _, _, _ = split_inline_level( context, child, child.position_x, max_x, bottom_space, None, box, absolute_boxes, fixed_boxes, line_placeholders, waiting_floats, line_children) + if child_resume_at: + break + max_x -= 1 + else: + # No line break found + continue - children.extend(waiting_children_copy) - if new_child is None: - # May be None where we have an empty TextBox. - assert isinstance(child, boxes.TextBox) - else: - children.append((child_index, new_child)) - - # As this child has already been broken following the original - # skip stack, we have to add the original skip stack to the - # partial skip stack we get after the new rendering. - - # Combining skip stacks is a bit complicated. We have to: - # - set `child_index` as the first number - # - append the new stack if it's an absolute one - # - otherwise append the combined stacks - # (resume_at + initial_skip_stack) - - # extract the initial index - if initial_skip_stack is None: - current_skip_stack = None - initial_index = 0 - else: - (initial_index, current_skip_stack), = ( - initial_skip_stack.items()) - # child_resume_at is an absolute skip stack - if child_index > initial_index: - return {child_index: child_resume_at} - - # combine the stacks - current_resume_at = child_resume_at - stack = [] - while current_skip_stack and current_resume_at: - (skip, current_skip_stack), = current_skip_stack.items() - (resume, current_resume_at), = current_resume_at.items() - stack.append(skip + resume) - if resume != 0: - break - resume_at = current_resume_at - while stack: - resume_at = {stack.pop(): resume_at} - # insert the child index - return {child_index: resume_at} + children.extend(waiting_children_copy) + if new_child is None: + # May be None where we have an empty TextBox. + assert isinstance(child, boxes.TextBox) + else: + children.append((child_index, new_child)) + + # As this child has already been broken following the original + # skip stack, we have to add the original skip stack to the + # partial skip stack we get after the new rendering. + + # Combining skip stacks is a bit complicated. We have to: + # - set `child_index` as the first number + # - append the new stack if it's an absolute one + # - otherwise append the combined stacks + # (resume_at + initial_skip_stack) + + # extract the initial index + if initial_skip_stack is None: + current_skip_stack = None + initial_index = 0 + else: + (initial_index, current_skip_stack), = ( + initial_skip_stack.items()) + # child_resume_at is an absolute skip stack + if child_index > initial_index: + return {child_index: child_resume_at} + + # combine the stacks + current_resume_at = child_resume_at + stack = [] + while current_skip_stack and current_resume_at: + (skip, current_skip_stack), = current_skip_stack.items() + (resume, current_resume_at), = current_resume_at.items() + stack.append(skip + resume) + if resume != 0: + break + resume_at = current_resume_at + while stack: + resume_at = {stack.pop(): resume_at} + # insert the child index + return {child_index: resume_at} if children: # Too wide, can't break waiting children and the inline is @@ -785,10 +794,10 @@ def split_inline_box(context, box, position_x, max_x, bottom_space, skip_stack, else: if isinstance(box, boxes.LineBox): line_children.append((index, new_child)) - # TODO: we should try to find a better condition here. trailing_whitespace = ( isinstance(new_child, boxes.TextBox) and - not new_child.text.strip()) + new_child.text and + unicodedata.category(new_child.text[-1]) == 'Zs') new_position_x = new_child.position_x + new_child.margin_width() if new_position_x > max_x and not trailing_whitespace: