diff --git a/lib/element.py b/lib/element.py index 892739b..5650366 100644 --- a/lib/element.py +++ b/lib/element.py @@ -215,20 +215,19 @@ def get_content(self): for noise in element_copy.xpath( self.remove_pattern, namespaces=ns): self._safe_remove(noise) + elements = [] if self.reserve_pattern is not None: - self.reserve_elements = element_copy.xpath( - self.reserve_pattern, namespaces=ns) - for eid, reserve in enumerate(self.reserve_elements): + elements = element_copy.xpath(self.reserve_pattern, namespaces=ns) + for eid, element in enumerate(elements): replacement = self.placeholder[0].format(format(eid, '05')) - if get_name(reserve) in ('sub', 'sup'): - parent = reserve.getparent() + if get_name(element) in ('sub', 'sup'): + parent = element.getparent() if parent is not None and get_name(parent) == 'a' and \ - parent.text is None and reserve.tail is None and \ + parent.text is None and element.tail is None and \ len(parent.getchildren()) == 1: - index = self.reserve_elements.index(reserve) - self.reserve_elements[index] = reserve = parent - self._safe_remove(reserve, replacement) - + elements[eid] = element = parent + self.reserve_elements.append(get_string(element, True)) + self._safe_remove(element, replacement) return trim(''.join(element_copy.itertext())) def _polish_translation(self, translation): @@ -236,20 +235,21 @@ def _polish_translation(self, translation): # Condense consecutive letters to a maximum of four. return re.sub(r'((\w)\2{3})\2*', r'\1', translation) - def _create_new_element(self, name, content='', excluding_attrs=[]): + def _create_new_element( + self, name, content='', copy_attrs=True, excluding_attrs=[]): # new_element = self.element.makeelement( # get_name(self.element), nsmap={'xhtml': ns['x']}) # new_element.text = trim(translation) new_element = etree.XML('<{0} xmlns="{1}">{2}'.format( name, ns['x'], trim(content))) # Preserve all attributes from the original element. - for name, value in self.element.items(): - if (name == 'id' and self.position != 'only') or \ - name in excluding_attrs: - continue - if name == 'dir': - value = 'auto' - new_element.set(name, value) + if copy_attrs: + for name, value in self.element.items(): + if (name == 'id' and self.position != 'only') or \ + name in excluding_attrs: + continue + new_element.set(name, value) + new_element.set('dir', 'auto') if self.translation_lang is not None: new_element.set('lang', self.translation_lang) if self.translation_color is not None: @@ -267,13 +267,12 @@ def add_translation(self, translation=None): return # Escape the markups () to replace escaped markups. translation = xml_escape(translation) - for rid, reserve in enumerate(self.reserve_elements): + for rid, element in enumerate(self.reserve_elements): pattern = self.placeholder[1].format( r'\s*'.join(format(rid, '05'))) # Prevent processe any backslash escapes in the replacement. translation = re.sub( - xml_escape(pattern), lambda _: get_string(reserve), - translation) + xml_escape(pattern), lambda _: element, translation) translation = self._polish_translation(translation) element_name = get_name(self.element) @@ -359,42 +358,43 @@ def add_translation(self, translation=None): def _add_translation_for_line_breaks( self, new_element, original_br_list, translation_br_list): + text = new_element.text tail = None - for index, br in enumerate(original_br_list): - translation_br = translation_br_list[index] - wrapper = self._create_new_element('span') - if self.position == 'below': + if self.position == 'below': + for index, br in enumerate(original_br_list): + translation_br = translation_br_list[index] + wrapper = self._create_new_element( + 'span', copy_attrs=False, excluding_attrs=['class']) # Get preceding siblings in reverse document order. for sibling in translation_br.itersiblings(preceding=True): if get_name(sibling) == 'br': break wrapper.insert(0, sibling) - wrapper.text = new_element.text if index == 0 else tail + wrapper.text = text if index == 0 else tail tail = translation_br.tail - if not wrapper.text and len(list(wrapper)) < 1: - continue - new_br = etree.SubElement(self.element, 'br') - br.addprevious(new_br) - new_br.addnext(wrapper) + if wrapper.text or len(list(wrapper)) > 0: + new_br = etree.SubElement(self.element, 'br') + br.addprevious(new_br) + new_br.addnext(wrapper) # Handle the last br element in the translation simultaneously. if br == original_br_list[-1]: - if br.getnext() is None and ( - br.tail is None or br.tail.strip() == ''): + # Ignore the last barely br element. + if translation_br.getnext() is None and ( + tail is None or tail.strip() == ''): continue - last_br = etree.SubElement(self.element, 'br') - while br.getnext() is not None: - br = br.getnext() - if br is None: - br.getparent().append(last_br) - else: - br.addnext(last_br) - wrapper = self._create_new_element('span') - translation_br = translation_br_list[-1] + wrapper = self._create_new_element( + 'span', copy_attrs=False, excluding_attrs=['class']) for sibling in translation_br.itersiblings(): - wrapper.addnext(sibling) - wrapper.text = translation_br.tail - last_br.addnext(wrapper) - else: + wrapper.append(sibling) + wrapper.text = tail + new_br = etree.SubElement(self.element, 'br') + self.element.append(new_br) + new_br.addnext(wrapper) + else: + for index, br in enumerate(original_br_list): + translation_br = translation_br_list[index] + wrapper = self._create_new_element( + 'span', copy_attrs=False, excluding_attrs=['class']) for sibling in translation_br.itersiblings(): if get_name(sibling) == 'br': break @@ -402,19 +402,21 @@ def _add_translation_for_line_breaks( wrapper.text = translation_br.tail if wrapper.text or len(list(wrapper)) > 0: new_br = etree.SubElement(self.element, 'br') - br.addprevious(new_br) - new_br.addnext(wrapper) - if br == original_br_list[-1]: + br.addnext(new_br) + new_br.addprevious(wrapper) + if br == original_br_list[0]: + wrapper = self._create_new_element( + 'span', copy_attrs=False, excluding_attrs=['class']) + if translation_br.getprevious() is None and ( + text is None or text.strip() == ''): + continue + for sibling in translation_br.itersiblings(preceding=True): + wrapper.insert(0, sibling) + wrapper.text = new_element.text new_br = etree.SubElement(self.element, 'br') new_br.tail = self.element.text self.element.text = None self.element.insert(0, new_br) - wrapper = self._create_new_element('span') - wrapper.text = new_element.text - translation_br = translation_br_list[0] - for sibling in translation_br.itersiblings( - preceding=True): - wrapper.insert(0, sibling) new_br.addprevious(wrapper) def _create_table(self, translation=None): diff --git a/tests/test_element.py b/tests/test_element.py index a6596f2..3dc8683 100644 --- a/tests/test_element.py +++ b/tests/test_element.py @@ -408,12 +408,26 @@ def test_get_content(self): '{{id_00006}} {{id_00007}} k{{id_00008}} l') self.assertEqual(content, self.element.get_content()) self.assertEqual(9, len(self.element.reserve_elements)) + self.assertEqual( + '', self.element.reserve_elements[0]) + self.assertEqual( + '', self.element.reserve_elements[1]) + self.assertEqual( + '', self.element.reserve_elements[2]) + self.assertEqual( + '', self.element.reserve_elements[3]) + self.assertEqual( + '', self.element.reserve_elements[4]) + self.assertEqual( + '{\D}', + self.element.reserve_elements[5]) + self.assertEqual( + '', self.element.reserve_elements[6]) + self.assertEqual( + 'App\Http', self.element.reserve_elements[7]) + self.assertEqual('[1]', self.element.reserve_elements[8]) - for element in self.element.reserve_elements: - with self.subTest(element=element): - self.assertIsNone(element.tail) - - def test_get_content_with_sup_sub(self): + def test_get_content_with_sub_sup(self): xhtml = etree.XML(rb""" @@ -433,10 +447,10 @@ def test_get_content_with_sup_sub(self): content = ( 'a{{id_00000}} b{{id_00001}} x cx {{id_00002}} d{{id_00003}} x') self.assertEqual(content, element.get_content()) - self.assertEqual('a', get_name(element.reserve_elements[0])) - self.assertEqual('sup', get_name(element.reserve_elements[1])) - self.assertEqual('sup', get_name(element.reserve_elements[2])) - self.assertEqual('sup', get_name(element.reserve_elements[3])) + self.assertEqual('[1]', element.reserve_elements[0]) + self.assertEqual('[1]', element.reserve_elements[1]) + self.assertEqual('[2]', element.reserve_elements[2]) + self.assertEqual('[3]', element.reserve_elements[3]) def test_get_attributes(self): self.assertEqual('{"class": "abc"}', self.element.get_attributes()) @@ -505,7 +519,7 @@ def test_add_translation_with_placeholder(self): self.element.add_translation(translation) translation = ( - '

' + '

' ' Aaaaa ' 'Bbbbb C D E ' 'F G H ' @@ -530,7 +544,7 @@ def test_add_translation_with_markup(self): self.element.add_translation(translation) translation = ( - '

' + '

' ' Aaaaa ' 'Bbbbb C D E ' 'F G H ' @@ -561,7 +575,8 @@ def test_add_translation_with_linefeeds(self): elements = xhtml.findall('.//x:p', namespaces=ns) self.assertEqual(2, len(elements)) self.assertEqual( - '

A

B
C

', + '

' + 'A

B
C

', get_string(elements[1])) def test_add_translation_below(self): @@ -672,35 +687,53 @@ def test_add_translation_line_break_below(self): Test Document -

- a[1] b c[2]
- d e

- f g
h
i
j k l
- m
-

+

a

+

a
b

+

a


b

+

a
b

+

a
b


+


a
b

+




a
b

+

a
b
c
d
e

""") - - element = PageElement(xhtml.find('.//x:p', namespaces=ns), 'p1') - element.reserve_pattern = create_xpath(('sup', 'img', 'br')) - element.placeholder = Base.placeholder - element.position = 'below' - element.get_content() - element.add_translation( - 'A{{id_00000}} B C{{id_00001}}{{id_00002}} D E {{id_00003}}' - '{{id_00004}}{{id_00005}} F G{{id_00006}}H{{id_00007}}I' - '{{id_00008}}J K L M{{id_00009}}') + translations = [ + 'A{{id_00000}}', + 'A{{id_00000}}B', + 'A{{id_00000}}{{id_00001}}{{id_00002}}B', + 'A{{id_00000}}B{{id_00001}}', + 'A{{id_00000}}B{{id_00001}}{{id_00002}}{{id_00003}}', + '{{id_00000}}A{{id_00001}}B', + '{{id_00000}}{{id_00001}}{{id_00002}}A{{id_00003}}B', + 'A{{id_00000}}B{{id_00001}}C{{id_00002}}{{id_00003}}D E'] + for element in xhtml.findall('.//x:p', namespaces=ns): + element = PageElement(element, 'p1') + element.reserve_pattern = create_xpath(('sup', 'img', 'br')) + element.placeholder = Base.placeholder + element.position = 'below' + element.get_content() + element.add_translation(translations.pop(0)) translation = ( - ' ' - 'Test Document ' - '

a[1] b c[2]
' - 'A[1] B C[2]
' - ' d e
' - ' D E

' - 'f g
F G
' - 'h
H
i
I
' - 'j k l
m
J K L M

' + ' ' + 'Test Document ' + '

a
A

' + '

a
A
' + 'b
B

' + '

a
A


' + 'b
B

' + '

a
A
b
' + 'B

' + '

a
A
b
' + 'B


' + '


a
A
b
' + 'B

' + '




a
A
b
' + 'B

' + '

a
A
b
' + 'B
c
' + 'C
d
e
' + 'D E

' ' ') self.assertEqual(translation, get_string(xhtml)) @@ -710,34 +743,55 @@ def test_add_translation_line_break_above(self): Test Document -

- a[1] b c[2]
- d e f

- g h
i
j
k l
-

+

a

+

a
b

+

a


b

+

a
b

+

a
b


+


a
b

+




a
b

+

a
b
c
d
e

""") - - element = PageElement(xhtml.find('.//x:p', namespaces=ns), 'p1') - element.reserve_pattern = create_xpath(('sup', 'img', 'br')) - element.placeholder = Base.placeholder - element.position = 'above' - element.get_content() - element.add_translation( - 'A{{id_00000}} B C{{id_00001}}{{id_00002}} D E {{id_00003}} F' - '{{id_00004}}{{id_00005}} G H{{id_00006}}I{{id_00007}}J' - '{{id_00008}}K L{{id_00009}}') + translations = [ + 'A{{id_00000}}', + 'A{{id_00000}}B', + 'A{{id_00000}}{{id_00001}}{{id_00002}}B', + 'A{{id_00000}}B{{id_00001}}', + 'A{{id_00000}}B{{id_00001}}{{id_00002}}{{id_00003}}', + '{{id_00000}}A{{id_00001}}B', + '{{id_00000}}{{id_00001}}{{id_00002}}A{{id_00003}}B', + 'A{{id_00000}}{{id_00001}}B{{id_00002}}C{{id_00003}}D E' + ] + for element in xhtml.findall('.//x:p', namespaces=ns): + element = PageElement(element, 'p1') + element.reserve_pattern = create_xpath(('sup', 'img', 'br')) + element.placeholder = Base.placeholder + element.position = 'above' + element.get_content() + element.add_translation(translations.pop(0)) translation = ( - ' ' - 'Test Document ' - '

A[1] B C[2]
' - ' a[1] b c[2]
' - ' D E F
d e ' - ' f

G H
' - ' g h
I
' - 'i
J
j
K L
' - 'k l

') + ' ' + 'Test Document ' + '

A
a

' + '

A
a
' + 'B
b

' + '

A
a


' + 'B
b

' + '

A
a
' + 'B
b

' + '

A
a
' + 'B
b


' + '


A
a
' + 'B
b

' + '




A
a
' + 'B
b

' + '

A
' + 'a
B
' + 'b
C
c
' + 'D E
d
e

' + ' ') self.assertEqual(translation, get_string(xhtml)) def test_add_translation_attr(self): @@ -1426,7 +1480,8 @@ def test_add_translations_merge_separator(self): self.assertEqual('b', elements[1].text) self.assertEqual('c', elements[3].text) self.assertEqual( - '

A B

C

', get_string(elements[4], True)) + '

A B

C

', + get_string(elements[4], True)) def test_add_translations_merge_separator_multiple(self): self.handler.merge_length = 2 @@ -1466,7 +1521,8 @@ def test_add_translations_merge_placeholder_missing_id(self): self.assertEqual('c', elements[3].text) self.assertEqual( - '

A B

C

', get_string(elements[4], True)) + '

A B

C

', + get_string(elements[4], True)) def test_add_translations_merge_placeholder_missing_newline(self): self.handler.separator = Base.separator @@ -1484,7 +1540,8 @@ def test_add_translations_merge_placeholder_missing_newline(self): self.assertEqual('c', elements[3].text) self.assertEqual( - '

A B

C

', get_string(elements[4], True)) + '

A B

C

', + get_string(elements[4], True)) def test_add_translations_merge_palceholder_only(self): self.handler.position = 'only' @@ -1532,7 +1589,7 @@ def test_add_translations_merge_placeholder_only_missing_id(self): elements = self.xhtml.findall('./x:body/*', namespaces=ns) self.assertEqual(5, len(elements)) self.assertEqual( - '

A B

C

', + '

A B

C

', get_string(elements[-2], True)) def test_add_translations_merge_separator_only_missing_id(self): @@ -1549,5 +1606,5 @@ def test_add_translations_merge_separator_only_missing_id(self): self.assertEqual(5, len(elements)) self.assertEqual( - '

A B

C

', + '

A B

C

', get_string(elements[-2], True))