diff --git a/lib/element.py b/lib/element.py
index 892739b..5650366 100644
--- a/lib/element.py
+++ b/lib/element.py
@@ -215,20 +215,19 @@ def get_content(self):
for noise in element_copy.xpath(
self.remove_pattern, namespaces=ns):
self._safe_remove(noise)
+ elements = []
if self.reserve_pattern is not None:
- self.reserve_elements = element_copy.xpath(
- self.reserve_pattern, namespaces=ns)
- for eid, reserve in enumerate(self.reserve_elements):
+ elements = element_copy.xpath(self.reserve_pattern, namespaces=ns)
+ for eid, element in enumerate(elements):
replacement = self.placeholder[0].format(format(eid, '05'))
- if get_name(reserve) in ('sub', 'sup'):
- parent = reserve.getparent()
+ if get_name(element) in ('sub', 'sup'):
+ parent = element.getparent()
if parent is not None and get_name(parent) == 'a' and \
- parent.text is None and reserve.tail is None and \
+ parent.text is None and element.tail is None and \
len(parent.getchildren()) == 1:
- index = self.reserve_elements.index(reserve)
- self.reserve_elements[index] = reserve = parent
- self._safe_remove(reserve, replacement)
-
+ elements[eid] = element = parent
+ self.reserve_elements.append(get_string(element, True))
+ self._safe_remove(element, replacement)
return trim(''.join(element_copy.itertext()))
def _polish_translation(self, translation):
@@ -236,20 +235,21 @@ def _polish_translation(self, translation):
# Condense consecutive letters to a maximum of four.
return re.sub(r'((\w)\2{3})\2*', r'\1', translation)
- def _create_new_element(self, name, content='', excluding_attrs=[]):
+ def _create_new_element(
+ self, name, content='', copy_attrs=True, excluding_attrs=[]):
# new_element = self.element.makeelement(
# get_name(self.element), nsmap={'xhtml': ns['x']})
# new_element.text = trim(translation)
new_element = etree.XML('<{0} xmlns="{1}">{2}{0}>'.format(
name, ns['x'], trim(content)))
# Preserve all attributes from the original element.
- for name, value in self.element.items():
- if (name == 'id' and self.position != 'only') or \
- name in excluding_attrs:
- continue
- if name == 'dir':
- value = 'auto'
- new_element.set(name, value)
+ if copy_attrs:
+ for name, value in self.element.items():
+ if (name == 'id' and self.position != 'only') or \
+ name in excluding_attrs:
+ continue
+ new_element.set(name, value)
+ new_element.set('dir', 'auto')
if self.translation_lang is not None:
new_element.set('lang', self.translation_lang)
if self.translation_color is not None:
@@ -267,13 +267,12 @@ def add_translation(self, translation=None):
return
# Escape the markups () to replace escaped markups.
translation = xml_escape(translation)
- for rid, reserve in enumerate(self.reserve_elements):
+ for rid, element in enumerate(self.reserve_elements):
pattern = self.placeholder[1].format(
r'\s*'.join(format(rid, '05')))
# Prevent processe any backslash escapes in the replacement.
translation = re.sub(
- xml_escape(pattern), lambda _: get_string(reserve),
- translation)
+ xml_escape(pattern), lambda _: element, translation)
translation = self._polish_translation(translation)
element_name = get_name(self.element)
@@ -359,42 +358,43 @@ def add_translation(self, translation=None):
def _add_translation_for_line_breaks(
self, new_element, original_br_list, translation_br_list):
+ text = new_element.text
tail = None
- for index, br in enumerate(original_br_list):
- translation_br = translation_br_list[index]
- wrapper = self._create_new_element('span')
- if self.position == 'below':
+ if self.position == 'below':
+ for index, br in enumerate(original_br_list):
+ translation_br = translation_br_list[index]
+ wrapper = self._create_new_element(
+ 'span', copy_attrs=False, excluding_attrs=['class'])
# Get preceding siblings in reverse document order.
for sibling in translation_br.itersiblings(preceding=True):
if get_name(sibling) == 'br':
break
wrapper.insert(0, sibling)
- wrapper.text = new_element.text if index == 0 else tail
+ wrapper.text = text if index == 0 else tail
tail = translation_br.tail
- if not wrapper.text and len(list(wrapper)) < 1:
- continue
- new_br = etree.SubElement(self.element, 'br')
- br.addprevious(new_br)
- new_br.addnext(wrapper)
+ if wrapper.text or len(list(wrapper)) > 0:
+ new_br = etree.SubElement(self.element, 'br')
+ br.addprevious(new_br)
+ new_br.addnext(wrapper)
# Handle the last br element in the translation simultaneously.
if br == original_br_list[-1]:
- if br.getnext() is None and (
- br.tail is None or br.tail.strip() == ''):
+ # Ignore the last bare br element.
+ if translation_br.getnext() is None and (
+ tail is None or tail.strip() == ''):
continue
- last_br = etree.SubElement(self.element, 'br')
- while br.getnext() is not None:
- br = br.getnext()
- if br is None:
- br.getparent().append(last_br)
- else:
- br.addnext(last_br)
- wrapper = self._create_new_element('span')
- translation_br = translation_br_list[-1]
+ wrapper = self._create_new_element(
+ 'span', copy_attrs=False, excluding_attrs=['class'])
for sibling in translation_br.itersiblings():
- wrapper.addnext(sibling)
- wrapper.text = translation_br.tail
- last_br.addnext(wrapper)
- else:
+ wrapper.append(sibling)
+ wrapper.text = tail
+ new_br = etree.SubElement(self.element, 'br')
+ self.element.append(new_br)
+ new_br.addnext(wrapper)
+ else:
+ for index, br in enumerate(original_br_list):
+ translation_br = translation_br_list[index]
+ wrapper = self._create_new_element(
+ 'span', copy_attrs=False, excluding_attrs=['class'])
for sibling in translation_br.itersiblings():
if get_name(sibling) == 'br':
break
@@ -402,19 +402,21 @@ def _add_translation_for_line_breaks(
wrapper.text = translation_br.tail
if wrapper.text or len(list(wrapper)) > 0:
new_br = etree.SubElement(self.element, 'br')
- br.addprevious(new_br)
- new_br.addnext(wrapper)
- if br == original_br_list[-1]:
+ br.addnext(new_br)
+ new_br.addprevious(wrapper)
+ if br == original_br_list[0]:
+ wrapper = self._create_new_element(
+ 'span', copy_attrs=False, excluding_attrs=['class'])
+ if translation_br.getprevious() is None and (
+ text is None or text.strip() == ''):
+ continue
+ for sibling in translation_br.itersiblings(preceding=True):
+ wrapper.insert(0, sibling)
+ wrapper.text = new_element.text
new_br = etree.SubElement(self.element, 'br')
new_br.tail = self.element.text
self.element.text = None
self.element.insert(0, new_br)
- wrapper = self._create_new_element('span')
- wrapper.text = new_element.text
- translation_br = translation_br_list[0]
- for sibling in translation_br.itersiblings(
- preceding=True):
- wrapper.insert(0, sibling)
new_br.addprevious(wrapper)
def _create_table(self, translation=None):
diff --git a/tests/test_element.py b/tests/test_element.py
index a6596f2..3dc8683 100644
--- a/tests/test_element.py
+++ b/tests/test_element.py
@@ -408,12 +408,26 @@ def test_get_content(self):
'{{id_00006}} {{id_00007}} k{{id_00008}} l')
self.assertEqual(content, self.element.get_content())
self.assertEqual(9, len(self.element.reserve_elements))
+ self.assertEqual(
+ '', self.element.reserve_elements[0])
+ self.assertEqual(
+ '', self.element.reserve_elements[1])
+ self.assertEqual(
+ '', self.element.reserve_elements[2])
+ self.assertEqual(
+ '', self.element.reserve_elements[3])
+ self.assertEqual(
+ '', self.element.reserve_elements[4])
+ self.assertEqual(
+ '',
+ self.element.reserve_elements[5])
+ self.assertEqual(
+ '', self.element.reserve_elements[6])
+ self.assertEqual(
+ 'App\Http
', self.element.reserve_elements[7])
+ self.assertEqual('[1]', self.element.reserve_elements[8])
- for element in self.element.reserve_elements:
- with self.subTest(element=element):
- self.assertIsNone(element.tail)
-
- def test_get_content_with_sup_sub(self):
+ def test_get_content_with_sub_sup(self):
xhtml = etree.XML(rb"""
@@ -433,10 +447,10 @@ def test_get_content_with_sup_sub(self):
content = (
'a{{id_00000}} b{{id_00001}} x cx {{id_00002}} d{{id_00003}} x')
self.assertEqual(content, element.get_content())
- self.assertEqual('a', get_name(element.reserve_elements[0]))
- self.assertEqual('sup', get_name(element.reserve_elements[1]))
- self.assertEqual('sup', get_name(element.reserve_elements[2]))
- self.assertEqual('sup', get_name(element.reserve_elements[3]))
+ self.assertEqual('[1]', element.reserve_elements[0])
+ self.assertEqual('[1]', element.reserve_elements[1])
+ self.assertEqual('[2]', element.reserve_elements[2])
+ self.assertEqual('[3]', element.reserve_elements[3])
def test_get_attributes(self):
self.assertEqual('{"class": "abc"}', self.element.get_attributes())
@@ -505,7 +519,7 @@ def test_add_translation_with_placeholder(self):
self.element.add_translation(translation)
translation = (
- '
'
+ '
'
' Aaaaa '
'Bbbbb C D E '
'F G H '
@@ -530,7 +544,7 @@ def test_add_translation_with_markup(self):
self.element.add_translation(translation)
translation = (
- '
'
+ '
'
' Aaaaa '
'Bbbbb C D E '
'F G H '
@@ -561,7 +575,8 @@ def test_add_translation_with_linefeeds(self):
elements = xhtml.findall('.//x:p', namespaces=ns)
self.assertEqual(2, len(elements))
self.assertEqual(
- '
A
B
C
',
+ ''
+ 'A
B
C
',
get_string(elements[1]))
def test_add_translation_below(self):
@@ -672,35 +687,53 @@ def test_add_translation_line_break_below(self):
Test Document
-
- a[1] b c[2]
- d e
- f g
h
i
j k l
- m
-
+ a
+ a
b
+ a
b
+ a
b
+ a
b
+
a
b
+
a
b
+ a
b
c
de
""")
-
- element = PageElement(xhtml.find('.//x:p', namespaces=ns), 'p1')
- element.reserve_pattern = create_xpath(('sup', 'img', 'br'))
- element.placeholder = Base.placeholder
- element.position = 'below'
- element.get_content()
- element.add_translation(
- 'A{{id_00000}} B C{{id_00001}}{{id_00002}} D E {{id_00003}}'
- '{{id_00004}}{{id_00005}} F G{{id_00006}}H{{id_00007}}I'
- '{{id_00008}}J K L M{{id_00009}}')
+ translations = [
+ 'A{{id_00000}}',
+ 'A{{id_00000}}B',
+ 'A{{id_00000}}{{id_00001}}{{id_00002}}B',
+ 'A{{id_00000}}B{{id_00001}}',
+ 'A{{id_00000}}B{{id_00001}}{{id_00002}}{{id_00003}}',
+ '{{id_00000}}A{{id_00001}}B',
+ '{{id_00000}}{{id_00001}}{{id_00002}}A{{id_00003}}B',
+ 'A{{id_00000}}B{{id_00001}}C{{id_00002}}{{id_00003}}D E']
+ for element in xhtml.findall('.//x:p', namespaces=ns):
+ element = PageElement(element, 'p1')
+ element.reserve_pattern = create_xpath(('sup', 'img', 'br'))
+ element.placeholder = Base.placeholder
+ element.position = 'below'
+ element.get_content()
+ element.add_translation(translations.pop(0))
translation = (
- ' '
- 'Test Document '
- ' a[1] b c[2]
'
- 'A[1] B C[2]
'
- ' d e
'
- ' D E
'
- 'f g
F G
'
- 'h
H
i
I
'
- 'j k l m
J K L M
'
+ ' '
+ 'Test Document '
+ 'a
A
'
+ 'a
A
'
+ 'b
B
'
+ 'a
A
'
+ 'b
B
'
+ 'a
A
b
'
+ 'B
'
+ 'a
A
b
'
+ 'B
'
+ '
a
A
b
'
+ 'B
'
+ '
a
A
b
'
+ 'B
'
+ 'a
A
b
'
+ 'B
c
'
+ 'C
de
'
+ 'D E
'
' ')
self.assertEqual(translation, get_string(xhtml))
@@ -710,34 +743,55 @@ def test_add_translation_line_break_above(self):
Test Document
-
- a[1] b c[2]
- d e f
- g h
i
j
k l
-
+ a
+ a
b
+ a
b
+ a
b
+ a
b
+
a
b
+
a
b
+ a
b
c
de
""")
-
- element = PageElement(xhtml.find('.//x:p', namespaces=ns), 'p1')
- element.reserve_pattern = create_xpath(('sup', 'img', 'br'))
- element.placeholder = Base.placeholder
- element.position = 'above'
- element.get_content()
- element.add_translation(
- 'A{{id_00000}} B C{{id_00001}}{{id_00002}} D E {{id_00003}} F'
- '{{id_00004}}{{id_00005}} G H{{id_00006}}I{{id_00007}}J'
- '{{id_00008}}K L{{id_00009}}')
+ translations = [
+ 'A{{id_00000}}',
+ 'A{{id_00000}}B',
+ 'A{{id_00000}}{{id_00001}}{{id_00002}}B',
+ 'A{{id_00000}}B{{id_00001}}',
+ 'A{{id_00000}}B{{id_00001}}{{id_00002}}{{id_00003}}',
+ '{{id_00000}}A{{id_00001}}B',
+ '{{id_00000}}{{id_00001}}{{id_00002}}A{{id_00003}}B',
+ 'A{{id_00000}}{{id_00001}}B{{id_00002}}C{{id_00003}}D E'
+ ]
+ for element in xhtml.findall('.//x:p', namespaces=ns):
+ element = PageElement(element, 'p1')
+ element.reserve_pattern = create_xpath(('sup', 'img', 'br'))
+ element.placeholder = Base.placeholder
+ element.position = 'above'
+ element.get_content()
+ element.add_translation(translations.pop(0))
translation = (
- ' '
- 'Test Document '
- 'A[1] B C[2]
'
- ' a[1] b c[2]
'
- ' D E F
d e '
- ' f
G H
'
- ' g h
I
'
- 'i
J
j
K L
'
- 'k l
')
+ ' '
+ 'Test Document '
+ 'A
a
'
+ 'A
a
'
+ 'B
b
'
+ 'A
a
'
+ 'B
b
'
+ 'A
a
'
+ 'B
b
'
+ 'A
a
'
+ 'B
b
'
+ '
A
a
'
+ 'B
b
'
+ '
A
a
'
+ 'B
b
'
+ 'A
'
+ 'a
B
'
+ 'b
C
c
'
+ 'D E
de
'
+ ' ')
self.assertEqual(translation, get_string(xhtml))
def test_add_translation_attr(self):
@@ -1426,7 +1480,8 @@ def test_add_translations_merge_separator(self):
self.assertEqual('b', elements[1].text)
self.assertEqual('c', elements[3].text)
self.assertEqual(
- 'A B
C
', get_string(elements[4], True))
+ 'A B
C
',
+ get_string(elements[4], True))
def test_add_translations_merge_separator_multiple(self):
self.handler.merge_length = 2
@@ -1466,7 +1521,8 @@ def test_add_translations_merge_placeholder_missing_id(self):
self.assertEqual('c', elements[3].text)
self.assertEqual(
- 'A B
C
', get_string(elements[4], True))
+ 'A B
C
',
+ get_string(elements[4], True))
def test_add_translations_merge_placeholder_missing_newline(self):
self.handler.separator = Base.separator
@@ -1484,7 +1540,8 @@ def test_add_translations_merge_placeholder_missing_newline(self):
self.assertEqual('c', elements[3].text)
self.assertEqual(
- 'A B
C
', get_string(elements[4], True))
+ 'A B
C
',
+ get_string(elements[4], True))
def test_add_translations_merge_palceholder_only(self):
self.handler.position = 'only'
@@ -1532,7 +1589,7 @@ def test_add_translations_merge_placeholder_only_missing_id(self):
elements = self.xhtml.findall('./x:body/*', namespaces=ns)
self.assertEqual(5, len(elements))
self.assertEqual(
- 'A B
C
',
+ 'A B
C
',
get_string(elements[-2], True))
def test_add_translations_merge_separator_only_missing_id(self):
@@ -1549,5 +1606,5 @@ def test_add_translations_merge_separator_only_missing_id(self):
self.assertEqual(5, len(elements))
self.assertEqual(
- 'A B
C
',
+ 'A B
C
',
get_string(elements[-2], True))