diff --git a/cobalt/akn.py b/cobalt/akn.py index 5b41b49..6c23904 100644 --- a/cobalt/akn.py +++ b/cobalt/akn.py @@ -70,13 +70,8 @@ class AkomaNtosoDocument: source = ["cobalt", "cobalt", "https://github.com/laws-africa/cobalt"] def __init__(self, xml=None): - # TODO: we can do this better - encoding = ENCODING_RE.search(xml, 0, 200) - if encoding: - # lxml doesn't like unicode strings with an encoding element, so - # change to bytes + if isinstance(xml, str): xml = xml.encode('utf-8') - self.parse(xml) self.maker = objectify.ElementMaker(annotate=False, namespace=self.namespace, nsmap=self.root.nsmap) diff --git a/tests/test_structured_document.py b/tests/test_structured_document.py index 4233acf..3717cff 100644 --- a/tests/test_structured_document.py +++ b/tests/test_structured_document.py @@ -194,6 +194,42 @@ def test_parser(self): """, a.document_type) + def test_unicode(self): + # string, no encoding + a = Act(""" + + + 😀 + + """) + self.assertEqual(a.root.xpath("//a:body", namespaces={'a': a.namespace})[0].text, "😀") + + # bytes, no encoding + a = Act(""" + + + 😀 + + """.encode('utf-8')) + self.assertEqual(a.root.xpath("//a:body", namespaces={'a': a.namespace})[0].text, "😀") + + # with encoding attribute, bytes + a = Act(""" + + + 😀 + + """.encode('utf-8')) + self.assertEqual(a.root.xpath("//a:body", namespaces={'a': a.namespace})[0].text, "😀") + + # with encoding string + Act(""" + + + 😀 + + """) + def test_add_number(self): """ When adding an FRBRnumber element to a document that doesn't already have one, it must come after subtype.