diff --git a/cobalt/akn.py b/cobalt/akn.py
index 5b41b49..6c23904 100644
--- a/cobalt/akn.py
+++ b/cobalt/akn.py
@@ -70,13 +70,8 @@ class AkomaNtosoDocument:
source = ["cobalt", "cobalt", "https://github.com/laws-africa/cobalt"]
def __init__(self, xml=None):
- # TODO: we can do this better
- encoding = ENCODING_RE.search(xml, 0, 200)
- if encoding:
- # lxml doesn't like unicode strings with an encoding element, so
- # change to bytes
+ if isinstance(xml, str):
xml = xml.encode('utf-8')
-
self.parse(xml)
self.maker = objectify.ElementMaker(annotate=False, namespace=self.namespace, nsmap=self.root.nsmap)
diff --git a/tests/test_structured_document.py b/tests/test_structured_document.py
index 4233acf..3717cff 100644
--- a/tests/test_structured_document.py
+++ b/tests/test_structured_document.py
@@ -194,6 +194,42 @@ def test_parser(self):
""", a.document_type)
+ def test_unicode(self):
+ # case 1: str input, no encoding declared -- the body text must come back unchanged
+ a = Act("""
+
+
+ 😀
+
+ """)
+ self.assertEqual(a.root.xpath("//a:body", namespaces={'a': a.namespace})[0].text, "😀")
+
+ # case 2: input passed as UTF-8 encoded bytes, no encoding declared
+ a = Act("""
+
+
+ 😀
+
+ """.encode('utf-8'))
+ self.assertEqual(a.root.xpath("//a:body", namespaces={'a': a.namespace})[0].text, "😀")
+
+ # case 3: UTF-8 encoded bytes, with an encoding attribute declared in the document
+ a = Act("""
+
+
+ 😀
+
+ """.encode('utf-8'))
+ self.assertEqual(a.root.xpath("//a:body", namespaces={'a': a.namespace})[0].text, "😀")
+
+ # case 4: str input with an encoding declared; no assertion here, it only checks that construction does not raise
+ Act("""
+
+
+ 😀
+
+ """)
+
def test_add_number(self):
""" When adding an FRBRnumber element to a document that doesn't already have one, it
must come after subtype.