From 96c1910688159a37f5c4aed8713eac6f9229db7a Mon Sep 17 00:00:00 2001 From: Armin Rigo Date: Mon, 12 Nov 2018 17:55:33 +0100 Subject: [PATCH] #2912 Rewrite PyUnicode_FromEncodedObject() to use CPython's logic (still needs to be forward-ported to py3.5) --- pypy/module/cpyext/test/test_unicodeobject.py | 2 ++ pypy/module/cpyext/unicodeobject.py | 30 ++++++------------- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py index befc4dc33fd..6a0916d59a5 100644 --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -421,6 +421,8 @@ def test_decode_null_encoding(self, space): with raises_w(space, TypeError): PyUnicode_FromEncodedObject( space, space.wrap(u_text), null_charp, None) + assert space.unicode_w(PyUnicode_FromEncodedObject( + space, space.wrap(s_text), null_charp, None)) == u_text rffi.free_charp(b_text) def test_mbcs(self, space): diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py index 5f2696707f5..a1af21d06e9 100644 --- a/pypy/module/cpyext/unicodeobject.py +++ b/pypy/module/cpyext/unicodeobject.py @@ -366,10 +366,14 @@ def PyUnicode_Decode(space, s, size, encoding, errors): in the unicode() built-in function. The codec to be used is looked up using the Python codec registry. Return NULL if an exception was raised by the codec.""" + return _pyunicode_decode(space, rffi.charpsize2str(s, size), + encoding, errors) + +def _pyunicode_decode(space, s, encoding, errors): if not encoding: # This tracks CPython 2.7, in CPython 3.4 'utf-8' is hardcoded instead encoding = PyUnicode_GetDefaultEncoding(space) - w_str = space.newbytes(rffi.charpsize2str(s, size)) + w_str = space.newbytes(s) w_encoding = space.newtext(rffi.charp2str(encoding)) if errors: w_errors = space.newbytes(rffi.charp2str(errors)) @@ -398,28 +402,12 @@ def PyUnicode_FromEncodedObject(space, w_obj, encoding, errors): All other objects, including Unicode objects, cause a TypeError to be set.""" - if not encoding: - raise oefmt(space.w_TypeError, "decoding Unicode is not supported") - w_encoding = space.newtext(rffi.charp2str(encoding)) - if errors: - w_errors = space.newtext(rffi.charp2str(errors)) - else: - w_errors = None - - # - unicode is disallowed - # - raise TypeError for non-string types if space.isinstance_w(w_obj, space.w_unicode): - w_meth = None - else: - try: - w_meth = space.getattr(w_obj, space.newtext('decode')) - except OperationError as e: - if not e.match(space, space.w_AttributeError): - raise - w_meth = None - if w_meth is None: raise oefmt(space.w_TypeError, "decoding Unicode is not supported") - return space.call_function(w_meth, w_encoding, w_errors) + if space.isinstance_w(w_obj, space.w_bytearray): # Python 2.x specific + raise oefmt(space.w_TypeError, "decoding bytearray is not supported") + s = space.bufferstr_w(w_obj) + return _pyunicode_decode(space, s, encoding, errors) @cpython_api([CONST_STRING], PyObject) def PyUnicode_FromString(space, s):