Skip to content
This repository has been archived by the owner on Jan 3, 2024. It is now read-only.

Commit

Permalink
#2912
Browse files (browse the repository at this point in the history)
Rewrite PyUnicode_FromEncodedObject() to use CPython's logic
(still needs to be forward-ported to py3.5)
  • Loading branch information
arigo committed Nov 12, 2018
1 parent 107d077 commit 96c1910
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 21 deletions.
2 changes: 2 additions & 0 deletions pypy/module/cpyext/test/test_unicodeobject.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,8 @@ def test_decode_null_encoding(self, space):
with raises_w(space, TypeError):
PyUnicode_FromEncodedObject(
space, space.wrap(u_text), null_charp, None)
assert space.unicode_w(PyUnicode_FromEncodedObject(
space, space.wrap(s_text), null_charp, None)) == u_text
rffi.free_charp(b_text)

def test_mbcs(self, space):
Expand Down
30 changes: 9 additions & 21 deletions pypy/module/cpyext/unicodeobject.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,10 +366,14 @@ def PyUnicode_Decode(space, s, size, encoding, errors):
in the unicode() built-in function. The codec to be used is looked up
using the Python codec registry. Return NULL if an exception was raised by
the codec."""
return _pyunicode_decode(space, rffi.charpsize2str(s, size),
encoding, errors)

def _pyunicode_decode(space, s, encoding, errors):
if not encoding:
# This tracks CPython 2.7; in CPython 3.4, 'utf-8' is hardcoded instead.
encoding = PyUnicode_GetDefaultEncoding(space)
w_str = space.newbytes(rffi.charpsize2str(s, size))
w_str = space.newbytes(s)
w_encoding = space.newtext(rffi.charp2str(encoding))
if errors:
w_errors = space.newbytes(rffi.charp2str(errors))
Expand Down Expand Up @@ -398,28 +402,12 @@ def PyUnicode_FromEncodedObject(space, w_obj, encoding, errors):
All other objects, including Unicode objects, cause a TypeError to be
set."""
if not encoding:
raise oefmt(space.w_TypeError, "decoding Unicode is not supported")
w_encoding = space.newtext(rffi.charp2str(encoding))
if errors:
w_errors = space.newtext(rffi.charp2str(errors))
else:
w_errors = None

# - unicode is disallowed
# - raise TypeError for non-string types
if space.isinstance_w(w_obj, space.w_unicode):
w_meth = None
else:
try:
w_meth = space.getattr(w_obj, space.newtext('decode'))
except OperationError as e:
if not e.match(space, space.w_AttributeError):
raise
w_meth = None
if w_meth is None:
raise oefmt(space.w_TypeError, "decoding Unicode is not supported")
return space.call_function(w_meth, w_encoding, w_errors)
if space.isinstance_w(w_obj, space.w_bytearray): # Python 2.x specific
raise oefmt(space.w_TypeError, "decoding bytearray is not supported")
s = space.bufferstr_w(w_obj)
return _pyunicode_decode(space, s, encoding, errors)

@cpython_api([CONST_STRING], PyObject)
def PyUnicode_FromString(space, s):
Expand Down

0 comments on commit 96c1910

Please sign in to comment.