Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-127058: Make PySequence_Tuple safer and probably faster. #127758

Merged
merged 4 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Include/internal/pycore_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ typedef struct {
union _PyStackRef;

PyAPI_FUNC(PyObject *)_PyList_FromStackRefSteal(const union _PyStackRef *src, Py_ssize_t n);

PyAPI_FUNC(PyObject *)_PyList_AsTupleAndClear(PyListObject *v);

#ifdef __cplusplus
}
Expand Down
25 changes: 25 additions & 0 deletions Lib/test/test_capi/test_tuple.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
import sys
import gc
from collections import namedtuple
from test.support import import_helper

Expand Down Expand Up @@ -257,5 +258,29 @@ def test__tuple_resize(self):
self.assertRaises(SystemError, resize, [1, 2, 3], 0, False)
self.assertRaises(SystemError, resize, NULL, 0, False)

def test_bug_59313(self):
# Before 3.14, the C-API function PySequence_Tuple
# would create incomplete tuples which were visible to
# the cycle GC, and this test would crash the interpeter.
TAG = object()
tuples = []

def referrer_tuples():
return [x for x in gc.get_referrers(TAG)
if isinstance(x, tuple)]

def my_iter():
nonlocal tuples
yield TAG # 'tag' gets stored in the result tuple
tuples += referrer_tuples()
for x in range(10):
tuples += referrer_tuples()
# Prior to 3.13 would raise a SystemError when the tuple needs to be resized
yield x

self.assertEqual(tuple(my_iter()), (TAG, *range(10)))
self.assertEqual(tuples, [])


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
``PySequence_Tuple`` now creates the resulting tuple atomically, preventing
partially created tuples being visible to the garbage collector or through
``gc.get_referrers()``
87 changes: 40 additions & 47 deletions Objects/abstract.c
Original file line number Diff line number Diff line change
Expand Up @@ -1993,9 +1993,6 @@ PyObject *
PySequence_Tuple(PyObject *v)
{
PyObject *it; /* iter(v) */
Py_ssize_t n; /* guess for result tuple size */
PyObject *result = NULL;
Py_ssize_t j;

if (v == NULL) {
return null_error();
Expand All @@ -2017,58 +2014,54 @@ PySequence_Tuple(PyObject *v)
if (it == NULL)
return NULL;

/* Guess result size and allocate space. */
n = PyObject_LengthHint(v, 10);
if (n == -1)
goto Fail;
result = PyTuple_New(n);
if (result == NULL)
goto Fail;

/* Fill the tuple. */
for (j = 0; ; ++j) {
Py_ssize_t n;
PyObject *buffer[8];
for (n = 0; n < 8; n++) {
PyObject *item = PyIter_Next(it);
if (item == NULL) {
if (PyErr_Occurred())
goto Fail;
break;
}
if (j >= n) {
size_t newn = (size_t)n;
/* The over-allocation strategy can grow a bit faster
than for lists because unlike lists the
over-allocation isn't permanent -- we reclaim
the excess before the end of this routine.
So, grow by ten and then add 25%.
*/
newn += 10u;
newn += newn >> 2;
if (newn > PY_SSIZE_T_MAX) {
/* Check for overflow */
PyErr_NoMemory();
Py_DECREF(item);
goto Fail;
if (PyErr_Occurred()) {
goto fail;
}
n = (Py_ssize_t)newn;
if (_PyTuple_Resize(&result, n) != 0) {
Py_DECREF(item);
goto Fail;
Py_DECREF(it);
return _PyTuple_FromArraySteal(buffer, n);
}
buffer[n] = item;
}
PyListObject *list = (PyListObject *)PyList_New(16);
if (list == NULL) {
goto fail;
}
assert(n == 8);
Py_SET_SIZE(list, n);
for (Py_ssize_t j = 0; j < n; j++) {
PyList_SET_ITEM(list, j, buffer[j]);
}
for (;;) {
PyObject *item = PyIter_Next(it);
if (item == NULL) {
if (PyErr_Occurred()) {
Py_DECREF(list);
Py_DECREF(it);
return NULL;
}
break;
}
if (_PyList_AppendTakeRef(list, item) < 0) {
Py_DECREF(list);
Py_DECREF(it);
return NULL;
}
PyTuple_SET_ITEM(result, j, item);
}

/* Cut tuple back if guess was too large. */
if (j < n &&
_PyTuple_Resize(&result, j) != 0)
goto Fail;

Py_DECREF(it);
return result;

Fail:
Py_XDECREF(result);
PyObject *res = _PyList_AsTupleAndClear(list);
Py_DECREF(list);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the name _PyList_AsTupleAndClear might be confusing, because if you called Py_CLEAR on the list you wouldn't need to decref it.

Copy link
Member Author

@markshannon markshannon Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But if you called PyList_Clear on it, you would need to Py_DECREF it.
It is the list being cleared, not the variable holding a reference to it.

return res;
fail:
Py_DECREF(it);
while (n > 0) {
n--;
Py_DECREF(buffer[n]);
}
return NULL;
}

Expand Down
19 changes: 19 additions & 0 deletions Objects/listobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -3174,6 +3174,25 @@ PyList_AsTuple(PyObject *v)
return ret;
}

PyObject *
_PyList_AsTupleAndClear(PyListObject *self)
{
assert(self != NULL);
PyObject *ret;
if (self->ob_item == NULL) {
return PyTuple_New(0);
}
Py_BEGIN_CRITICAL_SECTION(self);
PyObject **items = self->ob_item;
Py_ssize_t size = Py_SIZE(self);
self->ob_item = NULL;
Py_SET_SIZE(self, 0);
ret = _PyTuple_FromArraySteal(items, size);
free_list_items(items, false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If _PyTuple_FromArraySteal, do we still want to free the list items?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. _PyTuple_FromArraySteal steals the references, but doesn't do anything with the memory

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typically it is passed a stack-allocated array, so it can't free it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I meant if _PyTuple_FromArraySteal fails.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. _PyTuple_FromArraySteal consumes the all references regardless of whether it succeeds or fails.
It would be a pain to use otherwise.

Py_END_CRITICAL_SECTION();
return ret;
}

PyObject *
_PyList_FromStackRefSteal(const _PyStackRef *src, Py_ssize_t n)
{
Expand Down
Loading