From b5f9d232c401d8fba8710778ad85b29a41ec3924 Mon Sep 17 00:00:00 2001 From: Joshua Oreman Date: Fri, 15 Dec 2023 17:37:16 -0700 Subject: [PATCH] Restore stack introspection ability on 3.12 --- CHANGES.rst | 15 ++- docs/api.rst | 25 ++++- src/greenlet/TGreenlet.cpp | 103 +++++++++++++++++++++ src/greenlet/TPythonState.cpp | 36 ++++--- src/greenlet/TStackState.cpp | 31 +++++++ src/greenlet/__init__.py | 2 +- src/greenlet/greenlet.cpp | 33 +++++++ src/greenlet/greenlet_greenlet.hpp | 73 ++++++++++++++- src/greenlet/tests/_test_extension_cpp.cpp | 15 +++ src/greenlet/tests/test_greenlet.py | 72 ++++++++++++-- 10 files changed, 379 insertions(+), 26 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 70439119..1c5e145b 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,11 +2,20 @@ Changes ========= -3.0.3 (unreleased) +3.1.0 (unreleased) ================== -- Nothing changed yet. - +- Python 3.12: Restore the full ability to walk the stack of a suspended + greenlet; previously only the innermost frame was exposed. + For performance reasons, there are still some restrictions relative to + older Python versions; in particular, by default it is only valid to walk + a suspended greenlet's stack in between a ``gr_frame`` access and the next + resumption of the greenlet. A more flexible mode can be enabled by setting + the greenlet's new ``gr_frames_always_exposed`` attribute to True. If you + get a Python interpreter crash on 3.12+ when accessing the ``f_back`` of a + suspended greenlet frame, you're probably accessing it in a way that + requires you to set this attribute. See `issue 388 + `_. 3.0.2 (2023-12-08) ================== diff --git a/docs/api.rst b/docs/api.rst index ab5df550..ebb64a95 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -32,7 +32,6 @@ Greenlets .. autoattribute:: gr_context - The :class:`contextvars.Context` in which ``g`` will run. Writable; defaults to ``None``, reflecting that a greenlet starts execution in an empty context unless told otherwise. @@ -56,6 +55,30 @@ Greenlets for suspended greenlets; it is None if the greenlet is dead, not yet started, or currently executing. + .. warning:: Greenlet stack introspection is fragile on CPython 3.12 + and later. The frame objects of a suspended greenlet are not safe + to access as-is, but must be adjusted by the greenlet package in + order to make traversing ``f_back`` links not crash the interpreter, + and restored to their original state when resuming the greenlet. + This is all handled transparently as long as you obtain references + to greenlet frames only via the ``gr_frame`` attribute and you finish + accessing them before the greenlet next resumes. If you obtain + frames in other ways, or hold onto them across their greenlet's + resumption, you must set the ``gr_frames_always_exposed`` attribute + in order to make that safe. + + .. autoattribute:: gr_frames_always_exposed + + Writable boolean indicating whether this greenlet will take extra action, + each time it is suspended, to ensure that its frame objects are always + safe to access. Normally such action is only taken when an access + to the ``gr_frame`` attribute occurs, which means you can only safely + walk a greenlet's stack in between accessing ``gr_frame`` and resuming + the greenlet. This is relevant only on CPython 3.12 and later; earlier + versions still permit writing the attribute, but because their frame + objects are safe to access regardless, such writes have no effect and + the attribute always reads as true. + .. autoattribute:: parent The parent greenlet. This is writable, but it is not allowed to create diff --git a/src/greenlet/TGreenlet.cpp b/src/greenlet/TGreenlet.cpp index 60acab33..cb8fe0d4 100644 --- a/src/greenlet/TGreenlet.cpp +++ b/src/greenlet/TGreenlet.cpp @@ -168,6 +168,11 @@ Greenlet::g_switchstack(void) current->exception_state << tstate; this->python_state.will_switch_from(tstate); switching_thread_state = this; +#if GREENLET_PY312 + if (current->python_state.expose_frames_on_every_suspension) { + current->expose_frames(); + } +#endif } assert(this->args() || PyErr_Occurred()); // If this is the first switch into a greenlet, this will @@ -606,5 +611,103 @@ bool Greenlet::is_currently_running_in_some_thread() const return this->stack_state.active() && !this->python_state.top_frame(); } +#if GREENLET_PY312 +void GREENLET_NOINLINE(Greenlet::expose_frames)() +{ + if (!this->python_state.frame_exposure_needs_stack_rewrite()) { + return; // nothing to do + } + + _PyInterpreterFrame* last_complete_iframe = nullptr; + _PyInterpreterFrame* iframe = this->python_state.top_frame()->f_frame; + while (iframe) { + // We must make a copy before looking at the iframe contents, + // since iframe might point to a portion of the greenlet's C stack + // that was spilled when switching greenlets. + _PyInterpreterFrame iframe_copy; + this->stack_state.copy_from_stack(&iframe_copy, iframe, sizeof(*iframe)); + if (!_PyFrame_IsIncomplete(&iframe_copy)) { + // If the iframe were OWNED_BY_CSTACK then it would always be + // incomplete. Since it's not incomplete, it's not on the C stack + // and we can access it through the original `iframe` pointer + // directly. This is important since GetFrameObject might + // lazily _create_ the frame object and we don't want the + // interpreter to lose track of it. + assert(iframe_copy.owner != FRAME_OWNED_BY_CSTACK); + + // We really want to just write: + // PyFrameObject* frame = _PyFrame_GetFrameObject(iframe); + // but _PyFrame_GetFrameObject calls _PyFrame_MakeAndSetFrameObject + // which is not a visible symbol in libpython. The easiest + // way to get a public function to call it is using + // PyFrame_GetBack, which is defined as follows: + // assert(frame != NULL); + // assert(!_PyFrame_IsIncomplete(frame->f_frame)); + // PyFrameObject *back = frame->f_back; + // if (back == NULL) { + // _PyInterpreterFrame *prev = frame->f_frame->previous; + // prev = _PyFrame_GetFirstComplete(prev); + // if (prev) { + // back = _PyFrame_GetFrameObject(prev); + // } + // } + // return (PyFrameObject*)Py_XNewRef(back); + if (!iframe->frame_obj) { + PyFrameObject dummy_frame; + _PyInterpreterFrame dummy_iframe; + dummy_frame.f_back = nullptr; + dummy_frame.f_frame = &dummy_iframe; + // force the iframe to be considered complete without + // needing to check its code object: + dummy_iframe.owner = FRAME_OWNED_BY_GENERATOR; + dummy_iframe.previous = iframe; + assert(!_PyFrame_IsIncomplete(&dummy_iframe)); + // Drop the returned reference immediately; the iframe + // continues to hold a strong reference + Py_XDECREF(PyFrame_GetBack(&dummy_frame)); + assert(iframe->frame_obj); + } + + // This is a complete frame, so make the last one of those we saw + // point at it, bypassing any incomplete frames (which may have + // been on the C stack) in between the two. We're overwriting + // last_complete_iframe->previous and need that to be reversible, + // so we store the original previous ptr in the frame object + // (which we must have created on a previous iteration through + // this loop). The frame object has a bunch of storage that is + // only used when its iframe is OWNED_BY_FRAME_OBJECT, which only + // occurs when the frame object outlives the frame's execution, + // which can't have happened yet because the frame is currently + // executing as far as the interpreter is concerned. So, we can + // reuse it for our own purposes. + assert(iframe->owner == FRAME_OWNED_BY_THREAD || + iframe->owner == FRAME_OWNED_BY_GENERATOR); + if (last_complete_iframe) { + assert(last_complete_iframe->frame_obj); + memcpy(&last_complete_iframe->frame_obj->_f_frame_data[0], + &last_complete_iframe->previous, sizeof(void *)); + last_complete_iframe->previous = iframe; + } + last_complete_iframe = iframe; + } + // Frames that are OWNED_BY_FRAME_OBJECT are linked via the + // frame's f_back while all others are linked via the iframe's + // previous ptr. Since all the frames we traverse are running + // as far as the interpreter is concerned, we don't have to + // worry about the OWNED_BY_FRAME_OBJECT case. + iframe = iframe_copy.previous; + } + + // Give the outermost complete iframe a null previous pointer to + // account for any potential incomplete/C-stack iframes between it + // and the actual top-of-stack + if (last_complete_iframe) { + assert(last_complete_iframe->frame_obj); + memcpy(&last_complete_iframe->frame_obj->_f_frame_data[0], + &last_complete_iframe->previous, sizeof(void *)); + last_complete_iframe->previous = nullptr; + } +} +#endif }; // namespace greenlet diff --git a/src/greenlet/TPythonState.cpp b/src/greenlet/TPythonState.cpp index 9d57155c..d217a634 100644 --- a/src/greenlet/TPythonState.cpp +++ b/src/greenlet/TPythonState.cpp @@ -26,7 +26,8 @@ PythonState::PythonState() ,datastack_limit(nullptr) #endif #if GREENLET_PY312 - ,_prev_frame(nullptr) + ,frames_were_exposed(false) + ,expose_frames_on_every_suspension(false) #endif { #if GREENLET_USE_CFRAME @@ -146,12 +147,6 @@ void PythonState::operator<<(const PyThreadState *const tstate) noexcept Py_XDECREF(frame); // PyThreadState_GetFrame gives us a new // reference. this->_top_frame.steal(frame); - #if GREENLET_PY312 - if (frame) { - this->_prev_frame = frame->f_frame->previous; - frame->f_frame->previous = nullptr; - } - #endif #if GREENLET_PY312 this->trash_delete_nesting = tstate->trash.delete_nesting; #else // not 312 @@ -164,6 +159,25 @@ void PythonState::operator<<(const PyThreadState *const tstate) noexcept #endif // GREENLET_PY311 } +#if GREENLET_PY312 +void GREENLET_NOINLINE(PythonState::unexpose_frames)() +{ + if (!this->frames_were_exposed) { + return; + } + // See GreenletState::expose_frames() and the comment on frames_were_exposed + // for more information about this logic. + for (_PyInterpreterFrame *iframe = this->_top_frame->f_frame; + iframe != nullptr; ) { + _PyInterpreterFrame *prev_exposed = iframe->previous; + assert(iframe->frame_obj); + memcpy(&iframe->previous, &iframe->frame_obj->_f_frame_data[0], + sizeof(void *)); + iframe = prev_exposed; + } + this->frames_were_exposed = false; +} +#endif void PythonState::operator>>(PyThreadState *const tstate) noexcept { @@ -187,13 +201,9 @@ void PythonState::operator>>(PyThreadState *const tstate) noexcept #if GREENLET_PY312 tstate->py_recursion_remaining = tstate->py_recursion_limit - this->py_recursion_depth; tstate->c_recursion_remaining = C_RECURSION_LIMIT - this->c_recursion_depth; - // We're just going to throw this object away anyway, go ahead and - // do it now. - PyFrameObject* frame = this->_top_frame.relinquish_ownership(); - if (frame && frame->f_frame) { - frame->f_frame->previous = this->_prev_frame; + if (this->frames_were_exposed) { + this->unexpose_frames(); } - this->_prev_frame = nullptr; #else // \/ 3.11 tstate->recursion_remaining = tstate->recursion_limit - this->recursion_depth; #endif // GREENLET_PY312 diff --git a/src/greenlet/TStackState.cpp b/src/greenlet/TStackState.cpp index 5ce1ea19..8047cef5 100644 --- a/src/greenlet/TStackState.cpp +++ b/src/greenlet/TStackState.cpp @@ -226,6 +226,37 @@ StackState::~StackState() } } +void StackState::copy_from_stack(void* vdest, const void* vsrc, size_t n) const +{ + char* dest = static_cast(vdest); + const char* src = static_cast(vsrc); + if (src + n <= _stack_start || src >= _stack_start + _stack_saved || + _stack_saved == 0) { + // Nothing we're copying was spilled from the stack + memcpy(dest, src, n); + return; + } + if (src < _stack_start) { + // Copy the part before the saved stack. + // We know src + n > _stack_start due to the test above. + size_t nbefore = _stack_start - src; + memcpy(dest, src, nbefore); + dest += nbefore; + src += nbefore; + n -= nbefore; + } + // We know src >= _stack_start after the before-copy, and + // src < _stack_start + _stack_saved due to the first if condition + size_t nspilled = std::min(n, _stack_start + _stack_saved - src); + memcpy(dest, stack_copy + (src - _stack_start), nspilled); + dest += nspilled; + src += nspilled; + n -= nspilled; + if (n > 0) { + // Copy the part after the saved stack + memcpy(dest, src, n); + } +} }; // namespace greenlet diff --git a/src/greenlet/__init__.py b/src/greenlet/__init__.py index 12a4c4fd..c44b15e8 100644 --- a/src/greenlet/__init__.py +++ b/src/greenlet/__init__.py @@ -25,7 +25,7 @@ ### # Metadata ### -__version__ = '3.0.3.dev0' +__version__ = '3.1.0.dev0' from ._greenlet import _C_API # pylint:disable=no-name-in-module ### diff --git a/src/greenlet/greenlet.cpp b/src/greenlet/greenlet.cpp index 50574980..e26552ed 100644 --- a/src/greenlet/greenlet.cpp +++ b/src/greenlet/greenlet.cpp @@ -758,10 +758,39 @@ green_setcontext(BorrowedGreenlet self, PyObject* nctx, void* UNUSED(context)) static PyObject* green_getframe(BorrowedGreenlet self, void* UNUSED(context)) { +#if GREENLET_PY312 + self->expose_frames(); +#endif const PythonState::OwnedFrame& top_frame = self->top_frame(); return top_frame.acquire_or_None(); } +static PyObject* +green_getframeexposed(BorrowedGreenlet self, void* UNUSED(context)) +{ +#if GREENLET_PY312 + if (!self->expose_frames_on_every_suspension()) { + Py_RETURN_FALSE; + } +#endif + Py_RETURN_TRUE; +} + +static int +green_setframeexposed(BorrowedGreenlet self, PyObject* val, void* UNUSED(context)) +{ + if (val != Py_True && val != Py_False) { + PyErr_Format(PyExc_TypeError, + "expected a bool, not '%s'", + Py_TYPE(val)->tp_name); + return -1; + } +#if GREENLET_PY312 + self->set_expose_frames_on_every_suspension(val == Py_True); +#endif + return 0; +} + static PyObject* green_getstate(PyGreenlet* self) { @@ -979,6 +1008,10 @@ static PyGetSetDef green_getsets[] = { {"run", (getter)green_getrun, (setter)green_setrun, /*XXX*/ NULL}, {"parent", (getter)green_getparent, (setter)green_setparent, /*XXX*/ NULL}, {"gr_frame", (getter)green_getframe, NULL, /*XXX*/ NULL}, + {"gr_frames_always_exposed", + (getter)green_getframeexposed, + (setter)green_setframeexposed, + /*XXX*/ NULL}, {"gr_context", (getter)green_getcontext, (setter)green_setcontext, diff --git a/src/greenlet/greenlet_greenlet.hpp b/src/greenlet/greenlet_greenlet.hpp index 2c44130d..03417cf6 100644 --- a/src/greenlet/greenlet_greenlet.hpp +++ b/src/greenlet/greenlet_greenlet.hpp @@ -118,10 +118,26 @@ namespace greenlet PyObject** datastack_limit; #endif #if GREENLET_PY312 - _PyInterpreterFrame* _prev_frame; + // The PyInterpreterFrame list on 3.12+ contains some entries that are + // on the C stack, which can't be directly accessed while a greenlet is + // suspended. In order to keep greenlet gr_frame introspection working, + // we hook the gr_frame accessor to rewrite the interpreter frame list + // to skip these C-stack frames; we call this "exposing" the greenlet's + // frames because it makes them valid to work with in Python. Then when + // the greenlet is resumed we need to remember to reverse the operation + // we did. The C-stack frames are "entry frames" which are a low-level + // interpreter detail; they're not needed for introspection, but do + // need to be present for the eval loop to work. + bool frames_were_exposed; + void unexpose_frames(); #endif public: +#if GREENLET_PY312 + // State used by Greenlet class, stored here to improve packing + bool expose_frames_on_every_suspension; +#endif + PythonState(); // You can use this for testing whether we have a frame // or not. It returns const so they can't modify it. @@ -137,9 +153,24 @@ namespace greenlet #if GREENLET_USE_CFRAME void set_new_cframe(_PyCFrame& frame) noexcept; #endif + inline void may_switch_away() noexcept; inline void will_switch_from(PyThreadState *const origin_tstate) noexcept; void did_finish(PyThreadState* tstate) noexcept; + +#if GREENLET_PY312 + // Called when we're about to make the stack of a suspended greenlet + // visible to Python code. If it returns true, the stack must be fixed + // up first using the logic in Greenlet::expose_frames(). + bool frame_exposure_needs_stack_rewrite() noexcept + { + if (!top_frame() || frames_were_exposed) { + return false; + } + frames_were_exposed = true; + return true; + } +#endif }; class StackState @@ -186,6 +217,14 @@ namespace greenlet #ifdef GREENLET_USE_STDIO friend std::ostream& operator<<(std::ostream& os, const StackState& s); #endif + + // Fill in [dest, dest + n) with the values that would be at + // [src, src + n) while this greenlet is running. This is like memcpy + // except that if the greenlet is suspended it accounts for the portion + // of the greenlet's stack that was spilled to the heap. `src` may + // be on this greenlet's stack, or on the heap, but not on a different + // greenlet's stack. + void copy_from_stack(void* dest, const void* src, size_t n) const; }; #ifdef GREENLET_USE_STDIO std::ostream& operator<<(std::ostream& os, const StackState& s); @@ -377,6 +416,38 @@ namespace greenlet // was running in was known to have exited. void deallocing_greenlet_in_thread(const ThreadState* current_state); +#if GREENLET_PY312 + // Must be called on 3.12+ before exposing a suspended greenlet's + // frames to user code. This synthesizes a frame object if necessary + // for each interpreter frame on the greenlet's stack, and stitches + // together their f_back pointers so that Python accesses to f_back + // don't need to walk the interpreter frame stack. This is important + // because the interpreter frame stack contains some entries that are + // on the C stack and the interpreter will crash when trying to + // traverse those for a suspended greenlet. The f_back pointer work + // must be undone before resuming the greenlet because the interpreter + // assumes f_back is nullptr for running frames. This is tracked via + // PythonState::frames_were_exposed. + void expose_frames(); + + // To support code that exposes greenlet frames other than through + // gr_frame, it is possible to tell a greenlet to expose its frames + // every time it suspends itself. This will harm performance and + // should only be used if necessary. Attached to the Python attribute + // 'gr_frames_always_exposed'. + inline bool expose_frames_on_every_suspension() const noexcept + { + return this->python_state.expose_frames_on_every_suspension; + } + inline void set_expose_frames_on_every_suspension(bool value) noexcept + { + this->python_state.expose_frames_on_every_suspension = value; + if (value) { + expose_frames(); + } + } +#endif + // TODO: Figure out how to make these non-public. inline void slp_restore_state() noexcept; inline int slp_save_state(char *const stackref) noexcept; diff --git a/src/greenlet/tests/_test_extension_cpp.cpp b/src/greenlet/tests/_test_extension_cpp.cpp index 1c49160a..5cbe6a76 100644 --- a/src/greenlet/tests/_test_extension_cpp.cpp +++ b/src/greenlet/tests/_test_extension_cpp.cpp @@ -100,6 +100,15 @@ py_test_exception_throw_std(PyObject* self, PyObject* args) return NULL; } +static PyObject* +py_test_call(PyObject* self, PyObject* arg) +{ + PyObject* noargs = PyTuple_New(0); + PyObject* ret = PyObject_Call(arg, noargs, nullptr); + Py_DECREF(noargs); + return ret; +} + /* test_exception_switch_and_do_in_g2(g2func) @@ -173,6 +182,12 @@ static PyMethodDef test_methods[] = { METH_VARARGS, "Throws standard C++ exception. Calling this function directly should abort the process." }, + {"test_call", + (PyCFunction)&py_test_call, + METH_O, + "Call the given callable. Unlike calling it directly, this creates a " + "new C-level stack frame, which may be helpful in testing." + }, {NULL, NULL, 0, NULL} }; diff --git a/src/greenlet/tests/test_greenlet.py b/src/greenlet/tests/test_greenlet.py index 0adcc1ee..cf57c903 100644 --- a/src/greenlet/tests/test_greenlet.py +++ b/src/greenlet/tests/test_greenlet.py @@ -840,23 +840,81 @@ def test_switch_to_dead_greenlet_reparent(self): self.assertEqual(seen, [42, 24]) def test_can_access_f_back_of_suspended_greenlet(self): - # On Python 3.12, they added a ->previous field to - # _PyInterpreterFrame that has to be cleared when a frame is inactive. - # If we got that wrong, this immediately crashes. + # This tests our frame rewriting to work around Python 3.12+ having + # some interpreter frames on the C stack. It will crash in the absence + # of that logic. main = greenlet.getcurrent() - def Hub(): - main.switch() + def outer(): + inner() - hub = RawGreenlet(Hub) + def inner(): + main.switch(sys._getframe(0)) + + hub = RawGreenlet(outer) # start it hub.switch() + + # start another greenlet to make sure we aren't relying on + # anything in `hub` still being on the C stack + unrelated = RawGreenlet(lambda: None) + unrelated.switch() + # now it is suspended self.assertIsNotNone(hub.gr_frame) + self.assertEqual(hub.gr_frame.f_code.co_name, "inner") + self.assertIsNotNone(hub.gr_frame.f_back) + self.assertEqual(hub.gr_frame.f_back.f_code.co_name, "outer") # The next line is what would crash - self.assertIsNone(hub.gr_frame.f_back) + self.assertIsNone(hub.gr_frame.f_back.f_back) + + def test_get_stack_with_nested_c_calls(self): + from functools import partial + from . import _test_extension_cpp + + def recurse(v): + if v > 0: + return v * _test_extension_cpp.test_call(partial(recurse, v - 1)) + else: + return greenlet.getcurrent().parent.switch() + + gr = RawGreenlet(recurse) + gr.switch(5) + frame = gr.gr_frame + for i in range(5): + self.assertEqual(frame.f_locals["v"], i) + frame = frame.f_back + self.assertEqual(frame.f_locals["v"], 5) + self.assertIsNone(frame.f_back) + self.assertEqual(gr.switch(10), 1200) # 1200 = 5! * 10 + + def test_frames_always_exposed(self): + # On Python 3.12 this will crash if we don't set the + # gr_frames_always_exposed attribute. More background: + # https://github.com/python-greenlet/greenlet/issues/388 + main = greenlet.getcurrent() + + def outer(): + inner(sys._getframe(0)) + + def inner(frame): + main.switch(frame) + + gr = RawGreenlet(outer) + frame = gr.switch() + + # It's fine to set always_exposed after the frame was obtained, + # as long as it's before the frame is inspected + gr.gr_frames_always_exposed = True + # Do something else to clobber the part of the C stack used by `gr`, + # so we can't skate by on "it just happened to still be there" + unrelated = RawGreenlet(lambda: None) + unrelated.switch() + self.assertEqual(frame.f_code.co_name, "outer") + # The next line crashes on 3.12 if you don't set always_exposed + self.assertIsNone(frame.f_back) class TestGreenletSetParentErrors(TestCase):