Skip to content

Commit

Permalink
pythongh-76785: Expand How Interpreter Queues Handle Interpreter Fina…
Browse files Browse the repository at this point in the history
…lization (pythongh-116431)

Any cross-interpreter mechanism for passing objects between interpreters must be very careful to respect isolation, even when the object is effectively immutable (e.g. int, str).  Here this especially relates to when an interpreter sends one of its objects, and then is destroyed while the inter-interpreter machinery (e.g. queue) still holds a reference to the object.

When I added interpreters.Queue, I dealt with that case (using an atexit hook) by silently removing all items from the queue that were added by the finalizing interpreter.

Later, while working on concurrent.futures.InterpreterPoolExecutor (pythongh-116430), I noticed it was somewhat surprising when items were silently removed from the queue when the originating interpreter was destroyed.  (See my comment on that PR.)
 It took me a little while to realize what was going on.  I expect that users, with much less context than I have, would experience the same pain.

My approach, here, to improving the situation is to give users three options:

1. return a singleton (interpreters.queues.UNBOUND) from Queue.get() in place of each removed item
2. raise an exception (interpreters.queues.ItemInterpreterDestroyed) from Queue.get() in place of each removed item
3. existing behavior: silently remove each item (i.e. Queue.get() skips each one)

The default will now be (1), but users can still explicitly opt in to any of them, including the silent removal behavior.

The behavior for each item may be set with the corresponding Queue.put() call, and a queue-wide default may be set when the queue is created.  (This is the same as I did for "synconly".)
(cherry picked from commit 6b98b27)

Co-authored-by: Eric Snow <[email protected]>
  • Loading branch information
ericsnowcurrently authored and miss-islington committed Jul 15, 2024
1 parent 308857b commit e0d6480
Show file tree
Hide file tree
Showing 3 changed files with 513 additions and 90 deletions.
141 changes: 128 additions & 13 deletions Lib/test/support/interpreters/queues.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
)

__all__ = [
'UNBOUND', 'UNBOUND_ERROR', 'UNBOUND_REMOVE',
'create', 'list_all',
'Queue',
'QueueError', 'QueueNotFoundError', 'QueueEmpty', 'QueueFull',
'ItemInterpreterDestroyed',
]


Expand All @@ -32,47 +34,119 @@ class QueueFull(QueueError, queue.Full):
"""


class ItemInterpreterDestroyed(QueueError):
    """Raised from get() and get_nowait().

    Signals that the interpreter which added the requested item has
    been destroyed and that the item was put with UNBOUND_ERROR.
    """


_SHARED_ONLY = 0
_PICKLED = 1

def create(maxsize=0, *, syncobj=False):

class UnboundItem:
    """Represents a Queue item no longer bound to an interpreter.

    An item is unbound when the interpreter that added it to the queue
    is destroyed.

    Calling UnboundItem() never creates a new object; it always
    returns the module-level UNBOUND singleton.
    """

    # No per-instance state is needed.
    __slots__ = ()

    def __new__(cls):
        # UNBOUND is a module-level global, resolved at call time.
        return UNBOUND

    def __repr__(self):
        # A constant string: no formatting is involved, so a plain
        # literal is used rather than an f-string.
        return 'interpreters.queues.UNBOUND'


# The supported "unbound item" behaviors.  UNBOUND is built with
# object.__new__() because UnboundItem.__new__() itself returns UNBOUND.
UNBOUND = object.__new__(UnboundItem)
UNBOUND_ERROR = object()
UNBOUND_REMOVE = object()

# Public constant -> integer flag handed to the low-level _queues module.
_UNBOUND_CONSTANT_TO_FLAG = {
    UNBOUND_REMOVE: 1,
    UNBOUND_ERROR: 2,
    UNBOUND: 3,
}
# The reverse mapping: integer flag -> public constant.
_UNBOUND_FLAG_TO_CONSTANT = {
    flag: constant
    for constant, flag in _UNBOUND_CONSTANT_TO_FLAG.items()
}

def _serialize_unbound(unbound):
    """Return the 1-tuple "(flag,)" for an unbound-item constant.

    "unbound" is looked up in _UNBOUND_CONSTANT_TO_FLAG, so it should
    be one of UNBOUND, UNBOUND_ERROR, or UNBOUND_REMOVE.  A value that
    is not a key of that mapping raises NotImplementedError.
    """
    try:
        flag = _UNBOUND_CONSTANT_TO_FLAG[unbound]
    except KeyError:
        # The failed dict lookup is an implementation detail, so it is
        # not chained onto the error the caller sees.
        raise NotImplementedError(
            f'unsupported unbound replacement op {unbound!r}') from None
    return (flag,)


def _resolve_unbound(flag):
    """Turn an unbound-op flag into the outcome of a get() call.

    Returns UNBOUND when the flag maps to UNBOUND.

    Raises ItemInterpreterDestroyed when the flag maps to
    UNBOUND_ERROR.  Raises NotImplementedError when the flag maps to
    UNBOUND_REMOVE or is not a known flag.
    """
    try:
        op = _UNBOUND_FLAG_TO_CONSTANT[flag]
    except KeyError:
        # The failed dict lookup is an implementation detail, so it is
        # not chained onto the error the caller sees.
        raise NotImplementedError(
            f'unsupported unbound replacement op {flag!r}') from None
    if op is UNBOUND_REMOVE:
        # "remove" not possible here
        raise NotImplementedError
    if op is UNBOUND_ERROR:
        raise ItemInterpreterDestroyed("item's original interpreter destroyed")
    if op is UNBOUND:
        return UNBOUND
    # Only reachable if the flag mapping gains an entry that is not
    # handled above.
    raise NotImplementedError(repr(op))


def create(maxsize=0, *, syncobj=False, unbounditems=UNBOUND):
    """Return a new cross-interpreter queue.

    The queue may be used to pass data safely between interpreters.

    "syncobj" sets the default for Queue.put()
    and Queue.put_nowait().

    "unbounditems" likewise sets the default.  See Queue.put() for
    supported values.  The default value is UNBOUND, which replaces
    the unbound item.
    """
    fmt = _SHARED_ONLY if syncobj else _PICKLED
    # _serialize_unbound() raises for an unsupported value, and it runs
    # before any queue has been created.
    unbound = _serialize_unbound(unbounditems)
    unboundop, = unbound
    qid = _queues.create(maxsize, fmt, unboundop)
    return Queue(qid, _fmt=fmt, _unbound=unbound)


def list_all():
    """Return a list of all open queues.

    Each entry reported by _queues.list_all() is wrapped in a Queue
    carrying that entry's format and unbound-op defaults.
    """
    return [Queue(qid, _fmt=fmt, _unbound=(unboundop,))
            for qid, fmt, unboundop in _queues.list_all()]


_known_queues = weakref.WeakValueDictionary()

class Queue:
"""A cross-interpreter queue."""

def __new__(cls, id, /, *, _fmt=None, _unbound=None):
    """Return the Queue object for the given queue ID.

    "_fmt" and "_unbound" are the queue's default format and default
    unbound op (as a 1-tuple).  Whichever is None is filled in from
    _queues.get_queue_defaults().  They are only stored on a newly
    created object; an object already known for this ID is returned
    unchanged.

    Raises TypeError if "id" is not an int.
    """
    # There is only one instance for any given ID.
    if isinstance(id, int):
        id = int(id)
    else:
        raise TypeError(f'id must be an int, got {id!r}')
    if _fmt is None or _unbound is None:
        # A single lookup supplies whichever defaults were not given.
        default_fmt, default_op = _queues.get_queue_defaults(id)
        if _fmt is None:
            _fmt = default_fmt
        if _unbound is None:
            _unbound = (default_op,)
    try:
        self = _known_queues[id]
    except KeyError:
        self = super().__new__(cls)
        self._id = id
        self._fmt = _fmt
        self._unbound = _unbound
        _known_queues[id] = self
        _queues.bind(id)
    return self
Expand Down Expand Up @@ -124,14 +198,15 @@ def qsize(self):

def put(self, obj, timeout=None, *,
syncobj=None,
unbound=None,
_delay=10 / 1000, # 10 milliseconds
):
"""Add the object to the queue.
This blocks while the queue is full.
If "syncobj" is None (the default) then it uses the
queue's default, set with create_queue()..
queue's default, set with create_queue().
If "syncobj" is false then all objects are supported,
at the expense of worse performance.
Expand All @@ -152,11 +227,37 @@ def put(self, obj, timeout=None, *,
actually is. That's a slightly different and stronger promise
than just (initial) equality, which is all "syncobj=False"
can promise.
"unbound" controls the behavior of Queue.get() for the given
object if the current interpreter (calling put()) is later
destroyed.
If "unbound" is None (the default) then it uses the
queue's default, set with create_queue(),
which is usually UNBOUND.
If "unbound" is UNBOUND_ERROR then get() will raise an
ItemInterpreterDestroyed exception if the original interpreter
has been destroyed. This does not otherwise affect the queue;
the next call to get() will work like normal, returning the next
item in the queue.
If "unbound" is UNBOUND_REMOVE then the item will be removed
from the queue as soon as the original interpreter is destroyed.
Be aware that this will introduce an imbalance between put()
and get() calls.
If "unbound" is UNBOUND then it is returned by get() in place
of the unbound item.
"""
if syncobj is None:
fmt = self._fmt
else:
fmt = _SHARED_ONLY if syncobj else _PICKLED
if unbound is None:
unboundop, = self._unbound
else:
unboundop, = _serialize_unbound(unbound)
if timeout is not None:
timeout = int(timeout)
if timeout < 0:
Expand All @@ -166,29 +267,37 @@ def put(self, obj, timeout=None, *,
obj = pickle.dumps(obj)
while True:
try:
_queues.put(self._id, obj, fmt)
_queues.put(self._id, obj, fmt, unboundop)
except QueueFull as exc:
if timeout is not None and time.time() >= end:
raise # re-raise
time.sleep(_delay)
else:
break

def put_nowait(self, obj, *, syncobj=None, unbound=None):
    """Add the object to the queue with a single attempt.

    Unlike put(), there is no retry loop here: whatever the
    low-level put raises propagates directly to the caller.

    If "syncobj" is None (the default) then the queue's default
    format is used.  If "unbound" is None (the default) then the
    queue's default unbound op is used; otherwise it must be a value
    accepted by _serialize_unbound().
    """
    if syncobj is None:
        fmt = self._fmt
    else:
        fmt = _SHARED_ONLY if syncobj else _PICKLED
    if unbound is None:
        unboundop, = self._unbound
    else:
        unboundop, = _serialize_unbound(unbound)
    # Compare by value, as get() does; the formats are plain ints.
    if fmt == _PICKLED:
        obj = pickle.dumps(obj)
    _queues.put(self._id, obj, fmt, unboundop)

def get(self, timeout=None, *,
_delay=10 / 1000, # 10 milliseconds
):
"""Return the next object from the queue.
This blocks while the queue is empty.
If the next item's original interpreter has been destroyed
then the "next object" is determined by the value of the
"unbound" argument to put().
"""
if timeout is not None:
timeout = int(timeout)
Expand All @@ -197,13 +306,16 @@ def get(self, timeout=None, *,
end = time.time() + timeout
while True:
try:
obj, fmt = _queues.get(self._id)
obj, fmt, unboundop = _queues.get(self._id)
except QueueEmpty as exc:
if timeout is not None and time.time() >= end:
raise # re-raise
time.sleep(_delay)
else:
break
if unboundop is not None:
assert obj is None, repr(obj)
return _resolve_unbound(unboundop)
if fmt == _PICKLED:
obj = pickle.loads(obj)
else:
Expand All @@ -217,9 +329,12 @@ def get_nowait(self):
is the same as get().
"""
try:
obj, fmt = _queues.get(self._id)
obj, fmt, unboundop = _queues.get(self._id)
except QueueEmpty as exc:
raise # re-raise
if unboundop is not None:
assert obj is None, repr(obj)
return _resolve_unbound(unboundop)
if fmt == _PICKLED:
obj = pickle.loads(obj)
else:
Expand Down
Loading

0 comments on commit e0d6480

Please sign in to comment.