From a50f0b8f9d2dbece92a7ae1d3c2844b439785b09 Mon Sep 17 00:00:00 2001 From: Ganden Schaffner Date: Tue, 19 Nov 2024 00:00:00 -0800 Subject: [PATCH 1/2] Partially revert "gh-87744: fix waitpid race while calling send_signal in asyncio (#121126)" This reverts the non-test changes of commit bd473aa598c5161521a7018896dc124728214a6c. --- Lib/asyncio/base_subprocess.py | 35 +++++++++------------------------- 1 file changed, 9 insertions(+), 26 deletions(-) diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index 9c2ba679ce2bf1..6dbde2b696ad1f 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -1,9 +1,6 @@ import collections import subprocess import warnings -import os -import signal -import sys from . import protocols from . import transports @@ -145,31 +142,17 @@ def _check_proc(self): if self._proc is None: raise ProcessLookupError() - if sys.platform == 'win32': - def send_signal(self, signal): - self._check_proc() - self._proc.send_signal(signal) - - def terminate(self): - self._check_proc() - self._proc.terminate() - - def kill(self): - self._check_proc() - self._proc.kill() - else: - def send_signal(self, signal): - self._check_proc() - try: - os.kill(self._proc.pid, signal) - except ProcessLookupError: - pass + def send_signal(self, signal): + self._check_proc() + self._proc.send_signal(signal) - def terminate(self): - self.send_signal(signal.SIGTERM) + def terminate(self): + self._check_proc() + self._proc.terminate() - def kill(self): - self.send_signal(signal.SIGKILL) + def kill(self): + self._check_proc() + self._proc.kill() async def _connect_pipes(self, waiter): try: From 65cce5cdc7a0e2d2b3b4aeba5afe8fce37660645 Mon Sep 17 00:00:00 2001 From: Ganden Schaffner Date: Tue, 19 Nov 2024 00:00:00 -0800 Subject: [PATCH 2/2] Fix child process watchers racing to free PIDs managed by Popen Note that we call Popen.poll/wait in the event loop thread to avoid Popen's thread-unsafety. Without this workaround, when testing _ThreadedChildWatcher with the reproducer shared in the linked issue on my machine: * Case 1 of the known race condition ignored in GH-20010 is met (and an unsafe kill call is issued) about 1 in 10 times. * The race condition GH-127050 is met (causing _process_exited's assert returncode is not None to raise) about 1 in 30 times. --- Lib/asyncio/base_subprocess.py | 4 - Lib/asyncio/unix_events.py | 81 ++++++++++--------- Lib/subprocess.py | 3 + Misc/ACKS | 1 + ...-11-19-21-58-18.gh-issue-127049.dk76iD.rst | 4 + 5 files changed, 50 insertions(+), 43 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-19-21-58-18.gh-issue-127049.dk76iD.rst diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index 6dbde2b696ad1f..a072a88a757918 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -211,10 +211,6 @@ def _process_exited(self, returncode): if self._loop.get_debug(): logger.info('%r exited with return code %r', self, returncode) self._returncode = returncode - if self._proc.returncode is None: - # asyncio uses a child watcher: copy the status into the Popen - # object. On Python 3.6, it is required to avoid a ResourceWarning. - self._proc.returncode = returncode self._call(self._protocol.process_exited) self._try_finish() diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py index 0227eb506c6016..f0db0edc99fe45 100644 --- a/Lib/asyncio/unix_events.py +++ b/Lib/asyncio/unix_events.py @@ -205,7 +205,7 @@ async def _make_subprocess_transport(self, protocol, args, shell, waiter=waiter, extra=extra, **kwargs) watcher.add_child_handler(transp.get_pid(), - self._child_watcher_callback, transp) + self._child_watcher_callback_threadsafe, transp._proc, transp) try: await waiter except (SystemExit, KeyboardInterrupt): @@ -217,8 +217,34 @@ async def _make_subprocess_transport(self, protocol, args, shell, return transp - def _child_watcher_callback(self, pid, returncode, transp): - self.call_soon_threadsafe(transp._process_exited, returncode) + def _child_watcher_callback_threadsafe(self, pid, proc, transp): + if self.is_closed(): + logger.warning("Loop %r that handles pid %r is closed", loop, pid) + # Bump the Popen to reap the PID so it doesn't emit a + # ResourceWarning later. + proc.poll() + else: + self.call_soon_threadsafe(self._child_watcher_callback, pid, proc, transp) + + def _child_watcher_callback(self, pid, proc, transp): + # N.B.: This poll call must happen in the event loop thread to avoid + # both case 1 of the known race condition ignored in GH-20010 and the + # race condition GH-127050. + returncode = proc.poll() + if proc._returncode_is_a_lie: + # A buggy reap call (i.e. one external to Popen) stole the return code from + # Popen. Popen will silently fail and report return code zero. + returncode = 255 + logger.warning( + "child process pid %d exit status already read: " + " will report returncode 255", + pid) + else: + if self.get_debug(): + logger.debug('process %s exited with returncode %s', + pid, returncode) + + transp._process_exited(returncode) async def create_unix_connection( self, protocol_factory, path=None, *, @@ -870,26 +896,13 @@ class _PidfdChildWatcher: def add_child_handler(self, pid, callback, *args): loop = events.get_running_loop() pidfd = os.pidfd_open(pid) - loop._add_reader(pidfd, self._do_wait, pid, pidfd, callback, args) + loop._add_reader(pidfd, self._reader_callback, pid, pidfd, callback, args) - def _do_wait(self, pid, pidfd, callback, args): + def _reader_callback(self, pid, pidfd, callback, args): loop = events.get_running_loop() loop._remove_reader(pidfd) - try: - _, status = os.waitpid(pid, 0) - except ChildProcessError: - # The child process is already reaped - # (may happen if waitpid() is called elsewhere). - returncode = 255 - logger.warning( - "child process pid %d exit status already read: " - " will report returncode 255", - pid) - else: - returncode = waitstatus_to_exitcode(status) - os.close(pidfd) - callback(pid, returncode, *args) + callback(pid, *args) class _ThreadedChildWatcher: """Threaded child watcher implementation. @@ -918,38 +931,28 @@ def __del__(self, _warn=warnings.warn): def add_child_handler(self, pid, callback, *args): loop = events.get_running_loop() - thread = threading.Thread(target=self._do_waitpid, - name=f"asyncio-waitpid-{next(self._pid_counter)}", + thread = threading.Thread(target=self._do_waitid_WNOWAIT, + name=f"asyncio-waitid+WNOWAIT-{next(self._pid_counter)}", args=(loop, pid, callback, args), daemon=True) self._threads[pid] = thread thread.start() - def _do_waitpid(self, loop, expected_pid, callback, args): + def _do_waitid_WNOWAIT(self, loop, expected_pid, callback, args): assert expected_pid > 0 try: - pid, status = os.waitpid(expected_pid, 0) + # Do not reap---Popen is responsible for reaping and we mustn't have + # a second reap call racing with it. + os.waitid(os.P_PID, expected_pid, os.WEXITED | os.WNOWAIT) except ChildProcessError: # The child process is already reaped - # (may happen if waitpid() is called elsewhere). - pid = expected_pid - returncode = 255 - logger.warning( - "Unknown child process pid %d, will report returncode 255", - pid) - else: - returncode = waitstatus_to_exitcode(status) - if loop.get_debug(): - logger.debug('process %s exited with returncode %s', - expected_pid, returncode) - - if loop.is_closed(): - logger.warning("Loop %r that handles pid %r is closed", loop, pid) - else: - loop.call_soon_threadsafe(callback, pid, returncode, *args) + # (may happen if waitpid() is called elsewhere, e.g. by + # Process.send_signal() in the event loop thread). + pass self._threads.pop(expected_pid) + callback(expected_pid, *args) def can_use_pidfd(): if not hasattr(os, 'pidfd_open'): diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 88f0230b05fbc7..61d6adecac9cea 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -870,6 +870,7 @@ def __init__(self, args, bufsize=-1, executable=None, self.stderr = None self.pid = None self.returncode = None + self._returncode_is_a_lie = False self.encoding = encoding self.errors = errors self.pipesize = pipesize @@ -2005,6 +2006,7 @@ def _internal_poll(self, _deadstate=None, _del_safe=_del_safe): # disabled for our process. This child is dead, we # can't get the status. # http://bugs.python.org/issue15756 + self._returncode_is_a_lie = True self.returncode = 0 finally: self._waitpid_lock.release() @@ -2019,6 +2021,7 @@ def _try_wait(self, wait_flags): # This happens if SIGCLD is set to be ignored or waiting # for child processes has otherwise been disabled for our # process. This child is dead, we can't get the status. + self._returncode_is_a_lie = True pid = self.pid sts = 0 return (pid, sts) diff --git a/Misc/ACKS b/Misc/ACKS index 08cd293eac3835..e48ea5c5a1259e 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1642,6 +1642,7 @@ Ben Sayer Luca Sbardella Marco Scataglini Andrew Schaaf +Ganden Schaffner Michael Scharf Andreas Schawo Neil Schemenauer diff --git a/Misc/NEWS.d/next/Library/2024-11-19-21-58-18.gh-issue-127049.dk76iD.rst b/Misc/NEWS.d/next/Library/2024-11-19-21-58-18.gh-issue-127049.dk76iD.rst new file mode 100644 index 00000000000000..ed7eb815f50984 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-19-21-58-18.gh-issue-127049.dk76iD.rst @@ -0,0 +1,4 @@ +Fix race condition where :meth:`asyncio.subprocess.Process.send_signal`, +:meth:`asyncio.subprocess.Process.terminate`, and +:meth:`asyncio.subprocess.Process.kill` could signal an already-freed PID on +non-Windows platforms. Patch by Ganden Schaffner.