From b6f28dc386536a977962225a444e0cdd330d7792 Mon Sep 17 00:00:00 2001 From: Roi Agai Date: Tue, 15 Oct 2024 15:47:42 +0300 Subject: [PATCH] PythonProfiler: expose `--python-pyspy-process` (#932) --- gprofiler/gprofiler_types.py | 4 ++-- gprofiler/profilers/python.py | 23 ++++++++++++++++++++--- tests/test_python.py | 6 +++--- tests/test_sanity.py | 2 +- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/gprofiler/gprofiler_types.py b/gprofiler/gprofiler_types.py index 22719699d..410074cb0 100644 --- a/gprofiler/gprofiler_types.py +++ b/gprofiler/gprofiler_types.py @@ -89,12 +89,12 @@ def nonnegative_integer(value_str: str) -> int: def integers_list(value_str: str) -> List[int]: try: - pids = [int(pid) for pid in value_str.split(",")] + values = [int(value) for value in value_str.split(",")] except ValueError: raise configargparse.ArgumentTypeError( "Integer list should be a single integer, or comma separated list of integers f.e. 13,452,2388" ) - return pids + return values def integer_range(min_range: int, max_range: int) -> Callable[[str], int]: diff --git a/gprofiler/profilers/python.py b/gprofiler/profilers/python.py index 12027cfc6..b7bfdbac9 100644 --- a/gprofiler/profilers/python.py +++ b/gprofiler/profilers/python.py @@ -43,6 +43,7 @@ ProcessToStackSampleCounters, ProfileData, StackToSampleCount, + integers_list, nonnegative_integer, ) from gprofiler.log import get_logger_adapter @@ -185,10 +186,12 @@ def __init__( profiler_state: ProfilerState, *, add_versions: bool, + python_pyspy_process: List[int], ): super().__init__(frequency, duration, profiler_state) self.add_versions = add_versions self._metadata = PythonMetadata(self._profiler_state.stop_event) + self._python_pyspy_process = python_pyspy_process def _make_command(self, pid: int, output_path: str, duration: int) -> List[str]: command = [ @@ -260,7 +263,7 @@ def _profile_process(self, process: Process, duration: int, spawned: bool) -> Pr return ProfileData(parsed, appid, app_metadata, container_name) def _select_processes_to_profile(self) -> List[Process]: - filtered_procs = [] + filtered_procs = set() if is_windows(): all_processes = [x for x in pgrep_exe("python")] else: @@ -269,13 +272,14 @@ def _select_processes_to_profile(self) -> List[Process]: for process in all_processes: try: if not self._should_skip_process(process): - filtered_procs.append(process) + filtered_procs.add(process) except NoSuchProcess: pass except Exception: logger.exception(f"Couldn't add pid {process.pid} to list") - return filtered_procs + filtered_procs.update([Process(pid) for pid in self._python_pyspy_process]) + return list(filtered_procs) def _should_profile_process(self, process: Process) -> bool: return search_proc_maps(process, DETECTED_PYTHON_PROCESSES_REGEX) is not None and not self._should_skip_process( @@ -340,6 +344,17 @@ def _should_skip_process(self, process: Process) -> bool: action="store_true", help="Enable PyPerf in verbose mode (max verbosity)", ), + ProfilerArgument( + name="--python-pyspy-process", + dest="python_pyspy_process", + action="extend", + default=[], + type=integers_list, + help="PID to profile with py-spy." + " This option forces gProfiler to profile given processes with py-spy, even if" + " they are not recognized by gProfiler as Python processes." + " Note - gProfiler assumes that the given processes are kept running as long as gProfiler runs.", + ), ], supported_profiling_modes=["cpu"], ) @@ -358,6 +373,7 @@ def __init__( python_add_versions: bool, python_pyperf_user_stacks_pages: Optional[int], python_pyperf_verbose: bool, + python_pyspy_process: List[int], ): if python_mode == "py-spy": python_mode = "pyspy" @@ -387,6 +403,7 @@ def __init__( duration, profiler_state, add_versions=python_add_versions, + python_pyspy_process=python_pyspy_process, ) else: self._pyspy_profiler = None diff --git a/tests/test_python.py b/tests/test_python.py index 92cd099c1..238ebd30b 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -50,7 +50,7 @@ def test_python_select_by_libpython( We expect to select these because they have "libpython" in their "/proc/pid/maps". This test runs a Python named "shmython". """ - with PythonProfiler(1000, 1, profiler_state, "pyspy", True, None, False) as profiler: + with PythonProfiler(1000, 1, profiler_state, "pyspy", True, None, False, python_pyspy_process=[]) as profiler: process_collapsed = snapshot_pid_collapsed(profiler, application_pid) assert_collapsed(process_collapsed) assert all(stack.startswith("shmython") for stack in process_collapsed.keys()) @@ -110,7 +110,7 @@ def test_python_matrix( if python_version in ["3.7", "3.8", "3.9", "3.10", "3.11"] and profiler_type == "py-spy" and libc == "musl": pytest.xfail("This combination fails, see https://github.com/Granulate/gprofiler/issues/714") - with PythonProfiler(1000, 2, profiler_state, profiler_type, True, None, False) as profiler: + with PythonProfiler(1000, 2, profiler_state, profiler_type, True, None, False, python_pyspy_process=[]) as profiler: profile = snapshot_pid_profile(profiler, application_pid) collapsed = profile.stacks @@ -167,7 +167,7 @@ def test_dso_name_in_pyperf_profile( "PyPerf doesn't support aarch64 architecture, see https://github.com/Granulate/gprofiler/issues/499" ) - with PythonProfiler(1000, 2, profiler_state, profiler_type, True, None, False) as profiler: + with PythonProfiler(1000, 2, profiler_state, profiler_type, True, None, False, python_pyspy_process=[]) as profiler: profile = snapshot_pid_profile(profiler, application_pid) python_version, _, _ = application_image_tag.split("-") interpreter_frame = "PyEval_EvalFrameEx" if python_version == "2.7" else "_PyEval_EvalFrameDefault" diff --git a/tests/test_sanity.py b/tests/test_sanity.py index 7df1238f0..734c8a37f 100644 --- a/tests/test_sanity.py +++ b/tests/test_sanity.py @@ -76,7 +76,7 @@ def test_pyspy( profiler_state: ProfilerState, ) -> None: _ = assert_app_id # Required for mypy unused argument warning - with PySpyProfiler(1000, 3, profiler_state, add_versions=True) as profiler: + with PySpyProfiler(1000, 3, profiler_state, add_versions=True, python_pyspy_process=[]) as profiler: # not using snapshot_one_collapsed because there are multiple Python processes running usually. process_collapsed = snapshot_pid_collapsed(profiler, application_pid) assert_collapsed(process_collapsed)