From 0ed4c9ba52fec3a95e48b4adb926b71574415f0b Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Wed, 15 Nov 2023 14:32:25 +0000 Subject: [PATCH] Disable profiler on Ruby 3.3 when running with RUBY_MN_THREADS=1 **What does this PR do?** This PR detects when Ruby (>= 3.3) is running with `RUBY_MN_THREADS=1` and disables the profiler. It also includes fixes to make sure to not crash the Ruby VM when `RUBY_MN_THREADS` is in use. Without this PR, the profiler was crashing when this setting is enabled. **Motivation:** Ruby 3.3 introduces the M:N thread scheduler (see ). Think Java's Virtual Threads, but for Ruby. In short, this means that Ruby threads no longer have a 1:1 mapping to actual kernel threads. By default, this new mode of working applies only to the non-main Ractor, which means it doesn't impact the profiler since we don't yet support profiling non-main ractors (they are detected and skipped). But, there's a setting to enable this M:N mode to the main ractor (`RUBY_MN_THREADS=1`). This wrecks havoc on a number of assumptions on the profiler. While this PR adds error handling to make sure not to crash when M:N threads are in use, we need deeper changes to e.g. properly track cpu-time use on such a Ruby, or to not assume that a Ruby thread is always hosted by the same native thread. Thus, beyond the error handling, and since `RUBY_MN_THREADS=1` is still not the default, I've chosen to not even allow the profiler to start in such a setup. We can remove this restriction once we review the assumptions I've discussed above. **Additional Notes:** To compile on Ruby 3.3, we use a copy of the Ruby VM internal headers shipped within the `debase-ruby_core_source` gem. The current version of the gem (3.2.2) does not yet have the Ruby 3.3.0-preview3 headers, and thus I'm opening up this PR as a draft until an update for that gem is available. (I was able to develop this change by having my own local fork of `debase-ruby_core_source` with the correct updated headers). **How to test the change?** Before, I was able to crash Ruby with as little as: ```bash RUBY_MN_THREADS=1 DD_TRACE_DEBUG=true DD_PROFILING_ENABLED=true bundle exec ddtracerb exec ruby -e "sleep 1" ``` Now, the profiler will refuse to run with ``` W, [2023-11-15T15:05:58.811554 #602738] WARN -- ddtrace: [ddtrace] (lib/datadog/core/configuration/components.rb:113:in `startup!') Profiling was requested but is not supported, profiling disabled: There was an error loading the profiling native extension due to 'RuntimeError Ruby VM is running with RUBY_MN_THREADS=1. This is not yet supported' at 'lib/datadog/profiling.rb:111:in `native_working?'' ``` --- .../clock_id_from_pthread.c | 3 ++ .../extconf.rb | 3 ++ .../private_vm_api_access.c | 40 +++++++++++++------ .../private_vm_api_access.h | 3 ++ .../profiling.c | 1 + 5 files changed, 38 insertions(+), 12 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c b/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c index 1bf21e16e3f..2cff358ad69 100644 --- a/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c +++ b/ext/ddtrace_profiling_native_extension/clock_id_from_pthread.c @@ -25,6 +25,9 @@ void self_test_clock_id(void) { // Safety: This function is assumed never to raise exceptions by callers thread_cpu_time_id thread_cpu_time_id_for(VALUE thread) { rb_nativethread_id_t thread_id = pthread_id_for(thread); + + if (thread_id == 0) return (thread_cpu_time_id) {.valid = false}; + clockid_t clock_id; int error = pthread_getcpuclockid(thread_id, &clock_id); diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb index bbd049c1e1e..4d5df803c79 100644 --- a/ext/ddtrace_profiling_native_extension/extconf.rb +++ b/ext/ddtrace_profiling_native_extension/extconf.rb @@ -134,6 +134,9 @@ def add_compiler_flag(flag) $defs << '-DHAVE_PTHREAD_GETCPUCLOCKID' end +# On older Rubies, M:N threads were not available +$defs << '-DNO_MN_THREADS_AVAILABLE' if RUBY_VERSION < '3.3' + # On older Rubies, we did not need to include the ractor header (this was built into the MJIT header) $defs << '-DNO_RACTOR_HEADER_INCLUDE' if RUBY_VERSION < '3.3' diff --git a/ext/ddtrace_profiling_native_extension/private_vm_api_access.c b/ext/ddtrace_profiling_native_extension/private_vm_api_access.c index fb5d169db0f..99a65e44a23 100644 --- a/ext/ddtrace_profiling_native_extension/private_vm_api_access.c +++ b/ext/ddtrace_profiling_native_extension/private_vm_api_access.c @@ -58,9 +58,12 @@ static inline rb_thread_t *thread_struct_from_object(VALUE thread) { } rb_nativethread_id_t pthread_id_for(VALUE thread) { - // struct rb_native_thread was introduced in Ruby 3.2 (preview2): https://github.com/ruby/ruby/pull/5836 + // struct rb_native_thread was introduced in Ruby 3.2: https://github.com/ruby/ruby/pull/5836 #ifndef NO_RB_NATIVE_THREAD - return thread_struct_from_object(thread)->nt->thread_id; + struct rb_native_thread* native_thread = thread_struct_from_object(thread)->nt; + // This can be NULL on Ruby 3.3 with MN threads (RUBY_MN_THREADS=1) + if (native_thread == NULL) return 0; + return native_thread->thread_id; #else return thread_struct_from_object(thread)->thread_id; #endif @@ -113,15 +116,16 @@ bool is_current_thread_holding_the_gvl(void) { if (current_owner == NULL) return (current_gvl_owner) {.valid = false}; - return (current_gvl_owner) { - .valid = true, - .owner = - #ifndef NO_RB_NATIVE_THREAD - current_owner->nt->thread_id - #else - current_owner->thread_id - #endif - }; + #ifndef NO_RB_NATIVE_THREAD + struct rb_native_thread* current_owner_native_thread = current_owner->nt; + + // This can be NULL on Ruby 3.3 with MN threads (RUBY_MN_THREADS=1) + if (current_owner_native_thread == NULL) return (current_gvl_owner) {.valid = false}; + + return (current_gvl_owner) {.valid = true, .owner = current_owner_native_thread->thread_id}; + #else + return (current_gvl_owner) {.valid = true, .owner = current_owner->thread_id}; + #endif } #else current_gvl_owner gvl_owner(void) { @@ -182,7 +186,9 @@ uint64_t native_thread_id_for(VALUE thread) { // The tid is only available on Ruby >= 3.1 + Linux (and FreeBSD). It's the same as `gettid()` aka the task id as seen in /proc #if !defined(NO_THREAD_TID) && defined(RB_THREAD_T_HAS_NATIVE_ID) #ifndef NO_RB_NATIVE_THREAD - return thread_struct_from_object(thread)->nt->tid; + struct rb_native_thread* native_thread = thread_struct_from_object(thread)->nt; + if (native_thread == NULL) rb_raise(rb_eRuntimeError, "BUG: rb_native_thread* is null. Is this Ruby running with RUBY_MN_THREADS=1?"); + return native_thread->tid; #else return thread_struct_from_object(thread)->tid; #endif @@ -811,3 +817,13 @@ VALUE invoke_location_for(VALUE thread, int *line_location) { *line_location = NUM2INT(rb_iseq_first_lineno(iseq)); return rb_iseq_path(iseq); } + +void self_test_mn_enabled(void) { + #ifdef NO_MN_THREADS_AVAILABLE + return; + #else + if (ddtrace_get_ractor()->threads.sched.enable_mn_threads == true) { + rb_raise(rb_eRuntimeError, "Ruby VM is running with RUBY_MN_THREADS=1. This is not yet supported"); + } + #endif +} diff --git a/ext/ddtrace_profiling_native_extension/private_vm_api_access.h b/ext/ddtrace_profiling_native_extension/private_vm_api_access.h index 608a25f7198..a98e9e8d528 100644 --- a/ext/ddtrace_profiling_native_extension/private_vm_api_access.h +++ b/ext/ddtrace_profiling_native_extension/private_vm_api_access.h @@ -49,3 +49,6 @@ bool ddtrace_rb_ractor_main_p(void); // This is what Ruby shows in `Thread#to_s`. // The file is returned directly, and the line is recorded onto *line_location. VALUE invoke_location_for(VALUE thread, int *line_location); + +// Check if RUBY_MN_THREADS is enabled (aka main Ractor is not doing 1:1 threads) +void self_test_mn_enabled(void); diff --git a/ext/ddtrace_profiling_native_extension/profiling.c b/ext/ddtrace_profiling_native_extension/profiling.c index 1e7d754d56b..9dc9952b4a7 100644 --- a/ext/ddtrace_profiling_native_extension/profiling.c +++ b/ext/ddtrace_profiling_native_extension/profiling.c @@ -68,6 +68,7 @@ void DDTRACE_EXPORT Init_ddtrace_profiling_native_extension(void) { static VALUE native_working_p(DDTRACE_UNUSED VALUE _self) { self_test_clock_id(); + self_test_mn_enabled(); return Qtrue; }