From 0d43a1aed183da05f5d7dcf66157ca8351fe5b38 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Sun, 17 Nov 2024 15:57:56 +0800 Subject: [PATCH] [fix](brpc) coredump caused by brpc checking (#44047) ### What problem does this PR solve? ``` /root/doris/be/src/runtime/fragment_mgr.cpp:1064:20: runtime error: member call on null pointer of type 'doris::PBackendService_Stub' #0 0x55bd899c9aaa in doris::FragmentMgr::_check_brpc_available(std::shared_ptr<doris::PBackendService_Stub> const&, doris::FragmentMgr::BrpcItem const&) /root/doris/be/src/runtime/fragment_mgr.cpp:1064:20 #1 0x55bd899c521f in doris::FragmentMgr::cancel_worker() /root/doris/be/src/runtime/fragment_mgr.cpp:1021:13 #2 0x55bd8a4c97ae in std::function<void ()>::operator()() const /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9 #3 0x55bd8a4c97ae in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5 #4 0x7f7601092608 in start_thread /build/glibc-SzIz7B/glibc-2.31/nptl/pthread_create.c:477:8 #5 0x7f760133f132 in __clone /build/glibc-SzIz7B/glibc-2.31/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:95 SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /root/doris/be/src/runtime/fragment_mgr.cpp:1064:20 in *** Query id: 0-0 *** *** is nereids: 0 *** *** tablet id: 0 *** *** Aborted at 1731663847 (unix time) try "date -d @1731663847" if you are using GNU date *** *** Current BE git commitID: b663df0e50 *** *** SIGSEGV address not mapped to object (@0x0) received by PID 17169 (TID 17463 OR 0x7f746d21a700) from PID 0; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /root/doris/be/src/common/signal_handler.h:421 1# PosixSignals::chained_handler(int, siginfo_t*, void*) [clone .part.0] in /usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so 2# JVM_handle_linux_signal in /usr/lib/jvm/java-17-openjdk-amd64/lib/server/libjvm.so 3# 0x00007F7601263090 in /lib/x86_64-linux-gnu/libc.so.6 4# 
doris::FragmentMgr::_check_brpc_available(std::shared_ptr<doris::PBackendService_Stub> const&, doris::FragmentMgr::BrpcItem const&) in /mnt/ssd01/pipline/OpenSourceDoris/clusterEnv/P0/Cluster0/be/lib/doris_be 5# doris::FragmentMgr::cancel_worker() at /root/doris/be/src/runtime/fragment_mgr.cpp:1022 6# doris::Thread::supervise_thread(void*) at /root/doris/be/src/util/thread.cpp:499 7# start_thread at /build/glibc-SzIz7B/glibc-2.31/nptl/pthread_create.c:478 8# __clone at ../sysdeps/unix/sysv/linux/x86_64/clone.S:97 ``` --- be/src/runtime/fragment_mgr.cpp | 10 ++++++++-- be/src/vec/sink/vdata_stream_sender.cpp | 12 +++++------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index 95e5f8e2ce14f3..2896190f606f57 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -1017,8 +1017,14 @@ void FragmentMgr::cancel_worker() { } } - for (auto it : brpc_stub_with_queries) { - _check_brpc_available(it.first, it.second); + if (config::enable_brpc_connection_check) { + for (auto it : brpc_stub_with_queries) { + if (!it.first) { + LOG(WARNING) << "brpc stub is nullptr, skip it."; + continue; + } + _check_brpc_available(it.first, it.second); + } } if (!queries_lost_coordinator.empty()) { diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp index 5fb1bd327ae227..66aacc59f6c02a 100644 --- a/be/src/vec/sink/vdata_stream_sender.cpp +++ b/be/src/vec/sink/vdata_stream_sender.cpp @@ -70,19 +70,13 @@ Status Channel::init(RuntimeState* state) { return Status::OK(); } + auto network_address = _brpc_dest_addr; if (_brpc_dest_addr.hostname == BackendOptions::get_localhost()) { _brpc_stub = state->exec_env()->brpc_internal_client_cache()->get_client( "127.0.0.1", _brpc_dest_addr.port); - auto network_address = _brpc_dest_addr; network_address.hostname = "127.0.0.1"; - if (config::enable_brpc_connection_check) { - 
state->get_query_ctx()->add_using_brpc_stub(network_address, _brpc_stub); - } } else { _brpc_stub = state->exec_env()->brpc_internal_client_cache()->get_client(_brpc_dest_addr); - if (config::enable_brpc_connection_check) { - state->get_query_ctx()->add_using_brpc_stub(_brpc_dest_addr, _brpc_stub); - } } if (!_brpc_stub) { @@ -91,6 +85,10 @@ Status Channel::init(RuntimeState* state) { LOG(WARNING) << msg; return Status::InternalError(msg); } + + if (config::enable_brpc_connection_check) { + state->get_query_ctx()->add_using_brpc_stub(_brpc_dest_addr, _brpc_stub); + } return Status::OK(); }