forked from rapidsai/cudf
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add stacktrace into cudf exception types (rapidsai#13298)
This implements stacktrace and adds a stacktrace string into any exception thrown by cudf. By doing so, the exception carries information about where it originated, allowing the downstream application to trace back with much less effort. Closes rapidsai#12422. ### Example: ``` #0: cudf/cpp/build/libcudf.so : std::unique_ptr<cudf::column, std::default_delete<cudf::column> > cudf::detail::sorted_order<false>(cudf::table_view, std::vector<cudf::order, std::allocator<cudf::order> > const&, std::vector<cudf::null_order, std::allocator<cudf::null_order> > const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)+0x446 #1: cudf/cpp/build/libcudf.so : cudf::detail::sorted_order(cudf::table_view const&, std::vector<cudf::order, std::allocator<cudf::order> > const&, std::vector<cudf::null_order, std::allocator<cudf::null_order> > const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)+0x113 #2: cudf/cpp/build/libcudf.so : std::unique_ptr<cudf::column, std::default_delete<cudf::column> > cudf::detail::segmented_sorted_order_common<(cudf::detail::sort_method)1>(cudf::table_view const&, cudf::column_view const&, std::vector<cudf::order, std::allocator<cudf::order> > const&, std::vector<cudf::null_order, std::allocator<cudf::null_order> > const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)+0x66e #3: cudf/cpp/build/libcudf.so : cudf::detail::segmented_sort_by_key(cudf::table_view const&, cudf::table_view const&, cudf::column_view const&, std::vector<cudf::order, std::allocator<cudf::order> > const&, std::vector<cudf::null_order, std::allocator<cudf::null_order> > const&, rmm::cuda_stream_view, rmm::mr::device_memory_resource*)+0x88 #4: cudf/cpp/build/libcudf.so : cudf::segmented_sort_by_key(cudf::table_view const&, cudf::table_view const&, cudf::column_view const&, std::vector<cudf::order, std::allocator<cudf::order> > const&, std::vector<cudf::null_order, std::allocator<cudf::null_order> > const&, rmm::mr::device_memory_resource*)+0xb9 #5: 
cudf/cpp/build/gtests/SORT_TEST : ()+0xe3027 #6: cudf/cpp/build/lib/libgtest.so.1.13.0 : void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*)+0x8f #7: cudf/cpp/build/lib/libgtest.so.1.13.0 : testing::Test::Run()+0xd6 #8: cudf/cpp/build/lib/libgtest.so.1.13.0 : testing::TestInfo::Run()+0x195 #9: cudf/cpp/build/lib/libgtest.so.1.13.0 : testing::TestSuite::Run()+0x109 #10: cudf/cpp/build/lib/libgtest.so.1.13.0 : testing::internal::UnitTestImpl::RunAllTests()+0x44f #11: cudf/cpp/build/lib/libgtest.so.1.13.0 : bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*)+0x87 #12: cudf/cpp/build/lib/libgtest.so.1.13.0 : testing::UnitTest::Run()+0x95 #13: cudf/cpp/build/gtests/SORT_TEST : ()+0xdb08c #14: /lib/x86_64-linux-gnu/libc.so.6 : ()+0x29d90 #15: /lib/x86_64-linux-gnu/libc.so.6 : __libc_start_main()+0x80 #16: cudf/cpp/build/gtests/SORT_TEST : ()+0xdf3d5 ``` ### Usage In order to retrieve a stacktrace with fully human-readable symbols, some compile options must be adjusted. To make this adjustment convenient and effortless, a new cmake option (`CUDF_BUILD_STACKTRACE_DEBUG`) has been added. Just set this option to `ON` before building cudf and it will be ready to use. Downstream applications can then retrieve the stored stacktrace whenever a cudf-type exception is thrown and do whatever they want with it. For example: ``` try { // cudf API calls } catch (cudf::logic_error const& e) { std::cout << e.what() << std::endl; std::cout << e.stacktrace() << std::endl; throw e; } // similar with catching other exception types ``` ### Follow-up work The next step would be patching `rmm` to attach stacktrace into `rmm::` exceptions. 
Doing so will allow debugging various memory exceptions thrown from libcudf using their stacktrace. ### Note: * This feature doesn't require libcudf to be built in Debug mode. * The flag `CUDF_BUILD_STACKTRACE_DEBUG` should not be turned on in production as it may affect code optimization. Instead, libcudf compiled with that flag turned on should be used only when needed, when debugging exceptions thrown by cudf. * This flag removes the current optimization flag from compiling (such as `-O2` or `-O3`, if in Release mode) and replaces it with `-Og` (optimize for debugging). * If this option is not set to `ON`, the stacktrace will not be available. This is to avoid expensive stacktrace retrieval when the thrown exception is expected. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) - Robert Maynard (https://github.com/robertmaynard) - Vyas Ramasubramani (https://github.com/vyasr) - Jason Lowe (https://github.com/jlowe) URL: rapidsai#13298
- Loading branch information
Showing
13 changed files
with
350 additions
and
123 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <string> | ||
|
||
namespace cudf::detail { | ||
/** | ||
* @addtogroup utility_stacktrace | ||
* @{ | ||
* @file | ||
*/ | ||
|
||
/** | ||
* @brief Specify whether the last stackframe is included in the stacktrace. | ||
* | ||
* With `YES`, only the frame of `get_stacktrace` itself is dropped from the output; with `NO`, | ||
* one additional frame (the function that called `get_stacktrace`) is dropped as well. | ||
* | ||
* @note The underlying type is `bool` and `YES` is the first enumerator, so `YES` has the | ||
* underlying value `false`. Compare against the enumerators instead of converting to `bool`. | ||
*/ | ||
enum class capture_last_stackframe : bool { YES, NO }; | ||
|
||
/** | ||
* @brief Query the current stacktrace and return the whole stacktrace as one string. | ||
* | ||
* Depending on the value of the flag `capture_last_frame`, the caller that executes stacktrace | ||
* retrieval can be included in the output result. | ||
* | ||
* When stacktrace support is compiled in (GNU-compatible compiler and | ||
* `CUDF_BUILD_STACKTRACE_DEBUG` defined), at most 64 frames are captured and each reported frame | ||
* occupies one line of the form `#<index>: <module> : <function>+<offset>`, where the function | ||
* name is demangled when possible. Frames whose symbol string cannot be parsed are emitted | ||
* verbatim as `#<index>: <symbol string>`. | ||
* | ||
* When stacktrace support is not compiled in, or the symbols cannot be retrieved, the returned | ||
* string is a short explanatory message instead of a stacktrace. | ||
* | ||
* @param capture_last_frame Flag to specify if the current stackframe will be included into | ||
* the output | ||
* @return A string storing the whole current stacktrace | ||
*/ | ||
std::string get_stacktrace(capture_last_stackframe capture_last_frame); | ||
|
||
/** @} */ // end of group | ||
|
||
} // namespace cudf::detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include <cudf/detail/utilities/stacktrace.hpp> | ||
|
||
#if defined(__GNUC__) && defined(CUDF_BUILD_STACKTRACE_DEBUG) | ||
#include <cxxabi.h> | ||
#include <execinfo.h> | ||
|
||
#include <cstdlib> | ||
#include <cstring> | ||
#include <sstream> | ||
#endif // defined(__GNUC__) && defined(CUDF_BUILD_STACKTRACE_DEBUG) | ||
|
||
namespace cudf::detail { | ||
|
||
std::string get_stacktrace(capture_last_stackframe capture_last_frame) | ||
{ | ||
#if defined(__GNUC__) && defined(CUDF_BUILD_STACKTRACE_DEBUG) | ||
constexpr int max_stack_depth = 64; | ||
void* stack[max_stack_depth]; | ||
|
||
auto const depth = backtrace(stack, max_stack_depth); | ||
auto const modules = backtrace_symbols(stack, depth); | ||
|
||
if (modules == nullptr) { return "No stacktrace could be captured!"; } | ||
|
||
std::stringstream ss; | ||
|
||
// Skip one more depth to avoid including the stackframe of this function. | ||
auto const skip_depth = 1 + (capture_last_frame == capture_last_stackframe::YES ? 0 : 1); | ||
for (auto i = skip_depth; i < depth; ++i) { | ||
// Each modules[i] string contains a mangled name in the format like following: | ||
// `module_name(function_name+0x012) [0x01234567890a]` | ||
// We need to extract function name and function offset. | ||
char* begin_func_name = std::strstr(modules[i], "("); | ||
char* begin_func_offset = std::strstr(modules[i], "+"); | ||
char* end_func_offset = std::strstr(modules[i], ")"); | ||
|
||
auto const frame_idx = i - skip_depth; | ||
if (begin_func_name && begin_func_offset && end_func_offset && | ||
begin_func_name < begin_func_offset) { | ||
// Split `modules[i]` into separate null-terminated strings. | ||
// After this, mangled function name will then be [begin_func_name, begin_func_offset), and | ||
// function offset is in [begin_func_offset, end_func_offset). | ||
*(begin_func_name++) = '\0'; | ||
*(begin_func_offset++) = '\0'; | ||
*end_func_offset = '\0'; | ||
|
||
// We need to demangle function name. | ||
int status{0}; | ||
char* func_name = abi::__cxa_demangle(begin_func_name, nullptr, nullptr, &status); | ||
|
||
ss << "#" << frame_idx << ": " << modules[i] << " : " | ||
<< (status == 0 /*demangle success*/ ? func_name : begin_func_name) << "+" | ||
<< begin_func_offset << "\n"; | ||
free(func_name); | ||
} else { | ||
ss << "#" << frame_idx << ": " << modules[i] << "\n"; | ||
} | ||
} | ||
|
||
free(modules); | ||
|
||
return ss.str(); | ||
#else | ||
#ifdef CUDF_BUILD_STACKTRACE_DEBUG | ||
return "Stacktrace is only supported when built with a GNU compiler."; | ||
#else | ||
return "libcudf was not built with stacktrace support."; | ||
#endif // CUDF_BUILD_STACKTRACE_DEBUG | ||
#endif // __GNUC__ | ||
} | ||
|
||
} // namespace cudf::detail |
Oops, something went wrong.