From 16ff99899fa665edf640362ba2115d23540104ef Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 25 Jun 2024 14:25:58 -0400 Subject: [PATCH 1/2] Add libcudf public/detail API pattern to developer guide --- .../developer_guide/DEVELOPER_GUIDE.md | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index ff80c2daab8..e497018cc12 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -1,4 +1,4 @@ -# libcudf C++ Developer Guide +# libcudf C++ Developer Guide {#DEVELOPER_GUIDE} This document serves as a guide for contributors to libcudf C++ code. Developers should also refer to these additional files for further documentation of libcudf best practices. @@ -469,7 +469,7 @@ libcudf throws under different circumstances, see the [section on error handling # libcudf API and Implementation -## Streams +## Streams {#streams} libcudf is in the process of adding support for asynchronous execution using CUDA streams. In order to facilitate the usage of streams, all new libcudf APIs @@ -703,28 +703,28 @@ The preferred style for how inputs are passed in and outputs are returned is the - `column_view const&` - Tables: - `table_view const&` - - Scalar: - - `scalar const&` - - Everything else: - - Trivial or inexpensively copied types - - Pass by value - - Non-trivial or expensive to copy types - - Pass by `const&` + - Scalar: + - `scalar const&` + - Everything else: + - Trivial or inexpensively copied types + - Pass by value + - Non-trivial or expensive to copy types + - Pass by `const&` - In/Outs - Columns: - `mutable_column_view&` - Tables: - `mutable_table_view&` - - Everything else: - - Pass by via raw pointer + - Everything else: + - Pass by via raw pointer - Outputs - Outputs should be *returned*, i.e., no output parameters - Columns: - `std::unique_ptr` - Tables: - `std::unique_ptr` - - Scalars: - - `std::unique_ptr` + - Scalars: + - `std::unique_ptr` ### Multiple Return Values @@ -908,6 +908,10 @@ functions that are specific to columns of Strings. These functions reside in the namespace. Similarly, functionality used exclusively for unit testing is in the `cudf::test::` namespace. +The public function is expected to contain a call to `CUDF_FUNC_RANGE()` followed by a call to +a `detail` function with same name and parameters as the public function. +See the [Streams](#streams) section for an example of this pattern. + ### Internal Many functions are not meant for public use, so place them in either the `detail` or an *anonymous* From d1ca56f7b07f353c103b58cc7f56783c4759c22c Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 26 Jun 2024 13:57:21 -0400 Subject: [PATCH 2/2] fix streams example --- .../developer_guide/DEVELOPER_GUIDE.md | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index e497018cc12..0d097541692 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -486,33 +486,37 @@ use only asynchronous versions of CUDA APIs with the stream parameter. In order to make the `detail` API callable from other libcudf functions, it should be exposed in a header placed in the `cudf/cpp/include/detail/` directory. +The declaration is not necessary if no other libcudf functions call the `detail` function. For example: ```c++ // cpp/include/cudf/header.hpp -void external_function(...); +void external_function(..., + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); // cpp/include/cudf/detail/header.hpp namespace detail{ -void external_function(..., rmm::cuda_stream_view stream) +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) } // namespace detail // cudf/src/implementation.cpp namespace detail{ - // Use the stream parameter in the detail implementation. - void external_function(..., rmm::cuda_stream_view stream){ - // Implementation uses the stream with async APIs. - rmm::device_buffer buff(...,stream); - CUDF_CUDA_TRY(cudaMemcpyAsync(...,stream.value())); - kernel<<<..., stream>>>(...); - thrust::algorithm(rmm::exec_policy(stream), ...); - } +// Use the stream parameter in the detail implementation. +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr){ + // Implementation uses the stream with async APIs. + rmm::device_buffer buff(..., stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync(...,stream.value())); + kernel<<<..., stream>>>(...); + thrust::algorithm(rmm::exec_policy(stream), ...); +} } // namespace detail -void external_function(...){ - CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function. - detail::external_function(..., cudf::get_default_stream()); +void external_function(..., rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function. + detail::external_function(..., stream, mr); } ```