diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index fcd74bebfe8..05a1de722fc 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -18,11 +18,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -230,7 +232,8 @@ rmm::device_uvector gather_chars(StringIterator strings_begin, if (output_count == 0) return rmm::device_uvector(0, stream, mr); auto chars_data = rmm::device_uvector(chars_bytes, stream, mr); - auto d_chars = chars_data.data(); + cudf::experimental::prefetch::detail::prefetch("prefetch", chars_data); + auto d_chars = chars_data.data(); constexpr int warps_per_threadblock = 4; // String parallel strategy will be used if average string length is above this threshold. @@ -312,6 +315,8 @@ std::unique_ptr gather(strings_column_view const& strings, // build chars column auto const offsets_view = cudf::detail::offsetalator_factory::make_input_iterator(out_offsets_column->view()); + cudf::experimental::prefetch::detail::prefetch( + "prefetch", strings.chars_begin(stream), strings.chars_size(stream)); auto out_chars_data = gather_chars( d_strings->begin(), begin, end, offsets_view, total_bytes, stream, mr); diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index f5f3982a5d6..fd75e7da547 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -186,6 +187,7 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn, // Now build the chars column rmm::device_uvector chars(bytes, stream, mr); + cudf::experimental::prefetch::detail::prefetch("prefetch", chars); size_and_exec_fn.d_chars = chars.data(); // Execute the function fn again to fill in the chars data. 
diff --git a/cpp/include/cudf/utilities/prefetch.hpp b/cpp/include/cudf/utilities/prefetch.hpp index d95649bb267..2efdd8e3b3b 100644 --- a/cpp/include/cudf/utilities/prefetch.hpp +++ b/cpp/include/cudf/utilities/prefetch.hpp @@ -16,6 +16,8 @@ #pragma once +#include + #include #include #include @@ -82,6 +84,22 @@ class PrefetchConfig { */ void prefetch(std::string_view key, void const* ptr, std::size_t size); +/** + * @brief Prefetch the data in a device_uvector. + * + * @note At present this function does not support stream-ordered execution. Prefetching always + * occurs on the default stream. + * + * @param key The key to enable prefetching for. + * @param v The device_uvector to prefetch; its whole extent is passed on as a byte count. + */ +template +void prefetch(std::string_view key, rmm::device_uvector const& v) +{ + if (v.is_empty()) { return; } + prefetch(key, v.data(), v.size() * sizeof(*v.data())); +} + } // namespace detail /**