From c1e26a63d33563190f452047e548f24fb47a63bf Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Thu, 29 Feb 2024 17:15:17 -0500 Subject: [PATCH] Fix cudf::test::to_host to handle both offset types for strings columns (#15073) The `cudf::test::to_host` function is updated to handle int32 and int64 offset types for strings columns when copying data to host memory. This function is used with `cudf::test::print()` as well. Also moved the function from the header `column_utilities.hpp` to the `column_utilities.cu` file. And moved the specialization of `to_host` for fixed-point types from the header to `.cu` as well. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/15073 --- cpp/include/cudf_test/column_utilities.hpp | 43 +------------ cpp/tests/utilities/column_utilities.cu | 75 ++++++++++++++++++++++ 2 files changed, 78 insertions(+), 40 deletions(-) diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp index 49d5098f823..cbfd7a5e45c 100644 --- a/cpp/include/cudf_test/column_utilities.hpp +++ b/cpp/include/cudf_test/column_utilities.hpp @@ -194,23 +194,7 @@ std::pair, std::vector> to_host(column_view * `column_view`'s data, and second is the column's bitmask. 
*/ template ()>* = nullptr> -std::pair, std::vector> to_host(column_view c) -{ - using namespace numeric; - using Rep = typename T::rep; - - auto host_rep_types = thrust::host_vector(c.size()); - - CUDF_CUDA_TRY( - cudaMemcpy(host_rep_types.data(), c.begin(), c.size() * sizeof(Rep), cudaMemcpyDefault)); - - auto to_fp = [&](Rep val) { return T{scaled_integer{val, scale_type{c.type().scale()}}}; }; - auto begin = thrust::make_transform_iterator(std::cbegin(host_rep_types), to_fp); - auto const host_fixed_points = thrust::host_vector(begin, begin + c.size()); - - return {host_fixed_points, bitmask_to_host(c)}; -} -//! @endcond +std::pair, std::vector> to_host(column_view c); /** * @brief Copies the data and bitmask of a `column_view` of strings @@ -223,29 +207,8 @@ std::pair, std::vector> to_host(column_view * and second is the column's bitmask. */ template <> -inline std::pair, std::vector> to_host(column_view c) -{ - thrust::host_vector host_data(c.size()); - auto stream = cudf::get_default_stream(); - if (c.size() > c.null_count()) { - auto const scv = strings_column_view(c); - auto const h_chars = cudf::detail::make_std_vector_sync( - cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); - auto const h_offsets = cudf::detail::make_std_vector_sync( - cudf::device_span(scv.offsets().data() + scv.offset(), - scv.size() + 1), - stream); - - // build std::string vector from chars and offsets - std::transform( - std::begin(h_offsets), - std::end(h_offsets) - 1, - std::begin(h_offsets) + 1, - host_data.begin(), - [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); }); - } - return {std::move(host_data), bitmask_to_host(c)}; -} +std::pair, std::vector> to_host(column_view c); +//! 
@endcond } // namespace cudf::test diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 018c6aeec2c..a556a8702bd 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -940,5 +940,80 @@ bool validate_host_masks(std::vector const& expected_mask, }); } +template ()>*> +std::pair, std::vector> to_host(column_view c) +{ + using namespace numeric; + using Rep = typename T::rep; + + auto host_rep_types = thrust::host_vector(c.size()); + + CUDF_CUDA_TRY( + cudaMemcpy(host_rep_types.data(), c.begin(), c.size() * sizeof(Rep), cudaMemcpyDefault)); + + auto to_fp = [&](Rep val) { return T{scaled_integer{val, scale_type{c.type().scale()}}}; }; + auto begin = thrust::make_transform_iterator(std::cbegin(host_rep_types), to_fp); + auto const host_fixed_points = thrust::host_vector(begin, begin + c.size()); + + return {host_fixed_points, bitmask_to_host(c)}; +} + +template std::pair, std::vector> to_host( + column_view c); +template std::pair, std::vector> to_host( + column_view c); +template std::pair, std::vector> to_host( + column_view c); + +namespace { +struct strings_to_host_fn { + template || + std::is_same_v>* = nullptr> + void operator()(thrust::host_vector& host_data, + char const* chars, + cudf::column_view const& offsets, + rmm::cuda_stream_view stream) + { + auto const h_offsets = cudf::detail::make_std_vector_sync( + cudf::device_span(offsets.data(), offsets.size()), stream); + // build std::string vector from chars and offsets + std::transform(std::begin(h_offsets), + std::end(h_offsets) - 1, + std::begin(h_offsets) + 1, + host_data.begin(), + [&](auto start, auto end) { return std::string(chars + start, end - start); }); + } + + template && + !std::is_same_v>* = nullptr> + void operator()(thrust::host_vector&, + char const*, + cudf::column_view const&, + rmm::cuda_stream_view) + { + CUDF_FAIL("invalid offsets type"); + } +}; +} // namespace + +template <> +std::pair, 
std::vector> to_host(column_view c) +{ + thrust::host_vector host_data(c.size()); + auto stream = cudf::get_default_stream(); + if (c.size() > c.null_count()) { + auto const scv = strings_column_view(c); + auto const h_chars = cudf::detail::make_std_vector_sync( + cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); + auto offsets = + cudf::slice(scv.offsets(), {scv.offset(), scv.offset() + scv.size() + 1}).front(); + cudf::type_dispatcher( + offsets.type(), strings_to_host_fn{}, host_data, h_chars.data(), offsets, stream); + } + return {std::move(host_data), bitmask_to_host(c)}; +} + } // namespace test } // namespace cudf