Skip to content

Commit

Permalink
Fix cudf::test::to_host to handle both offset types for strings colum…
Browse files Browse the repository at this point in the history
…ns (#15073)

The `cudf::test::to_host` function is updated to handle int32 and int64 offset types for strings columns when copying data to host memory. This function is used with `cudf::test::print()` as well.

Also moved the function from the header `column_utilities.hpp` to the `column_utilities.cu` file.
The specialization of `to_host` for fixed-point types was also moved from the header to the `.cu` file.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #15073
  • Loading branch information
davidwendt authored Feb 29, 2024
1 parent 200fc0b commit c1e26a6
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 40 deletions.
43 changes: 3 additions & 40 deletions cpp/include/cudf_test/column_utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,23 +194,7 @@ std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view
* `column_view`'s data, and second is the column's bitmask.
*/
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>* = nullptr>
std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view c)
{
  using namespace numeric;
  using Rep = typename T::rep;

  // Stage the column's underlying integer representation values on the host.
  auto host_rep_types = thrust::host_vector<Rep>(c.size());

  CUDF_CUDA_TRY(
    cudaMemcpy(host_rep_types.data(), c.begin<Rep>(), c.size() * sizeof(Rep), cudaMemcpyDefault));

  // The scale comes from the column's data type, so it is identical for every element;
  // read it once instead of once per element inside the lambda.
  auto const scale = scale_type{c.type().scale()};
  auto to_fp       = [scale](Rep val) { return T{scaled_integer<Rep>{val, scale}}; };
  auto begin       = thrust::make_transform_iterator(std::cbegin(host_rep_types), to_fp);

  // Deliberately non-const: a const local cannot be moved from and would be copied
  // element-by-element into the returned pair.
  auto host_fixed_points = thrust::host_vector<T>(begin, begin + c.size());

  return {std::move(host_fixed_points), bitmask_to_host(c)};
}
//! @endcond
std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view c);

/**
* @brief Copies the data and bitmask of a `column_view` of strings
Expand All @@ -223,29 +207,8 @@ std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view
* and second is the column's bitmask.
*/
template <>
inline std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to_host(column_view c)
{
  thrust::host_vector<std::string> host_data(c.size());
  auto stream = cudf::get_default_stream();
  // Only copy chars/offsets when at least one row is non-null; otherwise every
  // entry stays a default-constructed (empty) std::string.
  if (c.size() > c.null_count()) {
    auto const scv     = strings_column_view(c);
    auto const h_chars = cudf::detail::make_std_vector_sync<char>(
      cudf::device_span<char const>(scv.chars_begin(stream), scv.chars_size(stream)), stream);

    // Build the std::string vector from the host chars and a host copy of the offsets.
    // Generic over the offsets element type so 32-bit and 64-bit offsets share one path.
    auto const build_strings = [&](auto const& h_offsets) {
      std::transform(
        std::begin(h_offsets),
        std::end(h_offsets) - 1,
        std::begin(h_offsets) + 1,
        host_data.begin(),
        [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); });
    };

    // The offsets child may hold 64-bit values; reading those through a
    // cudf::size_type pointer would misinterpret the data, so branch on the actual type.
    auto const offsets = scv.offsets();
    if (offsets.type().id() == cudf::type_id::INT64) {
      build_strings(cudf::detail::make_std_vector_sync(
        cudf::device_span<int64_t const>(offsets.data<int64_t>() + scv.offset(), scv.size() + 1),
        stream));
    } else {
      build_strings(cudf::detail::make_std_vector_sync(
        cudf::device_span<cudf::size_type const>(
          offsets.data<cudf::size_type>() + scv.offset(), scv.size() + 1),
        stream));
    }
  }
  return {std::move(host_data), bitmask_to_host(c)};
}
std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to_host(column_view c);
//! @endcond

} // namespace cudf::test

Expand Down
75 changes: 75 additions & 0 deletions cpp/tests/utilities/column_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -940,5 +940,80 @@ bool validate_host_masks(std::vector<bitmask_type> const& expected_mask,
});
}

/**
 * @brief Copies the data and bitmask of a fixed-point `column_view` to the host.
 *
 * The column's underlying representation values are copied to the host and then
 * converted to `T` using the scale stored in the column's data type.
 *
 * @tparam T Fixed-point type of the column's elements
 * @param c The column view to copy from
 * @return Pair of the host fixed-point values and the column's bitmask
 */
template <typename T, std::enable_if_t<cudf::is_fixed_point<T>()>*>
std::pair<thrust::host_vector<T>, std::vector<bitmask_type>> to_host(column_view c)
{
  using namespace numeric;
  using Rep = typename T::rep;

  // Stage the column's underlying integer representation values on the host.
  auto host_rep_types = thrust::host_vector<Rep>(c.size());

  CUDF_CUDA_TRY(
    cudaMemcpy(host_rep_types.data(), c.begin<Rep>(), c.size() * sizeof(Rep), cudaMemcpyDefault));

  // The scale comes from the column's data type, so it is identical for every element;
  // read it once instead of once per element inside the lambda.
  auto const scale = scale_type{c.type().scale()};
  auto to_fp       = [scale](Rep val) { return T{scaled_integer<Rep>{val, scale}}; };
  auto begin       = thrust::make_transform_iterator(std::cbegin(host_rep_types), to_fp);

  // Deliberately non-const: a const local cannot be moved from and would be copied
  // element-by-element into the returned pair.
  auto host_fixed_points = thrust::host_vector<T>(begin, begin + c.size());

  return {std::move(host_fixed_points), bitmask_to_host(c)};
}

// Explicitly instantiate the fixed-point `to_host` overload for decimal32, decimal64
// and decimal128. The template's definition lives in this source file, so these
// instantiations are what make those overloads available to other translation units.
template std::pair<thrust::host_vector<numeric::decimal32>, std::vector<bitmask_type>> to_host(
column_view c);
template std::pair<thrust::host_vector<numeric::decimal64>, std::vector<bitmask_type>> to_host(
column_view c);
template std::pair<thrust::host_vector<numeric::decimal128>, std::vector<bitmask_type>> to_host(
column_view c);

namespace {
/**
 * @brief Type-dispatched functor that fills a host vector of `std::string` from a
 * host chars buffer and a strings offsets column.
 *
 * Only `int32_t` and `int64_t` offset types are supported; dispatching on any other
 * type fails with "invalid offsets type".
 */
struct strings_to_host_fn {
  /**
   * @tparam OffsetType Element type selected by the dispatcher from the offsets column
   * @param host_data Output strings; one entry is written per consecutive offsets pair
   * @param chars Host pointer to the character data the offsets index into
   * @param offsets Offsets column to copy to the host
   * @param stream Stream passed to the offsets host copy
   */
  template <typename OffsetType>
  void operator()(thrust::host_vector<std::string>& host_data,
                  char const* chars,
                  cudf::column_view const& offsets,
                  rmm::cuda_stream_view stream)
  {
    if constexpr (std::is_same_v<OffsetType, int32_t> || std::is_same_v<OffsetType, int64_t>) {
      auto const h_offsets = cudf::detail::make_std_vector_sync(
        cudf::device_span<OffsetType const>(offsets.data<OffsetType>(), offsets.size()), stream);

      // Each adjacent pair of offsets [begin_pos, end_pos) delimits one string in `chars`.
      auto out_itr        = host_data.begin();
      auto const last_itr = std::end(h_offsets) - 1;
      for (auto itr = std::begin(h_offsets); itr != last_itr; ++itr, ++out_itr) {
        auto const begin_pos = *itr;
        auto const end_pos   = *(itr + 1);
        *out_itr             = std::string(chars + begin_pos, end_pos - begin_pos);
      }
    } else {
      CUDF_FAIL("invalid offsets type");
    }
  }
};
}  // namespace

/**
 * @brief Copies the data and bitmask of a strings `column_view` to the host.
 *
 * @param c The strings column view to copy from
 * @return Pair of the host strings (one per row) and the column's bitmask
 */
template <>
std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to_host(column_view c)
{
  auto strings = thrust::host_vector<std::string>(c.size());
  auto stream  = cudf::get_default_stream();

  // When every row is null (or the column is empty) there is nothing to copy and
  // all entries remain empty strings.
  bool const has_valid_rows = c.size() > c.null_count();
  if (has_valid_rows) {
    auto const input = strings_column_view(c);

    // Host copy of the character data referenced by the offsets.
    auto const d_chars =
      cudf::device_span<char const>(input.chars_begin(stream), input.chars_size(stream));
    auto const host_chars = cudf::detail::make_std_vector_sync<char>(d_chars, stream);

    // Restrict the offsets child to the rows of this view: size() + 1 entries
    // starting at the view's offset.
    auto const first_offset   = input.offset();
    auto const last_offset    = first_offset + input.size() + 1;
    auto const sliced_offsets = cudf::slice(input.offsets(), {first_offset, last_offset}).front();

    // Dispatch on the offsets element type to build the strings.
    cudf::type_dispatcher(sliced_offsets.type(),
                          strings_to_host_fn{},
                          strings,
                          host_chars.data(),
                          sliced_offsets,
                          stream);
  }

  return {std::move(strings), bitmask_to_host(c)};
}

} // namespace test
} // namespace cudf

0 comments on commit c1e26a6

Please sign in to comment.