Skip to content

Commit

Permalink
Use make_strings_children for fill() specialization logic (#14697)
Browse files Browse the repository at this point in the history
Refactors the `cudf::strings::detail::fill()` function to use `make_strings_children`.
This consolidates some duplicated logic and will help to enable large strings support.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: #14697
  • Loading branch information
davidwendt authored Jan 5, 2024
1 parent 9c7b05b commit 5eb923f
Showing 1 changed file with 45 additions and 50 deletions.
95 changes: 45 additions & 50 deletions cpp/src/strings/filling/fill.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,91 +15,86 @@
*/

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/detail/fill.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

#include <cuda/functional>

namespace cudf {
namespace strings {
namespace detail {
std::unique_ptr<column> fill(strings_column_view const& strings,
namespace {
/**
 * @brief Per-row functor that produces the output of a strings fill.
 *
 * For a row index inside `[begin, end)` the output string is `d_value`;
 * for any other row it is the corresponding valid element of `d_strings`,
 * or an empty `string_view` when that element is null.
 *
 * The call operator works in one of two modes, selected by `d_chars`:
 *  - `d_chars` is null: the byte size of the row's string is stored in
 *    `d_offsets[idx]`.
 *  - `d_chars` is non-null: the row's string is copied to
 *    `d_chars + d_offsets[idx]`.
 *
 * NOTE(review): `d_offsets` and `d_chars` are presumably assigned by
 * `make_strings_children` between a sizing pass and a copy pass -- confirm
 * against strings_children.cuh.
 */
struct fill_fn {
  column_device_view const d_strings;  // input strings column
  size_type const begin;               // first row to overwrite (inclusive)
  size_type const end;                 // last row to overwrite (exclusive)
  string_view const d_value;           // replacement string for rows in [begin, end)
  size_type* d_offsets{};              // per-row sizes (written) or offsets (read)
  char* d_chars{};                     // output chars buffer; null selects sizing mode

  /**
   * @brief Returns the string that belongs in output row `idx`.
   */
  __device__ string_view resolve_string_at(size_type idx) const
  {
    bool const in_fill_range = (begin <= idx) && (idx < end);
    if (in_fill_range) { return d_value; }
    // Rows outside the fill range keep their input value; nulls map to empty.
    if (d_strings.is_valid(idx)) { return d_strings.element<string_view>(idx); }
    return string_view{};
  }

  /**
   * @brief Records the size of row `idx`, or copies its bytes to the output.
   */
  __device__ void operator()(size_type idx) const
  {
    auto const row_str = resolve_string_at(idx);
    if (d_chars != nullptr) {
      // Copy mode: d_offsets[idx] is read as the destination byte offset.
      copy_string(d_chars + d_offsets[idx], row_str);
      return;
    }
    // Sizing mode: only the byte count is recorded.
    d_offsets[idx] = row_str.size_bytes();
  }
};
} // namespace

std::unique_ptr<column> fill(strings_column_view const& input,
size_type begin,
size_type end,
string_scalar const& value,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto strings_count = strings.size();
if (strings_count == 0) return make_empty_column(type_id::STRING);
auto const strings_count = input.size();
if (strings_count == 0) { return make_empty_column(type_id::STRING); }
CUDF_EXPECTS((begin >= 0) && (end <= strings_count),
"Parameters [begin,end) are outside the range of the provided strings column");
CUDF_EXPECTS(begin <= end, "Parameters [begin,end) have invalid range values");
if (begin == end) // return a copy
return std::make_unique<column>(strings.parent(), stream, mr);

// string_scalar.data() is null for valid, empty strings
auto d_value = get_scalar_device_view(const_cast<string_scalar&>(value));
if (begin == end) { return std::make_unique<column>(input.parent(), stream, mr); }

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_strings = *strings_column;
auto strings_column = column_device_view::create(input.parent(), stream);
auto const d_strings = *strings_column;
auto const is_valid = value.is_valid(stream);

// create resulting null mask
auto valid_mask = [begin, end, d_value, &value, d_strings, stream, mr] {
if (begin == 0 and end == d_strings.size() and value.is_valid(stream))
auto [null_mask, null_count] = [begin, end, is_valid, d_strings, stream, mr] {
if (begin == 0 and end == d_strings.size() and is_valid) {
return std::pair(rmm::device_buffer{}, 0);
}
return cudf::detail::valid_if(
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(d_strings.size()),
[d_strings, begin, end, d_value] __device__(size_type idx) {
return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : !d_strings.is_null(idx);
[d_strings, begin, end, is_valid] __device__(size_type idx) {
return ((begin <= idx) && (idx < end)) ? is_valid : d_strings.is_valid(idx);
},
stream,
mr);
}();
auto null_count = valid_mask.second;
rmm::device_buffer& null_mask = valid_mask.first;

// build offsets column
auto offsets_transformer = cuda::proclaim_return_type<size_type>(
[d_strings, begin, end, d_value] __device__(size_type idx) {
if (((begin <= idx) && (idx < end)) ? !d_value.is_valid() : d_strings.is_null(idx)) return 0;
return ((begin <= idx) && (idx < end)) ? d_value.size()
: d_strings.element<string_view>(idx).size_bytes();
});
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0), offsets_transformer);
auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto d_offsets = offsets_column->view().data<int32_t>();
auto const d_value = const_cast<string_scalar&>(value);
auto const d_str = is_valid ? d_value.value(stream) : string_view{};
auto fn = fill_fn{d_strings, begin, end, d_str};

// create the chars column
auto chars_column = create_chars_child_column(bytes, stream, mr);
// fill the chars column
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
[d_strings, begin, end, d_value, d_offsets, d_chars] __device__(size_type idx) {
if (((begin <= idx) && (idx < end)) ? !d_value.is_valid() : d_strings.is_null(idx)) return;
string_view const d_str =
((begin <= idx) && (idx < end)) ? d_value.value() : d_strings.element<string_view>(idx);
memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
});
auto [offsets_column, chars_column] = make_strings_children(fn, strings_count, stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
Expand Down

0 comments on commit 5eb923f

Please sign in to comment.