Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use make_strings_children for fill() specialization logic #14697

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 45 additions & 50 deletions cpp/src/strings/filling/fill.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,91 +15,86 @@
*/

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/detail/fill.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

#include <cuda/functional>

namespace cudf {
namespace strings {
namespace detail {
std::unique_ptr<column> fill(strings_column_view const& strings,
namespace {
/**
 * @brief Per-row functor that either measures or writes the output string for a row.
 *
 * Rows with an index inside [begin, end) use `d_value`. Every other row uses the
 * matching element of `d_strings`, where an invalid (null) element is treated as
 * an empty `string_view`.
 *
 * The call operator has two modes, selected by whether `d_chars` is null:
 *  - `d_chars == nullptr`: store the row's size in bytes into `d_offsets[idx]`
 *  - `d_chars != nullptr`: copy the row's bytes to `d_chars + d_offsets[idx]`
 *
 * NOTE(review): `d_offsets` and `d_chars` are presumably assigned by the code that
 * drives this functor (it is handed to `make_strings_children`) -- confirm there.
 */
struct fill_fn {
  column_device_view const d_strings;  ///< source strings column
  size_type const begin;               ///< first row index (inclusive) that receives d_value
  size_type const end;                 ///< row index (exclusive) where the fill range stops
  string_view const d_value;           ///< string placed into rows within [begin, end)
  size_type* d_offsets{};              ///< sizes are written here; also read as write positions
  char* d_chars{};                     ///< output character buffer; null selects the sizing mode

  /**
   * @brief Returns the string that belongs in output row `idx`.
   *
   * @param idx Row index
   * @return `d_value` inside the fill range, otherwise the source element
   *         (or an empty `string_view` when the source element is not valid)
   */
  __device__ string_view resolve_string_at(size_type idx) const
  {
    bool const in_fill_range = (idx >= begin) && (idx < end);
    if (in_fill_range) { return d_value; }
    if (d_strings.is_valid(idx)) { return d_strings.element<string_view>(idx); }
    return string_view{};
  }

  /**
   * @brief Records the byte size of row `idx`, or copies its bytes to the output buffer.
   *
   * @param idx Row index
   */
  __device__ void operator()(size_type idx) const
  {
    auto const d_row = resolve_string_at(idx);
    if (d_chars != nullptr) {
      // writing mode: d_offsets[idx] is read as the destination position for this row
      copy_string(d_chars + d_offsets[idx], d_row);
      return;
    }
    // sizing mode: only the byte count is recorded
    d_offsets[idx] = d_row.size_bytes();
  }
};
} // namespace

std::unique_ptr<column> fill(strings_column_view const& input,
size_type begin,
size_type end,
string_scalar const& value,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto strings_count = strings.size();
if (strings_count == 0) return make_empty_column(type_id::STRING);
auto const strings_count = input.size();
if (strings_count == 0) { return make_empty_column(type_id::STRING); }
CUDF_EXPECTS((begin >= 0) && (end <= strings_count),
"Parameters [begin,end) are outside the range of the provided strings column");
CUDF_EXPECTS(begin <= end, "Parameters [begin,end) have invalid range values");
if (begin == end) // return a copy
return std::make_unique<column>(strings.parent(), stream, mr);

// string_scalar.data() is null for valid, empty strings
auto d_value = get_scalar_device_view(const_cast<string_scalar&>(value));
if (begin == end) { return std::make_unique<column>(input.parent(), stream, mr); }

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_strings = *strings_column;
auto strings_column = column_device_view::create(input.parent(), stream);
auto const d_strings = *strings_column;
auto const is_valid = value.is_valid(stream);

// create resulting null mask
auto valid_mask = [begin, end, d_value, &value, d_strings, stream, mr] {
if (begin == 0 and end == d_strings.size() and value.is_valid(stream))
auto [null_mask, null_count] = [begin, end, is_valid, d_strings, stream, mr] {
if (begin == 0 and end == d_strings.size() and is_valid) {
return std::pair(rmm::device_buffer{}, 0);
}
return cudf::detail::valid_if(
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(d_strings.size()),
[d_strings, begin, end, d_value] __device__(size_type idx) {
return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : !d_strings.is_null(idx);
[d_strings, begin, end, is_valid] __device__(size_type idx) {
return ((begin <= idx) && (idx < end)) ? is_valid : d_strings.is_valid(idx);
},
stream,
mr);
}();
auto null_count = valid_mask.second;
rmm::device_buffer& null_mask = valid_mask.first;

// build offsets column
auto offsets_transformer = cuda::proclaim_return_type<size_type>(
[d_strings, begin, end, d_value] __device__(size_type idx) {
if (((begin <= idx) && (idx < end)) ? !d_value.is_valid() : d_strings.is_null(idx)) return 0;
return ((begin <= idx) && (idx < end)) ? d_value.size()
: d_strings.element<string_view>(idx).size_bytes();
});
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0), offsets_transformer);
auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto d_offsets = offsets_column->view().data<int32_t>();
auto const d_value = const_cast<string_scalar&>(value);
auto const d_str = is_valid ? d_value.value(stream) : string_view{};
auto fn = fill_fn{d_strings, begin, end, d_str};

// create the chars column
auto chars_column = create_chars_child_column(bytes, stream, mr);
// fill the chars column
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
[d_strings, begin, end, d_value, d_offsets, d_chars] __device__(size_type idx) {
if (((begin <= idx) && (idx < end)) ? !d_value.is_valid() : d_strings.is_null(idx)) return;
string_view const d_str =
((begin <= idx) && (idx < end)) ? d_value.value() : d_strings.element<string_view>(idx);
memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
});
auto [offsets_column, chars_column] = make_strings_children(fn, strings_count, stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
Expand Down