Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use make_strings_children for fill() specialization logic #14697

Merged
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 37 additions & 42 deletions cpp/src/strings/filling/fill.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,27 +15,48 @@
*/

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/detail/fill.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>

#include <cuda/functional>

namespace cudf {
namespace strings {
namespace detail {
namespace {
struct fill_fn {
column_device_view d_strings;
size_type begin;
size_type end;
string_view d_value;
size_type* d_offsets{};
char* d_chars{};

__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str =
((begin <= idx) && (idx < end)) ? d_value : d_strings.element<string_view>(idx);
if (!d_chars) {
d_offsets[idx] = d_str.size_bytes();
} else {
copy_string(d_chars + d_offsets[idx], d_str);
}
}
};
} // namespace

std::unique_ptr<column> fill(strings_column_view const& strings,
size_type begin,
size_type end,
Expand All @@ -51,55 +72,29 @@ std::unique_ptr<column> fill(strings_column_view const& strings,
if (begin == end) // return a copy
return std::make_unique<column>(strings.parent(), stream, mr);

// string_scalar.data() is null for valid, empty strings
auto d_value = get_scalar_device_view(const_cast<string_scalar&>(value));

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_strings = *strings_column;

// create resulting null mask
auto valid_mask = [begin, end, d_value, &value, d_strings, stream, mr] {
if (begin == 0 and end == d_strings.size() and value.is_valid(stream))
auto [null_mask, null_count] = [begin, end, &value, d_strings, stream, mr] {
if (begin == 0 and end == d_strings.size() and value.is_valid(stream)) {
return std::pair(rmm::device_buffer{}, 0);
}
auto d_value = get_scalar_device_view(const_cast<string_scalar&>(value));
return cudf::detail::valid_if(
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(d_strings.size()),
[d_strings, begin, end, d_value] __device__(size_type idx) {
return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : !d_strings.is_null(idx);
return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : d_strings.is_valid(idx);
},
stream,
mr);
}();
auto null_count = valid_mask.second;
rmm::device_buffer& null_mask = valid_mask.first;

// build offsets column
auto offsets_transformer = cuda::proclaim_return_type<size_type>(
[d_strings, begin, end, d_value] __device__(size_type idx) {
if (((begin <= idx) && (idx < end)) ? !d_value.is_valid() : d_strings.is_null(idx)) return 0;
return ((begin <= idx) && (idx < end)) ? d_value.size()
: d_strings.element<string_view>(idx).size_bytes();
});
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0), offsets_transformer);
auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto d_offsets = offsets_column->view().data<int32_t>();

// create the chars column
auto chars_column = create_chars_child_column(bytes, stream, mr);
// fill the chars column
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(
rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
[d_strings, begin, end, d_value, d_offsets, d_chars] __device__(size_type idx) {
if (((begin <= idx) && (idx < end)) ? !d_value.is_valid() : d_strings.is_null(idx)) return;
string_view const d_str =
((begin <= idx) && (idx < end)) ? d_value.value() : d_strings.element<string_view>(idx);
memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes());
});
auto d_value = const_cast<string_scalar&>(value);
auto const d_str = d_value.is_valid(stream) ? d_value.value(stream) : string_view{};
auto fn = fill_fn{d_strings, begin, end, d_str};
auto [offsets_column, chars_column] = make_strings_children(fn, strings_count, stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
Expand Down
Loading