Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework cudf::find_and_replace_all to use gather-based make_strings_column #15305

Merged
merged 6 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ add_library(
src/strings/regex/regex_program.cpp
src/strings/repeat_strings.cu
src/strings/replace/backref_re.cu
src/strings/replace/find_replace.cu
src/strings/replace/multi.cu
src/strings/replace/multi_re.cu
src/strings/replace/replace.cu
Expand Down
18 changes: 18 additions & 0 deletions cpp/include/cudf/strings/detail/replace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,24 @@ std::unique_ptr<column> replace_slice(strings_column_view const& strings,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Return a copy of `input` replacing any `values_to_replace[i]`
* found with `replacement_values[i]`
*
* @param input The column to find and replace values
* @param values_to_replace The values to find
* @param replacement_values The corresponding replacement values
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Copy of `input` with specified values replaced
*/
std::unique_ptr<cudf::column> find_and_replace_all(
cudf::strings_column_view const& input,
cudf::strings_column_view const& values_to_replace,
cudf::strings_column_view const& replacement_values,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace strings
} // namespace cudf
212 changes: 3 additions & 209 deletions cpp/src/replace/replace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/replace.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/replace.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
Expand All @@ -57,7 +57,6 @@
#include <thrust/execution_policy.h>
#include <thrust/find.h>
#include <thrust/pair.h>
#include <thrust/tuple.h>

namespace { // anonymous

Expand Down Expand Up @@ -87,140 +86,6 @@ __device__ auto get_new_value(cudf::size_type idx,
return thrust::make_pair(new_value, output_is_valid);
}

__device__ int get_new_string_value(cudf::size_type idx,
cudf::column_device_view& input,
cudf::column_device_view& values_to_replace,
cudf::column_device_view&)
{
cudf::string_view input_string = input.element<cudf::string_view>(idx);
int match = -1;
for (int i = 0; i < values_to_replace.size(); i++) {
cudf::string_view value_string = values_to_replace.element<cudf::string_view>(i);
if (input_string == value_string) {
match = i;
break;
}
}
return match;
}

/**
* @brief Kernel which does the first pass of strings replace.
*
* It computes the output null_mask, null_count, and the offsets.
*
* @param input The input column to replace strings in.
* @param values_to_replace The string values to replace.
* @param replacement The replacement values.
* @param offsets The column which will contain the offsets of the new string column
* @param indices Temporary column used to store the replacement indices
* @param output_valid The output null_mask
* @param output_valid_count The output valid count
*/
template <bool input_has_nulls, bool replacement_has_nulls>
CUDF_KERNEL void replace_strings_first_pass(cudf::column_device_view input,
cudf::column_device_view values_to_replace,
cudf::column_device_view replacement,
cudf::mutable_column_device_view offsets,
cudf::mutable_column_device_view indices,
cudf::bitmask_type* output_valid,
cudf::size_type* __restrict__ output_valid_count)
{
cudf::size_type nrows = input.size();
auto tid = cudf::detail::grid_1d::global_thread_id();
auto const stride = cudf::detail::grid_1d::grid_stride();
uint32_t active_mask = 0xffff'ffffu;
active_mask = __ballot_sync(active_mask, tid < nrows);
auto const lane_id{threadIdx.x % cudf::detail::warp_size};
uint32_t valid_sum{0};

while (tid < nrows) {
auto const idx = static_cast<cudf::size_type>(tid);
bool input_is_valid = true;

if (input_has_nulls) input_is_valid = input.is_valid_nocheck(idx);
bool output_is_valid = input_is_valid;

if (input_is_valid) {
int result = get_new_string_value(idx, input, values_to_replace, replacement);
cudf::string_view output = (result == -1) ? input.element<cudf::string_view>(idx)
: replacement.element<cudf::string_view>(result);
offsets.data<cudf::size_type>()[idx] = output.size_bytes();
indices.data<cudf::size_type>()[idx] = result;
if (replacement_has_nulls && result != -1) {
output_is_valid = replacement.is_valid_nocheck(result);
}
} else {
offsets.data<cudf::size_type>()[idx] = 0;
indices.data<cudf::size_type>()[idx] = -1;
}

uint32_t bitmask = __ballot_sync(active_mask, output_is_valid);
if (0 == lane_id) {
output_valid[cudf::word_index(idx)] = bitmask;
valid_sum += __popc(bitmask);
}

tid += stride;
active_mask = __ballot_sync(active_mask, tid < nrows);
}

// Compute total valid count for this block and add it to global count
uint32_t block_valid_count = cudf::detail::single_lane_block_sum_reduce<BLOCK_SIZE, 0>(valid_sum);
// one thread computes and adds to output_valid_count
if (threadIdx.x == 0) {
atomicAdd(output_valid_count, static_cast<cudf::size_type>(block_valid_count));
}
}

/**
* @brief Kernel which does the second pass of strings replace.
*
* It copies the string data needed from input and replacement into the new strings column chars
* column.
*
* @param input The input column
* @param replacement The replacement values
* @param offsets The offsets column of the new strings column
* @param strings The chars column of the new strings column
* @param indices Temporary column used to store the replacement indices.
*/
template <bool input_has_nulls, bool replacement_has_nulls>
CUDF_KERNEL void replace_strings_second_pass(cudf::column_device_view input,
cudf::column_device_view replacement,
cudf::mutable_column_device_view offsets,
char* strings,
cudf::mutable_column_device_view indices)
{
cudf::size_type nrows = input.size();
auto tid = cudf::detail::grid_1d::global_thread_id();
auto const stride = cudf::detail::grid_1d::grid_stride();

while (tid < nrows) {
auto const idx = static_cast<cudf::size_type>(tid);
auto const replace_idx = indices.element<cudf::size_type>(idx);
bool output_is_valid = true;
bool input_is_valid = true;

if (input_has_nulls) {
input_is_valid = input.is_valid_nocheck(idx);
output_is_valid = input_is_valid;
}
if (replacement_has_nulls && replace_idx != -1) {
output_is_valid = replacement.is_valid_nocheck(replace_idx);
}
if (output_is_valid) {
cudf::string_view output = (replace_idx == -1)
? input.element<cudf::string_view>(idx)
: replacement.element<cudf::string_view>(replace_idx);
std::memcpy(
strings + offsets.data<cudf::size_type>()[idx], output.data(), output.size_bytes());
}

tid += stride;
}
}

/**
* @brief Kernel that replaces elements from `output_data` given the following
* rule: replace all `values_to_replace[i]` in [values_to_replace_begin`,
Expand Down Expand Up @@ -375,79 +240,8 @@ std::unique_ptr<cudf::column> replace_kernel_forwarder::operator()<cudf::string_
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
rmm::device_scalar<cudf::size_type> valid_counter(0, stream);
cudf::size_type* valid_count = valid_counter.data();

auto replace_first = replace_strings_first_pass<true, false>;
auto replace_second = replace_strings_second_pass<true, false>;
if (input_col.has_nulls()) {
if (replacement_values.has_nulls()) {
replace_first = replace_strings_first_pass<true, true>;
replace_second = replace_strings_second_pass<true, true>;
}
} else {
if (replacement_values.has_nulls()) {
replace_first = replace_strings_first_pass<false, true>;
replace_second = replace_strings_second_pass<false, true>;
} else {
replace_first = replace_strings_first_pass<false, false>;
replace_second = replace_strings_second_pass<false, false>;
}
}

// Create new offsets column to use in kernel
std::unique_ptr<cudf::column> sizes =
cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()},
input_col.size(),
cudf::mask_state::UNALLOCATED,
stream);
std::unique_ptr<cudf::column> indices =
cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<cudf::size_type>()},
input_col.size(),
cudf::mask_state::UNALLOCATED,
stream);

auto sizes_view = sizes->mutable_view();
auto indices_view = indices->mutable_view();

auto device_in = cudf::column_device_view::create(input_col, stream);
auto device_values_to_replace = cudf::column_device_view::create(values_to_replace, stream);
auto device_replacement = cudf::column_device_view::create(replacement_values, stream);
auto device_sizes = cudf::mutable_column_device_view::create(sizes_view, stream);
auto device_indices = cudf::mutable_column_device_view::create(indices_view, stream);

rmm::device_buffer valid_bits =
cudf::detail::create_null_mask(input_col.size(), cudf::mask_state::UNINITIALIZED, stream, mr);

// Call first pass kernel to get sizes in offsets
cudf::detail::grid_1d grid{input_col.size(), BLOCK_SIZE, 1};
replace_first<<<grid.num_blocks, BLOCK_SIZE, 0, stream.value()>>>(
*device_in,
*device_values_to_replace,
*device_replacement,
*device_sizes,
*device_indices,
reinterpret_cast<cudf::bitmask_type*>(valid_bits.data()),
valid_count);

auto [offsets, bytes] = cudf::detail::make_offsets_child_column(
sizes_view.begin<cudf::size_type>(), sizes_view.end<cudf::size_type>(), stream, mr);
auto offsets_view = offsets->mutable_view();
auto device_offsets = cudf::mutable_column_device_view::create(offsets_view, stream);

// Allocate chars array and output null mask
cudf::size_type null_count = input_col.size() - valid_counter.value(stream);
rmm::device_uvector<char> output_chars(bytes, stream, mr);
auto d_chars = output_chars.data();

replace_second<<<grid.num_blocks, BLOCK_SIZE, 0, stream.value()>>>(
*device_in, *device_replacement, *device_offsets, d_chars, *device_indices);

return cudf::make_strings_column(input_col.size(),
std::move(offsets),
output_chars.release(),
null_count,
std::move(valid_bits));
return cudf::strings::detail::find_and_replace_all(
input_col, values_to_replace, replacement_values, stream, mr);
}

template <>
Expand Down
87 changes: 87 additions & 0 deletions cpp/src/strings/replace/find_replace.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/replace.hpp>
#include <cudf/strings/detail/strings_column_factories.cuh>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_scalar.hpp>

#include <thrust/execution_policy.h>
#include <thrust/transform.h>

namespace cudf {
namespace strings {
namespace detail {
namespace {
struct find_replace_fn {
column_device_view d_input;
column_device_view d_values;
column_device_view d_replacements;

__device__ string_index_pair get_replacement(size_type idx)
{
if (d_replacements.is_null(idx)) { return string_index_pair{nullptr, 0}; }
auto const d_str = d_replacements.element<string_view>(idx);
return string_index_pair{d_str.data(), d_str.size_bytes()};
}

__device__ string_index_pair operator()(size_type idx)
{
if (d_input.is_null(idx)) { return string_index_pair{nullptr, 0}; }
auto const d_str = d_input.element<string_view>(idx);
// find d_str in d_values
// if found return corresponding replacement
// if not found, return d_str
auto const begin = thrust::counting_iterator<size_type>(0);
auto const end = thrust::counting_iterator<size_type>(d_values.size());
auto const itr =
thrust::find_if(thrust::seq, begin, end, [d_values = d_values, d_str](size_type i) -> bool {
return d_str == d_values.element<string_view>(i);
});
return itr == end ? string_index_pair{d_str.data(), d_str.size_bytes()} : get_replacement(*itr);
}
};

} // namespace

std::unique_ptr<cudf::column> find_and_replace_all(
cudf::strings_column_view const& input,
cudf::strings_column_view const& values_to_replace,
cudf::strings_column_view const& replacement_values,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto d_input = cudf::column_device_view::create(input.parent(), stream);
auto d_values_to_replace = cudf::column_device_view::create(values_to_replace.parent(), stream);
auto d_replacements = cudf::column_device_view::create(replacement_values.parent(), stream);

auto indices = rmm::device_uvector<string_index_pair>(input.size(), stream);

thrust::transform(rmm::exec_policy_nosync(stream),
thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(input.size()),
indices.begin(),
find_replace_fn{*d_input, *d_values_to_replace, *d_replacements});

return make_strings_column(indices.begin(), indices.end(), stream, mr);
}

} // namespace detail
} // namespace strings
} // namespace cudf
8 changes: 2 additions & 6 deletions cpp/tests/replace/replace_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,7 @@ TEST_F(ReplaceStringsTest, Strings)
ASSERT_NO_THROW(result = cudf::find_and_replace_all(
input_wrapper, values_to_replace_wrapper, replacement_wrapper));
std::vector<std::string> expected{"z", "b", "c", "d", "e", "f", "g", "h"};
std::vector<cudf::valid_type> ex_valid{1, 1, 1, 1, 1, 1, 1, 1};
cudf::test::strings_column_wrapper expected_wrapper{
expected.begin(), expected.end(), ex_valid.begin()};
cudf::test::strings_column_wrapper expected_wrapper{expected.begin(), expected.end()};

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected_wrapper);
}
Expand Down Expand Up @@ -160,7 +158,6 @@ TEST_F(ReplaceStringsTest, StringsResultAllEmpty)
std::vector<std::string> replacement{"a", ""};
std::vector<cudf::valid_type> replacement_valid{1, 1};
std::vector<std::string> expected{"", "", "", "", "", "", "", ""};
std::vector<cudf::valid_type> ex_valid{1, 1, 1, 1, 1, 1, 1, 1};
cudf::test::strings_column_wrapper input_wrapper{input.begin(), input.end()};
cudf::test::strings_column_wrapper values_to_replace_wrapper{values_to_replace.begin(),
values_to_replace.end()};
Expand All @@ -170,8 +167,7 @@ TEST_F(ReplaceStringsTest, StringsResultAllEmpty)
std::unique_ptr<cudf::column> result;
ASSERT_NO_THROW(result = cudf::find_and_replace_all(
input_wrapper, values_to_replace_wrapper, replacement_wrapper));
cudf::test::strings_column_wrapper expected_wrapper{
expected.begin(), expected.end(), ex_valid.begin()};
cudf::test::strings_column_wrapper expected_wrapper{expected.begin(), expected.end()};

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected_wrapper);
}
Expand Down
Loading