From ca2daf9c7d0f288f7a787c4817ad801f01476b42 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 14 Mar 2024 15:51:06 -0400 Subject: [PATCH 1/2] Rework cudf::find_and_replace_all to use gather-based make_strings_column --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/strings/detail/replace.hpp | 18 ++ cpp/src/replace/replace.cu | 212 +------------------- cpp/src/strings/replace/find_replace.cu | 89 ++++++++ cpp/tests/replace/replace_tests.cpp | 8 +- 5 files changed, 113 insertions(+), 215 deletions(-) create mode 100644 cpp/src/strings/replace/find_replace.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 12837c69e59..618d03f7078 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -590,6 +590,7 @@ add_library( src/strings/regex/regex_program.cpp src/strings/repeat_strings.cu src/strings/replace/backref_re.cu + src/strings/replace/find_replace.cu src/strings/replace/multi.cu src/strings/replace/multi_re.cu src/strings/replace/replace.cu diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp index 28027291b28..0f050f057fa 100644 --- a/cpp/include/cudf/strings/detail/replace.hpp +++ b/cpp/include/cudf/strings/detail/replace.hpp @@ -81,6 +81,24 @@ std::unique_ptr replace_slice(strings_column_view const& strings, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Return a copy of `input` replacing any `values_to_replace[i]` + * found with `replacement_values[i]` + * + * @param input The column to find and replace values + * @param values_to_replace The values to find + * @param replacement_values The corresponding replacement values + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return Copy of `input` with specified values replaced + */ +std::unique_ptr find_and_replace_all( + cudf::strings_column_view const& input, + cudf::strings_column_view const& values_to_replace, + cudf::strings_column_view const& replacement_values, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace strings } // namespace cudf diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index 88d5d3a2375..91a0ced791a 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include #include @@ -57,7 +57,6 @@ #include #include #include -#include namespace { // anonymous @@ -87,140 +86,6 @@ __device__ auto get_new_value(cudf::size_type idx, return thrust::make_pair(new_value, output_is_valid); } -__device__ int get_new_string_value(cudf::size_type idx, - cudf::column_device_view& input, - cudf::column_device_view& values_to_replace, - cudf::column_device_view&) -{ - cudf::string_view input_string = input.element(idx); - int match = -1; - for (int i = 0; i < values_to_replace.size(); i++) { - cudf::string_view value_string = values_to_replace.element(i); - if (input_string == value_string) { - match = i; - break; - } - } - return match; -} - -/** - * @brief Kernel which does the first pass of strings replace. - * - * It computes the output null_mask, null_count, and the offsets. - * - * @param input The input column to replace strings in. - * @param values_to_replace The string values to replace. - * @param replacement The replacement values. - * @param offsets The column which will contain the offsets of the new string column - * @param indices Temporary column used to store the replacement indices - * @param output_valid The output null_mask - * @param output_valid_count The output valid count - */ -template -CUDF_KERNEL void replace_strings_first_pass(cudf::column_device_view input, - cudf::column_device_view values_to_replace, - cudf::column_device_view replacement, - cudf::mutable_column_device_view offsets, - cudf::mutable_column_device_view indices, - cudf::bitmask_type* output_valid, - cudf::size_type* __restrict__ output_valid_count) -{ - cudf::size_type nrows = input.size(); - auto tid = cudf::detail::grid_1d::global_thread_id(); - auto const stride = cudf::detail::grid_1d::grid_stride(); - uint32_t active_mask = 0xffff'ffffu; - active_mask = __ballot_sync(active_mask, tid < nrows); - auto const lane_id{threadIdx.x % cudf::detail::warp_size}; - uint32_t valid_sum{0}; - - while (tid < nrows) { - auto const idx = static_cast(tid); - bool input_is_valid = true; - - if (input_has_nulls) input_is_valid = input.is_valid_nocheck(idx); - bool output_is_valid = input_is_valid; - - if (input_is_valid) { - int result = get_new_string_value(idx, input, values_to_replace, replacement); - cudf::string_view output = (result == -1) ? input.element(idx) - : replacement.element(result); - offsets.data()[idx] = output.size_bytes(); - indices.data()[idx] = result; - if (replacement_has_nulls && result != -1) { - output_is_valid = replacement.is_valid_nocheck(result); - } - } else { - offsets.data()[idx] = 0; - indices.data()[idx] = -1; - } - - uint32_t bitmask = __ballot_sync(active_mask, output_is_valid); - if (0 == lane_id) { - output_valid[cudf::word_index(idx)] = bitmask; - valid_sum += __popc(bitmask); - } - - tid += stride; - active_mask = __ballot_sync(active_mask, tid < nrows); - } - - // Compute total valid count for this block and add it to global count - uint32_t block_valid_count = cudf::detail::single_lane_block_sum_reduce(valid_sum); - // one thread computes and adds to output_valid_count - if (threadIdx.x == 0) { - atomicAdd(output_valid_count, static_cast(block_valid_count)); - } -} - -/** - * @brief Kernel which does the second pass of strings replace. - * - * It copies the string data needed from input and replacement into the new strings column chars - * column. - * - * @param input The input column - * @param replacement The replacement values - * @param offsets The offsets column of the new strings column - * @param strings The chars column of the new strings column - * @param indices Temporary column used to store the replacement indices. - */ -template -CUDF_KERNEL void replace_strings_second_pass(cudf::column_device_view input, - cudf::column_device_view replacement, - cudf::mutable_column_device_view offsets, - char* strings, - cudf::mutable_column_device_view indices) -{ - cudf::size_type nrows = input.size(); - auto tid = cudf::detail::grid_1d::global_thread_id(); - auto const stride = cudf::detail::grid_1d::grid_stride(); - - while (tid < nrows) { - auto const idx = static_cast(tid); - auto const replace_idx = indices.element(idx); - bool output_is_valid = true; - bool input_is_valid = true; - - if (input_has_nulls) { - input_is_valid = input.is_valid_nocheck(idx); - output_is_valid = input_is_valid; - } - if (replacement_has_nulls && replace_idx != -1) { - output_is_valid = replacement.is_valid_nocheck(replace_idx); - } - if (output_is_valid) { - cudf::string_view output = (replace_idx == -1) - ? input.element(idx) - : replacement.element(replace_idx); - std::memcpy( - strings + offsets.data()[idx], output.data(), output.size_bytes()); - } - - tid += stride; - } -} - /** * @brief Kernel that replaces elements from `output_data` given the following * rule: replace all `values_to_replace[i]` in [values_to_replace_begin`, @@ -375,79 +240,8 @@ std::unique_ptr replace_kernel_forwarder::operator() valid_counter(0, stream); - cudf::size_type* valid_count = valid_counter.data(); - - auto replace_first = replace_strings_first_pass; - auto replace_second = replace_strings_second_pass; - if (input_col.has_nulls()) { - if (replacement_values.has_nulls()) { - replace_first = replace_strings_first_pass; - replace_second = replace_strings_second_pass; - } - } else { - if (replacement_values.has_nulls()) { - replace_first = replace_strings_first_pass; - replace_second = replace_strings_second_pass; - } else { - replace_first = replace_strings_first_pass; - replace_second = replace_strings_second_pass; - } - } - - // Create new offsets column to use in kernel - std::unique_ptr sizes = - cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, - input_col.size(), - cudf::mask_state::UNALLOCATED, - stream); - std::unique_ptr indices = - cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, - input_col.size(), - cudf::mask_state::UNALLOCATED, - stream); - - auto sizes_view = sizes->mutable_view(); - auto indices_view = indices->mutable_view(); - - auto device_in = cudf::column_device_view::create(input_col, stream); - auto device_values_to_replace = cudf::column_device_view::create(values_to_replace, stream); - auto device_replacement = cudf::column_device_view::create(replacement_values, stream); - auto device_sizes = cudf::mutable_column_device_view::create(sizes_view, stream); - auto device_indices = cudf::mutable_column_device_view::create(indices_view, stream); - - rmm::device_buffer valid_bits = - cudf::detail::create_null_mask(input_col.size(), cudf::mask_state::UNINITIALIZED, stream, mr); - - // Call first pass kernel to get sizes in offsets - cudf::detail::grid_1d grid{input_col.size(), BLOCK_SIZE, 1}; - replace_first<<>>( - *device_in, - *device_values_to_replace, - *device_replacement, - *device_sizes, - *device_indices, - reinterpret_cast(valid_bits.data()), - valid_count); - - auto [offsets, bytes] = cudf::detail::make_offsets_child_column( - sizes_view.begin(), sizes_view.end(), stream, mr); - auto offsets_view = offsets->mutable_view(); - auto device_offsets = cudf::mutable_column_device_view::create(offsets_view, stream); - - // Allocate chars array and output null mask - cudf::size_type null_count = input_col.size() - valid_counter.value(stream); - rmm::device_uvector output_chars(bytes, stream, mr); - auto d_chars = output_chars.data(); - - replace_second<<>>( - *device_in, *device_replacement, *device_offsets, d_chars, *device_indices); - - return cudf::make_strings_column(input_col.size(), - std::move(offsets), - output_chars.release(), - null_count, - std::move(valid_bits)); + return cudf::strings::detail::find_and_replace_all( + input_col, values_to_replace, replacement_values, stream, mr); } template <> diff --git a/cpp/src/strings/replace/find_replace.cu b/cpp/src/strings/replace/find_replace.cu new file mode 100644 index 00000000000..1a6e5713230 --- /dev/null +++ b/cpp/src/strings/replace/find_replace.cu @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf { +namespace strings { +namespace detail { +namespace { +struct find_replace_fn { + column_device_view d_input; + column_device_view d_values; + column_device_view d_replacements; + + __device__ string_index_pair get_replacement(size_type idx) + { + if (d_replacements.is_null(idx)) { return string_index_pair{nullptr, 0}; } + auto const d_str = d_replacements.element(idx); + return string_index_pair{d_str.data(), d_str.size_bytes()}; + } + + __device__ string_index_pair operator()(size_type idx) + { + if (d_input.is_null(idx)) { return string_index_pair{nullptr, 0}; } + auto const d_str = d_input.element(idx); + // find d_str in d_values + // if found return corresponding replacement + // if not found, return d_str + auto const begin = thrust::counting_iterator(0); + auto const end = thrust::counting_iterator(d_values.size()); + auto const itr = + thrust::find_if(thrust::seq, begin, end, [d_values = d_values, d_str](size_type i) -> bool { + return d_str == d_values.element(i); + }); + return itr == end ? string_index_pair{d_str.data(), d_str.size_bytes()} : get_replacement(*itr); + } +}; + +} // namespace + +std::unique_ptr find_and_replace_all( + cudf::strings_column_view const& input, + cudf::strings_column_view const& values_to_replace, + cudf::strings_column_view const& replacement_values, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (input.is_empty()) { return cudf::make_empty_column(type_id::STRING); } + + auto d_input = cudf::column_device_view::create(input.parent(), stream); + auto d_values_to_replace = cudf::column_device_view::create(values_to_replace.parent(), stream); + auto d_replacements = cudf::column_device_view::create(replacement_values.parent(), stream); + + auto indices = rmm::device_uvector(input.size(), stream); + + thrust::transform(rmm::exec_policy_nosync(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + indices.begin(), + find_replace_fn{*d_input, *d_values_to_replace, *d_replacements}); + + return make_strings_column(indices.begin(), indices.end(), stream, mr); +} + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index 8685e7300ba..613034efc12 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -97,9 +97,7 @@ TEST_F(ReplaceStringsTest, Strings) ASSERT_NO_THROW(result = cudf::find_and_replace_all( input_wrapper, values_to_replace_wrapper, replacement_wrapper)); std::vector expected{"z", "b", "c", "d", "e", "f", "g", "h"}; - std::vector ex_valid{1, 1, 1, 1, 1, 1, 1, 1}; - cudf::test::strings_column_wrapper expected_wrapper{ - expected.begin(), expected.end(), ex_valid.begin()}; + cudf::test::strings_column_wrapper expected_wrapper{expected.begin(), expected.end()}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected_wrapper); } @@ -160,7 +158,6 @@ TEST_F(ReplaceStringsTest, StringsResultAllEmpty) std::vector replacement{"a", ""}; std::vector replacement_valid{1, 1}; std::vector expected{"", "", "", "", "", "", "", ""}; - std::vector ex_valid{1, 1, 1, 1, 1, 1, 1, 1}; cudf::test::strings_column_wrapper input_wrapper{input.begin(), input.end()}; cudf::test::strings_column_wrapper values_to_replace_wrapper{values_to_replace.begin(), values_to_replace.end()}; @@ -170,8 +167,7 @@ TEST_F(ReplaceStringsTest, StringsResultAllEmpty) std::unique_ptr result; ASSERT_NO_THROW(result = cudf::find_and_replace_all( input_wrapper, values_to_replace_wrapper, replacement_wrapper)); - cudf::test::strings_column_wrapper expected_wrapper{ - expected.begin(), expected.end(), ex_valid.begin()}; + cudf::test::strings_column_wrapper expected_wrapper{expected.begin(), expected.end()}; CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected_wrapper); } From 4eb463286f63f168d63d947ebb01a6b23cb0c96b Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 20 Mar 2024 15:39:46 -0400 Subject: [PATCH 2/2] remove unneeded empty check --- cpp/src/strings/replace/find_replace.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/strings/replace/find_replace.cu b/cpp/src/strings/replace/find_replace.cu index 1a6e5713230..818bfa58427 100644 --- a/cpp/src/strings/replace/find_replace.cu +++ b/cpp/src/strings/replace/find_replace.cu @@ -67,8 +67,6 @@ std::unique_ptr find_and_replace_all( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if (input.is_empty()) { return cudf::make_empty_column(type_id::STRING); } - auto d_input = cudf::column_device_view::create(input.parent(), stream); auto d_values_to_replace = cudf::column_device_view::create(values_to_replace.parent(), stream); auto d_replacements = cudf::column_device_view::create(replacement_values.parent(), stream);