diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu
index 2c6a0b2cf22..bc01a46ca6d 100644
--- a/cpp/src/strings/split/split.cu
+++ b/cpp/src/strings/split/split.cu
@@ -34,6 +34,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh
index 69a11aabfcd..ae3c0b3aa12 100644
--- a/cpp/src/strings/split/split.cuh
+++ b/cpp/src/strings/split/split.cuh
@@ -30,12 +30,9 @@
 
 #include
 #include
-#include
 #include
-#include
 #include
 #include
-#include
 #include
 
 namespace cudf::strings::detail {
@@ -297,6 +294,44 @@ std::unique_ptr<column> create_offsets_from_positions(strings_column_view const&
                                                       rmm::cuda_stream_view stream,
                                                       rmm::device_async_resource_ref mr);
 
+/**
+ * @brief Count the number of delimiters in a strings column
+ *
+ * @tparam Tokenizer Functor containing `is_delimiter` function
+ * @tparam block_size Number of threads per block
+ * @tparam bytes_per_thread Number of bytes processed per thread
+ *
+ * @param tokenizer For checking delimiters
+ * @param d_offsets Offsets for the strings column
+ * @param chars_bytes Number of bytes in the strings column
+ * @param d_output Result of the count
+ */
+template <typename Tokenizer, int64_t block_size, size_type bytes_per_thread>
+CUDF_KERNEL void count_delimiters_kernel(Tokenizer tokenizer,
+                                         cudf::detail::input_offsetalator d_offsets,
+                                         int64_t chars_bytes,
+                                         int64_t* d_output)
+{
+  auto const idx      = cudf::detail::grid_1d::global_thread_id();
+  auto const byte_idx = static_cast<int64_t>(idx) * bytes_per_thread;
+  auto const lane_idx = static_cast<cudf::size_type>(threadIdx.x);
+
+  using block_reduce = cub::BlockReduce<int64_t, block_size>;
+  __shared__ typename block_reduce::TempStorage temp_storage;
+
+  int64_t count = 0;
+  // each thread processes multiple bytes
+  for (auto i = byte_idx; (i < (byte_idx + bytes_per_thread)) && (i < chars_bytes); ++i) {
+    count += tokenizer.is_delimiter(i, d_offsets, chars_bytes);
+  }
+  auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum());
+
+  if ((lane_idx == 0) && (total > 0)) {
+    cuda::atomic_ref<int64_t, cuda::thread_scope_device> ref{*d_output};
+    ref.fetch_add(total, cuda::std::memory_order_relaxed);
+  }
+}
+
 /**
  * @brief Helper function used by split/rsplit and split_record/rsplit_record
  *
@@ -326,17 +361,19 @@ std::pair<std::unique_ptr<column>, rmm::device_uvector<string_index_pair>> split
     cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset());
 
   // count the number of delimiters in the entire column
-  auto const delimiter_count =
-    thrust::count_if(rmm::exec_policy(stream),
-                     thrust::counting_iterator<int64_t>(0),
-                     thrust::counting_iterator<int64_t>(chars_bytes),
-                     [tokenizer, d_offsets, chars_bytes] __device__(int64_t idx) {
-                       return tokenizer.is_delimiter(idx, d_offsets, chars_bytes);
-                     });
+  rmm::device_scalar<int64_t> d_count(0, stream);
+  constexpr int64_t block_size         = 512;
+  constexpr size_type bytes_per_thread = 4;
+  auto const num_blocks = util::div_rounding_up_safe(
+    util::div_rounding_up_safe(chars_bytes, static_cast<int64_t>(bytes_per_thread)), block_size);
+  count_delimiters_kernel<Tokenizer, block_size, bytes_per_thread>
+    <<<num_blocks, block_size, 0, stream.value()>>>(
+      tokenizer, d_offsets, chars_bytes, d_count.data());
+
   // Create a vector of every delimiter position in the chars column.
   // These may include overlapping or otherwise out-of-bounds delimiters which
   // will be resolved during token processing.
-  auto delimiter_positions = rmm::device_uvector<int64_t>(delimiter_count, stream);
+  auto delimiter_positions = rmm::device_uvector<int64_t>(d_count.value(stream), stream);
   auto d_positions         = delimiter_positions.data();
   cudf::detail::copy_if_safe(
     thrust::counting_iterator<int64_t>(0),
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 42b7f089d61..b5515cfcfa6 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -574,6 +574,7 @@ ConfigureTest(
   large_strings/merge_tests.cpp
   large_strings/parquet_tests.cpp
   large_strings/reshape_tests.cpp
+  large_strings/split_strings_tests.cpp
   GPUS 1
   PERCENT 100
 )
diff --git a/cpp/tests/large_strings/split_strings_tests.cpp b/cpp/tests/large_strings/split_strings_tests.cpp
new file mode 100644
index 00000000000..320fb222241
--- /dev/null
+++ b/cpp/tests/large_strings/split_strings_tests.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "large_strings_fixture.hpp"
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+struct StringsSplitTest : public cudf::test::StringsLargeTest {};
+
+TEST_F(StringsSplitTest, Split)
+{
+  auto const expected   = this->long_column();
+  auto const view       = cudf::column_view(expected);
+  auto const multiplier = 10;
+  auto const separator  = cudf::string_scalar("|");
+  auto const input      = cudf::strings::concatenate(
+    cudf::table_view(std::vector<cudf::column_view>(multiplier, view)), separator);
+
+  {
+    auto result = cudf::strings::split(cudf::strings_column_view(input->view()), separator);
+    for (auto c : result->view()) {
+      CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, expected);
+    }
+  }
+
+  auto lc = cudf::strings::split_record(cudf::strings_column_view(input->view()), separator);
+  auto lv = cudf::lists_column_view(lc->view());
+  auto sv = cudf::strings_column_view(lv.child());
+  EXPECT_EQ(sv.size(), view.size() * multiplier);
+  EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64});
+}
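
Note on the counting change above: the generic per-byte thrust::count_if is replaced by count_delimiters_kernel, in which each thread scans bytes_per_thread bytes, the per-thread counts are combined with a cub::BlockReduce, and each block issues at most one cuda::atomic_ref::fetch_add into the device-wide total. For anyone who wants to try that pattern outside libcudf, here is a minimal standalone CUDA sketch. The count_pipes_kernel name, the '|' predicate, and the host-side scaffolding are illustrative assumptions and not part of this change; only the block_size = 512 and bytes_per_thread = 4 values mirror the diff.

// Standalone sketch of the counting pattern used by count_delimiters_kernel:
// per-thread byte chunks -> cub::BlockReduce -> one atomic add per block.
// The '|' predicate and the host scaffolding are illustrative assumptions.
#include <cub/cub.cuh>
#include <cuda/atomic>

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr int block_size       = 512;  // threads per block (same value as the diff)
constexpr int bytes_per_thread = 4;    // bytes examined per thread (same value as the diff)

__global__ void count_pipes_kernel(char const* d_chars, int64_t chars_bytes, int64_t* d_output)
{
  auto const idx      = static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;
  auto const byte_idx = idx * bytes_per_thread;

  using block_reduce = cub::BlockReduce<int64_t, block_size>;
  __shared__ typename block_reduce::TempStorage temp_storage;

  // each thread examines a small contiguous range of bytes
  int64_t count = 0;
  for (auto i = byte_idx; (i < byte_idx + bytes_per_thread) && (i < chars_bytes); ++i) {
    count += (d_chars[i] == '|');
  }

  // reduce within the block, then a single atomic add per block into the global total
  auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum());
  if (threadIdx.x == 0 && total > 0) {
    cuda::atomic_ref<int64_t, cuda::thread_scope_device> ref{*d_output};
    ref.fetch_add(total, cuda::std::memory_order_relaxed);
  }
}

int main()
{
  // build a 1 MiB buffer with a '|' every 100 bytes
  std::vector<char> h_chars(1 << 20, 'x');
  for (std::size_t i = 0; i < h_chars.size(); i += 100) { h_chars[i] = '|'; }

  char* d_chars{};
  int64_t* d_count{};
  cudaMalloc(&d_chars, h_chars.size());
  cudaMalloc(&d_count, sizeof(int64_t));
  cudaMemcpy(d_chars, h_chars.data(), h_chars.size(), cudaMemcpyHostToDevice);
  cudaMemset(d_count, 0, sizeof(int64_t));

  auto const chars_bytes     = static_cast<int64_t>(h_chars.size());
  auto const bytes_per_block = static_cast<int64_t>(block_size) * bytes_per_thread;
  auto const num_blocks      = (chars_bytes + bytes_per_block - 1) / bytes_per_block;
  count_pipes_kernel<<<static_cast<unsigned int>(num_blocks), block_size>>>(
    d_chars, chars_bytes, d_count);

  int64_t h_count = 0;
  cudaMemcpy(&h_count, d_count, sizeof(int64_t), cudaMemcpyDeviceToHost);
  std::printf("delimiters found: %lld\n", static_cast<long long>(h_count));

  cudaFree(d_chars);
  cudaFree(d_count);
  return 0;
}

Compiled with nvcc and run, the sketch should report roughly one delimiter per 100 bytes of the generated buffer, matching what the host loop inserts.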