Skip to content

Commit

Permalink
Change thrust::count_if call to raw kernel in strings split APIs (#15762
Browse files Browse the repository at this point in the history
)

Fixes calls to `thrust::count_if` in strings split APIs to better handle large strings.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Paul Mattione (https://github.com/pmattione-nvidia)

URL: #15762
  • Loading branch information
davidwendt authored May 30, 2024
1 parent 5ce95f0 commit 3e9cff2
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 11 deletions.
1 change: 1 addition & 0 deletions cpp/src/strings/split/split.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <rmm/resource_ref.hpp>

#include <cuda/functional>
#include <thrust/binary_search.h>
#include <thrust/fill.h>
#include <thrust/for_each.h>
#include <thrust/functional.h>
Expand Down
59 changes: 48 additions & 11 deletions cpp/src/strings/split/split.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,9 @@
#include <rmm/resource_ref.hpp>

#include <cuda/atomic>
#include <thrust/binary_search.h>
#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/for_each.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/scan.h>
#include <thrust/transform.h>

namespace cudf::strings::detail {
Expand Down Expand Up @@ -297,6 +294,44 @@ std::unique_ptr<column> create_offsets_from_positions(strings_column_view const&
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @brief Count the number of delimiters in a strings column
*
* @tparam Tokenizer Functor containing `is_delimiter` function
* @tparam block_size Number of threads per block
* @tparam bytes_per_thread Number of bytes processed per thread
*
* @param tokenizer For checking delimiters
* @param d_offsets Offsets for the strings column
* @param chars_bytes Number of bytes in the strings column
* @param d_output Result of the count
*/
template <typename Tokenizer, int64_t block_size, size_type bytes_per_thread>
CUDF_KERNEL void count_delimiters_kernel(Tokenizer tokenizer,
cudf::detail::input_offsetalator d_offsets,
int64_t chars_bytes,
int64_t* d_output)
{
auto const idx = cudf::detail::grid_1d::global_thread_id();
auto const byte_idx = static_cast<int64_t>(idx) * bytes_per_thread;
auto const lane_idx = static_cast<cudf::size_type>(threadIdx.x);

using block_reduce = cub::BlockReduce<int64_t, block_size>;
__shared__ typename block_reduce::TempStorage temp_storage;

int64_t count = 0;
// each thread processes multiple bytes
for (auto i = byte_idx; (i < (byte_idx + bytes_per_thread)) && (i < chars_bytes); ++i) {
count += tokenizer.is_delimiter(i, d_offsets, chars_bytes);
}
auto const total = block_reduce(temp_storage).Reduce(count, cub::Sum());

if ((lane_idx == 0) && (total > 0)) {
cuda::atomic_ref<int64_t, cuda::thread_scope_block> ref{*d_output};
ref.fetch_add(total, cuda::std::memory_order_relaxed);
}
}

/**
* @brief Helper function used by split/rsplit and split_record/rsplit_record
*
Expand Down Expand Up @@ -326,17 +361,19 @@ std::pair<std::unique_ptr<column>, rmm::device_uvector<string_index_pair>> split
cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset());

// count the number of delimiters in the entire column
auto const delimiter_count =
thrust::count_if(rmm::exec_policy(stream),
thrust::counting_iterator<int64_t>(0),
thrust::counting_iterator<int64_t>(chars_bytes),
[tokenizer, d_offsets, chars_bytes] __device__(int64_t idx) {
return tokenizer.is_delimiter(idx, d_offsets, chars_bytes);
});
rmm::device_scalar<int64_t> d_count(0, stream);
constexpr int64_t block_size = 512;
constexpr size_type bytes_per_thread = 4;
auto const num_blocks = util::div_rounding_up_safe(
util::div_rounding_up_safe(chars_bytes, static_cast<int64_t>(bytes_per_thread)), block_size);
count_delimiters_kernel<Tokenizer, block_size, bytes_per_thread>
<<<num_blocks, block_size, 0, stream.value()>>>(
tokenizer, d_offsets, chars_bytes, d_count.data());

// Create a vector of every delimiter position in the chars column.
// These may include overlapping or otherwise out-of-bounds delimiters which
// will be resolved during token processing.
auto delimiter_positions = rmm::device_uvector<int64_t>(delimiter_count, stream);
auto delimiter_positions = rmm::device_uvector<int64_t>(d_count.value(stream), stream);
auto d_positions = delimiter_positions.data();
cudf::detail::copy_if_safe(
thrust::counting_iterator<int64_t>(0),
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,7 @@ ConfigureTest(
large_strings/merge_tests.cpp
large_strings/parquet_tests.cpp
large_strings/reshape_tests.cpp
large_strings/split_strings_tests.cpp
GPUS 1
PERCENT 100
)
Expand Down
53 changes: 53 additions & 0 deletions cpp/tests/large_strings/split_strings_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "large_strings_fixture.hpp"

#include <cudf_test/column_utilities.hpp>

#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/split/split.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_view.hpp>

#include <vector>

struct StringsSplitTest : public cudf::test::StringsLargeTest {};

TEST_F(StringsSplitTest, Split)
{
auto const expected = this->long_column();
auto const view = cudf::column_view(expected);
auto const multiplier = 10;
auto const separator = cudf::string_scalar("|");
auto const input = cudf::strings::concatenate(
cudf::table_view(std::vector<cudf::column_view>(multiplier, view)), separator);

{
auto result = cudf::strings::split(cudf::strings_column_view(input->view()), separator);
for (auto c : result->view()) {
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, expected);
}
}

auto lc = cudf::strings::split_record(cudf::strings_column_view(input->view()), separator);
auto lv = cudf::lists_column_view(lc->view());
auto sv = cudf::strings_column_view(lv.child());
EXPECT_EQ(sv.size(), view.size() * multiplier);
EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64});
}

0 comments on commit 3e9cff2

Please sign in to comment.