diff --git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp index ae6327f4ce0..e0b801ea0a7 100644 --- a/cpp/benchmarks/string/slice.cpp +++ b/cpp/benchmarks/string/slice.cpp @@ -43,7 +43,7 @@ static void BM_slice(benchmark::State& state, slice_type rt) cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); cudf::strings_column_view input(column->view()); - auto starts_itr = thrust::constant_iterator(1); + auto starts_itr = thrust::constant_iterator(max_str_length / 3); auto stops_itr = thrust::constant_iterator(max_str_length / 2); cudf::test::fixed_width_column_wrapper starts(starts_itr, starts_itr + n_rows); cudf::test::fixed_width_column_wrapper stops(stops_itr, stops_itr + n_rows); @@ -53,7 +53,9 @@ static void BM_slice(benchmark::State& state, slice_type rt) for (auto _ : state) { cuda_event_timer raii(state, true, cudf::get_default_stream()); switch (rt) { - case position: cudf::strings::slice_strings(input, 1, max_str_length / 2); break; + case position: + cudf::strings::slice_strings(input, max_str_length / 3, max_str_length / 2); + break; case multi_position: cudf::strings::slice_strings(input, starts, stops); break; case delimiter: cudf::strings::slice_strings(input, std::string{" "}, 1); break; case multi_delimiter: @@ -72,7 +74,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) int const row_mult = 8; int const min_rowlen = 1 << 5; int const max_rowlen = 1 << 13; - int const len_mult = 4; + int const len_mult = 2; generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); } diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index 29062167f11..97d14a1e24b 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -137,7 +137,7 @@ __device__ inline string_view::const_iterator string_view::const_iterator::opera } __device__ inline string_view::const_iterator string_view::const_iterator::operator+( - string_view::const_iterator::difference_type offset) + string_view::const_iterator::difference_type offset) const { const_iterator tmp(*this); size_type adjust = abs(offset); @@ -181,7 +181,7 @@ __device__ inline string_view::const_iterator& string_view::const_iterator::oper } __device__ inline string_view::const_iterator string_view::const_iterator::operator-( - string_view::const_iterator::difference_type offset) + string_view::const_iterator::difference_type offset) const { const_iterator tmp(*this); size_type adjust = abs(offset); @@ -396,12 +396,12 @@ __device__ inline size_type string_view::rfind(char_utf8 chr, size_type pos, siz } // parameters are character position values -__device__ inline string_view string_view::substr(size_type pos, size_type length) const +__device__ inline string_view string_view::substr(size_type pos, size_type count) const { - size_type spos = byte_offset(pos); - size_type epos = byte_offset(pos + length); - if (epos > size_bytes()) epos = size_bytes(); - if (spos >= epos) return string_view("", 0); + if (pos < 0 || pos >= length()) { return string_view{}; } + auto const itr = begin() + pos; + auto const spos = itr.byte_offset(); + auto const epos = count >= 0 ? (itr + count).byte_offset() : size_bytes(); return string_view(data() + spos, epos - spos); } diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp index 28f9d57e9bd..680b7c395ca 100644 --- a/cpp/include/cudf/strings/string_view.hpp +++ b/cpp/include/cudf/strings/string_view.hpp @@ -88,11 +88,11 @@ class string_view { __device__ inline const_iterator& operator++(); __device__ inline const_iterator operator++(int); __device__ inline const_iterator& operator+=(difference_type); - __device__ inline const_iterator operator+(difference_type); + __device__ inline const_iterator operator+(difference_type) const; __device__ inline const_iterator& operator--(); __device__ inline const_iterator operator--(int); __device__ inline const_iterator& operator-=(difference_type); - __device__ inline const_iterator operator-(difference_type); + __device__ inline const_iterator operator-(difference_type) const; __device__ inline bool operator==(const const_iterator&) const; __device__ inline bool operator!=(const const_iterator&) const; __device__ inline bool operator<(const const_iterator&) const; diff --git a/cpp/src/strings/slice.cu b/cpp/src/strings/slice.cu index f63a15a5b96..fbe54c01bba 100644 --- a/cpp/src/strings/slice.cu +++ b/cpp/src/strings/slice.cu @@ -30,12 +30,44 @@ #include #include +#include #include +#include namespace cudf { namespace strings { namespace detail { namespace { +/** + * @brief Function logic for compute_substrings_from_fn API + * + * This computes the output size and resolves the substring + */ +template +struct substring_from_fn { + column_device_view const d_column; + IndexIterator const starts; + IndexIterator const stops; + + __device__ string_view operator()(size_type idx) const + { + if (d_column.is_null(idx)) { return string_view{nullptr, 0}; } + auto const d_str = d_column.template element(idx); + auto const length = d_str.length(); + auto const start = std::max(starts[idx], 0); + if (start >= length) { return string_view{}; } + + auto const stop = stops[idx]; + auto const end = (((stop < 0) || (stop > length)) ? length : stop); + return d_str.substr(start, end - start); + } + + substring_from_fn(column_device_view const& d_column, IndexIterator starts, IndexIterator stops) + : d_column(d_column), starts(starts), stops(stops) + { + } +}; + /** * @brief Function logic for the substring API. * @@ -92,14 +124,50 @@ struct substring_fn { char* d_buffer = d_chars ? d_chars + d_offsets[idx] : nullptr; auto itr = begin; while (step > 0 ? itr < end : end < itr) { - bytes += bytes_in_char_utf8(*itr); - if (d_buffer) d_buffer += from_char_utf8(*itr, d_buffer); + if (d_buffer) { + d_buffer += from_char_utf8(*itr, d_buffer); + } else { + bytes += bytes_in_char_utf8(*itr); + } itr += step; } if (!d_chars) d_offsets[idx] = bytes; } }; +/** + * @brief Common utility function for the slice_strings APIs + * + * It wraps calling the functors appropriately to build the output strings column. + * + * The input iterators may have unique position values per string in `d_column`. + * This can also be called with constant value iterators to handle special + * slice functions if possible. + * + * @tparam IndexIterator Iterator type for character position values + * + * @param d_column Input strings column to substring + * @param starts Start positions index iterator + * @param stops Stop positions index iterator + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +template +std::unique_ptr compute_substrings_from_fn(column_device_view const& d_column, + IndexIterator starts, + IndexIterator stops, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto results = rmm::device_uvector(d_column.size(), stream); + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(d_column.size()), + results.begin(), + substring_from_fn{d_column, starts, stops}); + return make_strings_column(results, string_view{nullptr, 0}, stream, mr); +} + } // namespace // @@ -112,116 +180,69 @@ std::unique_ptr slice_strings(strings_column_view const& strings, { if (strings.is_empty()) return make_empty_column(type_id::STRING); - if (step.is_valid(stream)) CUDF_EXPECTS(step.value(stream) != 0, "Step parameter must not be 0"); + auto const step_valid = step.is_valid(stream); + auto const step_value = step_valid ? step.value(stream) : 0; + if (step_valid) { CUDF_EXPECTS(step_value != 0, "Step parameter must not be 0"); } auto const d_column = column_device_view::create(strings.parent(), stream); - auto const d_start = get_scalar_device_view(const_cast&>(start)); - auto const d_stop = get_scalar_device_view(const_cast&>(stop)); - auto const d_step = get_scalar_device_view(const_cast&>(step)); - auto children = make_strings_children( + // optimization for (step==1 and start < stop) -- expect this to be most common + if (step_value == 1 and start.is_valid(stream) and stop.is_valid(stream)) { + auto const start_value = start.value(stream); + auto const stop_value = stop.value(stream); + // note that any negative values here must use the alternate function below + if ((start_value >= 0) && (start_value < stop_value)) { + // this is about 2x faster on long strings for this common case + return compute_substrings_from_fn(*d_column, + thrust::constant_iterator(start_value), + thrust::constant_iterator(stop_value), + stream, + mr); + } + } + + auto const d_start = get_scalar_device_view(const_cast&>(start)); + auto const d_stop = get_scalar_device_view(const_cast&>(stop)); + auto const d_step = get_scalar_device_view(const_cast&>(step)); + + auto [offsets, chars] = make_strings_children( substring_fn{*d_column, d_start, d_stop, d_step}, strings.size(), stream, mr); return make_strings_column(strings.size(), - std::move(children.first), - std::move(children.second), + std::move(offsets), + std::move(chars), strings.null_count(), cudf::detail::copy_bitmask(strings.parent(), stream, mr)); } -} // namespace detail - -// external API - std::unique_ptr slice_strings(strings_column_view const& strings, - numeric_scalar const& start, - numeric_scalar const& stop, - numeric_scalar const& step, + column_view const& starts_column, + column_view const& stops_column, + rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_FUNC_RANGE(); - return detail::slice_strings(strings, start, stop, step, cudf::get_default_stream(), mr); + size_type strings_count = strings.size(); + if (strings_count == 0) return make_empty_column(type_id::STRING); + CUDF_EXPECTS(starts_column.size() == strings_count, + "Parameter starts must have the same number of rows as strings."); + CUDF_EXPECTS(stops_column.size() == strings_count, + "Parameter stops must have the same number of rows as strings."); + CUDF_EXPECTS(starts_column.type() == stops_column.type(), + "Parameters starts and stops must be of the same type."); + CUDF_EXPECTS(starts_column.null_count() == 0, "Parameter starts must not contain nulls."); + CUDF_EXPECTS(stops_column.null_count() == 0, "Parameter stops must not contain nulls."); + CUDF_EXPECTS(starts_column.type().id() != data_type{type_id::BOOL8}.id(), + "Positions values must not be bool type."); + CUDF_EXPECTS(is_fixed_width(starts_column.type()), "Positions values must be fixed width type."); + + auto strings_column = column_device_view::create(strings.parent(), stream); + auto starts_iter = cudf::detail::indexalator_factory::make_input_iterator(starts_column); + auto stops_iter = cudf::detail::indexalator_factory::make_input_iterator(stops_column); + return compute_substrings_from_fn(*strings_column, starts_iter, stops_iter, stream, mr); } -namespace detail { namespace { -/** - * @brief Function logic for substring_from API. - * - * This both calculates the output size and executes the substring. - */ -struct substring_from_fn { - column_device_view const d_column; - cudf::detail::input_indexalator const starts; - cudf::detail::input_indexalator const stops; - int32_t* d_offsets{}; - char* d_chars{}; - - __device__ void operator()(size_type idx) - { - if (d_column.is_null(idx)) { - if (!d_chars) d_offsets[idx] = 0; - return; - } - auto const d_str = d_column.template element(idx); - auto const length = d_str.length(); - auto const start = std::max(starts[idx], 0); - if (start >= length) { - if (!d_chars) d_offsets[idx] = 0; - return; - } - auto const stop = stops[idx]; - auto const end = (((stop < 0) || (stop > length)) ? length : stop); - - auto const d_substr = d_str.substr(start, end - start); - if (d_chars) - memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes()); - else - d_offsets[idx] = d_substr.size_bytes(); - } -}; - -/** - * @brief Common utility function for the slice_strings APIs. - * - * It wraps calling the functors appropriately to build the output strings column. - * - * The input iterators may have unique position values per string in `d_column`. - * - * @param d_column Input strings column to substring. - * @param null_count Number of nulls for the output column. - * @param starts Start positions index iterator. - * @param stops Stop positions index iterator. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - */ -std::unique_ptr compute_substrings_from_fn(column_device_view const& d_column, - size_type null_count, - cudf::detail::input_indexalator starts, - cudf::detail::input_indexalator stops, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto strings_count = d_column.size(); - - // Copy the null mask - rmm::device_buffer null_mask = - !d_column.nullable() - ? rmm::device_buffer{0, stream, mr} - : rmm::device_buffer( - d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr); - - auto children = - make_strings_children(substring_from_fn{d_column, starts, stops}, strings_count, stream, mr); - - return make_strings_column(strings_count, - std::move(children.first), - std::move(children.second), - null_count, - std::move(null_mask)); -} - /** * @brief Compute slice indices for each string. * @@ -287,34 +308,6 @@ void compute_substring_indices(column_device_view const& d_column, } // namespace -// -std::unique_ptr slice_strings(strings_column_view const& strings, - column_view const& starts_column, - column_view const& stops_column, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - size_type strings_count = strings.size(); - if (strings_count == 0) return make_empty_column(type_id::STRING); - CUDF_EXPECTS(starts_column.size() == strings_count, - "Parameter starts must have the same number of rows as strings."); - CUDF_EXPECTS(stops_column.size() == strings_count, - "Parameter stops must have the same number of rows as strings."); - CUDF_EXPECTS(starts_column.type() == stops_column.type(), - "Parameters starts and stops must be of the same type."); - CUDF_EXPECTS(starts_column.null_count() == 0, "Parameter starts must not contain nulls."); - CUDF_EXPECTS(stops_column.null_count() == 0, "Parameter stops must not contain nulls."); - CUDF_EXPECTS(starts_column.type().id() != data_type{type_id::BOOL8}.id(), - "Positions values must not be bool type."); - CUDF_EXPECTS(is_fixed_width(starts_column.type()), "Positions values must be fixed width type."); - - auto strings_column = column_device_view::create(strings.parent(), stream); - auto starts_iter = cudf::detail::indexalator_factory::make_input_iterator(starts_column); - auto stops_iter = cudf::detail::indexalator_factory::make_input_iterator(stops_column); - return compute_substrings_from_fn( - *strings_column, strings.null_count(), starts_iter, stops_iter, stream, mr); -} - template std::unique_ptr slice_strings(strings_column_view const& strings, DelimiterItrT const delimiter_itr, @@ -354,8 +347,7 @@ std::unique_ptr slice_strings(strings_column_view const& strings, cudf::detail::indexalator_factory::make_input_iterator(start_chars_pos_vec->view()); auto stops_iter = cudf::detail::indexalator_factory::make_input_iterator(stop_chars_pos_vec->view()); - return compute_substrings_from_fn( - d_column, strings.null_count(), starts_iter, stops_iter, stream, mr); + return compute_substrings_from_fn(d_column, starts_iter, stops_iter, stream, mr); } std::unique_ptr slice_strings(strings_column_view const& strings, @@ -387,6 +379,16 @@ std::unique_ptr slice_strings(strings_column_view const& strings, // external API +std::unique_ptr slice_strings(strings_column_view const& strings, + numeric_scalar const& start, + numeric_scalar const& stop, + numeric_scalar const& step, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::slice_strings(strings, start, stop, step, cudf::get_default_stream(), mr); +} + std::unique_ptr slice_strings(strings_column_view const& strings, column_view const& starts_column, column_view const& stops_column,