Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of slice_strings for long strings #13057

Merged
merged 17 commits on Apr 18, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/benchmarks/string/slice.cpp
Original file line number Diff line number Diff line change
@@ -43,7 +43,7 @@ static void BM_slice(benchmark::State& state, slice_type rt)
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
cudf::strings_column_view input(column->view());
auto starts_itr = thrust::constant_iterator<cudf::size_type>(1);
auto starts_itr = thrust::constant_iterator<cudf::size_type>(max_str_length / 3);
auto stops_itr = thrust::constant_iterator<cudf::size_type>(max_str_length / 2);
cudf::test::fixed_width_column_wrapper<int32_t> starts(starts_itr, starts_itr + n_rows);
cudf::test::fixed_width_column_wrapper<int32_t> stops(stops_itr, stops_itr + n_rows);
@@ -53,7 +53,9 @@ static void BM_slice(benchmark::State& state, slice_type rt)
for (auto _ : state) {
cuda_event_timer raii(state, true, cudf::get_default_stream());
switch (rt) {
case position: cudf::strings::slice_strings(input, 1, max_str_length / 2); break;
case position:
cudf::strings::slice_strings(input, max_str_length / 3, max_str_length / 2);
break;
case multi_position: cudf::strings::slice_strings(input, starts, stops); break;
case delimiter: cudf::strings::slice_strings(input, std::string{" "}, 1); break;
case multi_delimiter:
@@ -72,7 +74,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b)
int const row_mult = 8;
int const min_rowlen = 1 << 5;
int const max_rowlen = 1 << 13;
int const len_mult = 4;
int const len_mult = 2;
generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult);
}

Expand Down
16 changes: 8 additions & 8 deletions cpp/include/cudf/strings/string_view.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -137,7 +137,7 @@ __device__ inline string_view::const_iterator string_view::const_iterator::opera
}

__device__ inline string_view::const_iterator string_view::const_iterator::operator+(
string_view::const_iterator::difference_type offset)
string_view::const_iterator::difference_type offset) const
{
const_iterator tmp(*this);
size_type adjust = abs(offset);
@@ -181,7 +181,7 @@ __device__ inline string_view::const_iterator& string_view::const_iterator::oper
}

__device__ inline string_view::const_iterator string_view::const_iterator::operator-(
string_view::const_iterator::difference_type offset)
string_view::const_iterator::difference_type offset) const
{
const_iterator tmp(*this);
size_type adjust = abs(offset);
@@ -396,12 +396,12 @@ __device__ inline size_type string_view::rfind(char_utf8 chr, size_type pos, siz
}

// Returns a view of the sub-string that begins at character position `pos`.
// Both parameters are expressed in characters, not bytes; the body converts
// them to byte offsets before building the result.
//
// NOTE(review): this span is rendered from a diff whose +/- markers were lost,
// so two versions of the function are interleaved below. The per-file summary
// (8 additions & 8 deletions) is consistent with reading the first signature
// and the four `byte_offset(...)`-based statements as the removed version, and
// the second signature plus the four iterator-based statements as the added
// version -- confirm against the merged file.
__device__ inline string_view string_view::substr(size_type pos, size_type length) const
__device__ inline string_view string_view::substr(size_type pos, size_type count) const
{
// Version taking `length`: the start and end character positions are each
// converted to a byte offset, the end is clamped to the string's byte size,
// and an empty view is returned when the resulting byte range is empty.
size_type spos = byte_offset(pos);
size_type epos = byte_offset(pos + length);
if (epos > size_bytes()) epos = size_bytes();
if (spos >= epos) return string_view("", 0);
// Version taking `count`: a start position that is negative or not less than
// length() yields a default-constructed string_view.
if (pos < 0 || pos >= length()) { return string_view{}; }
// The start byte offset is read from an iterator advanced `pos` characters
// from begin(); the end offset is read from that same iterator advanced a
// further `count` characters.
auto const itr = begin() + pos;
auto const spos = itr.byte_offset();
// A negative `count` selects everything through the end of the string.
auto const epos = count >= 0 ? (itr + count).byte_offset() : size_bytes();
return string_view(data() + spos, epos - spos);
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/string_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ class string_view {
__device__ inline const_iterator& operator++();
__device__ inline const_iterator operator++(int);
__device__ inline const_iterator& operator+=(difference_type);
__device__ inline const_iterator operator+(difference_type);
__device__ inline const_iterator operator+(difference_type) const;
__device__ inline const_iterator& operator--();
__device__ inline const_iterator operator--(int);
__device__ inline const_iterator& operator-=(difference_type);
__device__ inline const_iterator operator-(difference_type);
__device__ inline const_iterator operator-(difference_type) const;
__device__ inline bool operator==(const const_iterator&) const;
__device__ inline bool operator!=(const const_iterator&) const;
__device__ inline bool operator<(const const_iterator&) const;
Expand Down
Loading