From f6b8e2736c3b86d4d66093b13e4d629ef994c17a Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 11 Mar 2021 15:38:15 -0500 Subject: [PATCH 1/9] add benchmark for make_strings_column(pair) --- cpp/benchmarks/CMakeLists.txt | 1 + cpp/benchmarks/string/factory_benchmark.cu | 93 ++++++++++++++++++++++ cpp/tests/strings/factories_test.cu | 34 ++++++++ 3 files changed, 128 insertions(+) create mode 100644 cpp/benchmarks/string/factory_benchmark.cu diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index dfc340b1459..e63ea38a31b 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -185,6 +185,7 @@ ConfigureBench(STRINGS_BENCH string/convert_floats_benchmark.cpp string/copy_benchmark.cpp string/extract_benchmark.cpp + string/factory_benchmark.cu string/filter_benchmark.cpp string/find_benchmark.cpp string/replace_benchmark.cpp diff --git a/cpp/benchmarks/string/factory_benchmark.cu b/cpp/benchmarks/string/factory_benchmark.cu new file mode 100644 index 00000000000..bf3902153c3 --- /dev/null +++ b/cpp/benchmarks/string/factory_benchmark.cu @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#include + +#include "string_bench_args.hpp" + +namespace { +using string_pair = thrust::pair; +struct string_view_to_pair { + __device__ string_pair operator()(thrust::pair const& p) + { + return (p.second) ? string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, 0}; + } +}; +} // namespace + +class StringsFactory : public cudf::benchmark { +}; + +static void BM_factory(benchmark::State& state) +{ + cudf::size_type const n_rows{static_cast(state.range(0))}; + cudf::size_type const max_str_length{static_cast(state.range(1))}; + data_profile table_profile; + table_profile.set_distribution_params( + cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); + auto const table = + create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, table_profile); + auto d_column = cudf::column_device_view::create(table->view().column(0)); + rmm::device_vector pairs(d_column->size()); + thrust::transform(thrust::device, + d_column->pair_begin(), + d_column->pair_end(), + pairs.data(), + string_view_to_pair{}); + + for (auto _ : state) { + cuda_event_timer raii(state, true, 0); + cudf::make_strings_column(pairs); + } + + cudf::strings_column_view input(table->view().column(0)); + state.SetBytesProcessed(state.iterations() * input.chars_size()); +} + +static void generate_bench_args(benchmark::internal::Benchmark* b) +{ + int const min_rows = 1 << 12; + int const max_rows = 1 << 24; + int const row_mult = 8; + int const min_rowlen = 1 << 5; + int const max_rowlen = 1 << 13; + int const len_mult = 4; + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); +} + +#define STRINGS_BENCHMARK_DEFINE(name) \ + BENCHMARK_DEFINE_F(StringsFactory, name) \ + (::benchmark::State & st) { BM_factory(st); } \ + BENCHMARK_REGISTER_F(StringsFactory, name) \ + ->Apply(generate_bench_args) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +STRINGS_BENCHMARK_DEFINE(factory) diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index f904c404251..bd463a7ab0d 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -19,12 +19,18 @@ #include #include #include +#include #include #include #include #include #include +#include + +#include +#include + #include #include @@ -198,3 +204,31 @@ TEST_F(StringsFactoriesTest, CreateOffsets) } } } + +namespace { +using string_pair = thrust::pair; +struct string_view_to_pair { + __device__ string_pair operator()(thrust::pair const& p) + { + return (p.second) ? string_pair{p.first.data(), p.first.size_bytes()} : string_pair{nullptr, 0}; + } +}; +} // namespace + +TEST_F(StringsFactoriesTest, StringPairWithNullsAndEmpty) +{ + cudf::test::strings_column_wrapper data( + {"", "this", "is", "", "a", "", "column", "of", "strings", "", ""}, + {0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1}); + + auto d_column = cudf::column_device_view::create(data); + rmm::device_vector pairs(d_column->size()); + thrust::transform(thrust::device, + d_column->pair_begin(), + d_column->pair_end(), + pairs.data(), + string_view_to_pair{}); + + auto result = cudf::make_strings_column(pairs); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), data); +} From 1dde5cee52cc17844fbe574c54a7e5611266e36f Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 11 Mar 2021 15:38:49 -0500 Subject: [PATCH 2/9] Optimize cudf::make_strings_column for long strings --- .../detail/strings_column_factories.cuh | 94 +++++++++++++++---- 1 file changed, 77 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 8e843c555c5..8e5a09634d4 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -34,7 +35,27 @@ namespace cudf { namespace strings { namespace detail { -// Create a strings-type column from iterators of pointer/size pairs +/** + * @brief Average string byte-length threshold for deciding character-level + * vs. row-level parallel algorithm. + * + * This value was determined by running the factory_benchmark against different + * string lengths and observing the point where the performance is faster for + * long strings. + */ +constexpr size_type FACTORY_BYTES_PER_ROW_THRESHOLD = 64; + +/** + * @brief Create a strings-type column from iterators of pointer/size pairs + * + * @tparam IndexPairIterator iterator over type `pair` values + * + * @param begin First string row (inclusive) + * @param end Last string row (exclusive) + * @param stream CUDA stream used for device memory operations + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column + */ template std::unique_ptr make_strings_column(IndexPairIterator begin, IndexPairIterator end, @@ -51,7 +72,7 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, auto size_checker = [] __device__(string_index_pair const& item) { return (item.first != nullptr) ? item.second : 0; }; - size_t bytes = thrust::transform_reduce( + size_t const bytes = thrust::transform_reduce( rmm::exec_policy(stream), begin, end, size_checker, 0, thrust::plus()); CUDF_EXPECTS(bytes < static_cast(std::numeric_limits::max()), "total size of strings is too large for cudf column"); @@ -65,26 +86,50 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); // create null mask - auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; }; - auto new_nulls = cudf::detail::valid_if(begin, end, validator, stream, mr); - auto null_count = new_nulls.second; + auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; }; + auto new_nulls = cudf::detail::valid_if(begin, end, validator, stream, mr); + auto const null_count = new_nulls.second; auto null_mask = (null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr}; // build chars column auto chars_column = strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto d_chars = chars_column->mutable_view().template data(); - auto copy_chars = [d_chars] __device__(auto item) { - string_index_pair str = thrust::get<0>(item); - size_type offset = thrust::get<1>(item); - if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); - }; - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_tuple(begin, offsets_column->view().template begin())), - strings_count, - copy_chars); + auto d_chars = chars_column->mutable_view().template data(); + + // use a character-parallel kernel for long string lengths + auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); + if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) { + auto d_offsets = offsets_column->view().template data(); + // this algorithm is based on a similar one used in strings::detail::gather() + auto copy_chars = [begin, d_offsets, strings_count, d_chars] __device__(size_type out_idx) { + auto const next_row = + thrust::upper_bound(thrust::seq, d_offsets, d_offsets + strings_count + 1, out_idx); + auto const out_row = thrust::distance(d_offsets, next_row) - 1; + // get the corresponding pair + string_index_pair const in_row = begin[out_row]; + if (in_row.first && in_row.second) { + auto const offset = out_idx - d_offsets[out_row]; + d_chars[out_idx] = in_row.first[offset]; + } + }; + thrust::for_each_n(rmm::exec_policy(stream), + thrust::counting_iterator(0), + static_cast(bytes), + copy_chars); + } else { + // this approach is 2-3x faster for large number smaller string lengths + auto copy_chars = [d_chars] __device__(auto item) { + string_index_pair const str = thrust::get<0>(item); + size_type const offset = thrust::get<1>(item); + if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); + }; + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_zip_iterator(thrust::make_tuple( + begin, offsets_column->view().template begin())), + strings_count, + copy_chars); + } return make_strings_column(strings_count, std::move(offsets_column), @@ -95,7 +140,22 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, mr); } -// Create a strings-type column from iterators to chars, offsets, and bitmask. +/** + * @brief Create a strings-type column from iterators to chars, offsets, and bitmask. + * + * @tparam CharIterator iterator over character bytes (int8) + * @tparam OffsetIterator iterator over offset values (size_type) + * + * @param chars_begin First character byte (inclusive) + * @param chars_end Last character byte (exclusive) + * @param offset_begin First offset value (inclusive) + * @param offset_end Last offset value (exclusive) + * @param null_count Number of null rows + * @param null_mask The validity bitmask in Arrow format + * @param stream CUDA stream used for device memory operations + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column + */ template std::unique_ptr make_strings_column(CharIterator chars_begin, CharIterator chars_end, From 0507559dd21287d49f7a5cac0a7b6401f0b636e4 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 12 Mar 2021 20:00:19 -0500 Subject: [PATCH 3/9] move include of local file --- cpp/benchmarks/string/factory_benchmark.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/string/factory_benchmark.cu b/cpp/benchmarks/string/factory_benchmark.cu index bf3902153c3..6c5dceffaa8 100644 --- a/cpp/benchmarks/string/factory_benchmark.cu +++ b/cpp/benchmarks/string/factory_benchmark.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "string_bench_args.hpp" + #include #include #include @@ -30,8 +32,6 @@ #include -#include "string_bench_args.hpp" - namespace { using string_pair = thrust::pair; struct string_view_to_pair { From 226b347389ada806263b75f706a1901f54ca3ac3 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 12 Mar 2021 20:01:05 -0500 Subject: [PATCH 4/9] cudf/types.hpp include needed for cudf::size_type --- cpp/benchmarks/string/string_bench_args.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/benchmarks/string/string_bench_args.hpp b/cpp/benchmarks/string/string_bench_args.hpp index f81f859de74..9c709b064dd 100644 --- a/cpp/benchmarks/string/string_bench_args.hpp +++ b/cpp/benchmarks/string/string_bench_args.hpp @@ -17,6 +17,8 @@ #include +#include + /** * @brief Generate row count and row length argument ranges for a string benchmark. * From b9c3d5ac4cce777bb2e87f4efed804cd983e8f11 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 12 Mar 2021 20:01:29 -0500 Subject: [PATCH 5/9] remove unneeded is_signed_iterator --- cpp/include/cudf/detail/gather.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 87f5c9251c7..eee8c1941b8 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -83,8 +83,7 @@ struct gather_bitmask_functor { auto col = input.column(mask_idx); if (Op != gather_bitmask_op::DONT_CHECK) { - bool out_of_range = is_signed_iterator() ? (row_idx < 0 || row_idx >= col.size()) - : row_idx >= col.size(); + bool out_of_range = (row_idx < 0 || row_idx >= col.size()); if (out_of_range) { if (Op == gather_bitmask_op::PASSTHROUGH) { return bit_is_set(masks[mask_idx], bit_idx); From 97a63bc2f14b731c96bddfc388ecda456816ae75 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 12 Mar 2021 20:02:00 -0500 Subject: [PATCH 6/9] create common gather_chars for make_strings_column and strings::detail::gather --- cpp/include/cudf/strings/detail/gather.cuh | 87 +++++++++++++------ .../detail/strings_column_factories.cuh | 76 ++++++++-------- 2 files changed, 97 insertions(+), 66 deletions(-) diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 28da8ef4324..988fa552100 100644 --- a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -31,15 +31,60 @@ #include namespace cudf { +namespace strings { +namespace detail { -template -constexpr inline bool is_signed_iterator() +/** + * @brief Returns a new chars column using the specified indices to select + * strings from the input iterator. + * + * This uses a character-parallel gather CUDA kernel that performs very + * well on a strings column with long strings (e.g. average > 64 bytes). + * + * @tparam StringIterator Iterator should produce `string_view` objects. + * @tparam MapIterator Iterator for retrieving integer indices of the `StringIterator`. + * + * @param strings_begin Start of the iterator to retrieve `string_view` instances + * @param map_begin Start of index iterator. + * @param map_end End of index iterator. + * @param offsets The offset values to be associated with the output chars column. + * @param chars_bytes The total number of bytes for the output chars column. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @return New chars column fit for a strings column. + */ +template +std::unique_ptr gather_chars(StringIterator strings_begin, + MapIterator map_begin, + MapIterator map_end, + cudf::device_span const offsets, + size_type chars_bytes, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - return std::is_signed::value_type>::value; -} + auto const output_count = std::distance(map_begin, map_end); + if (output_count == 0) return make_empty_column(data_type{type_id::INT8}); -namespace strings { -namespace detail { + auto chars_column = create_chars_child_column(output_count, 0, chars_bytes, stream, mr); + auto const d_chars = chars_column->mutable_view().template data(); + + auto gather_chars_fn = [strings_begin, map_begin, offsets] __device__(size_type out_idx) -> char { + auto const out_row = + thrust::prev(thrust::upper_bound(thrust::seq, offsets.begin(), offsets.end(), out_idx)); + auto const row_idx = map_begin[thrust::distance(offsets.begin(), out_row)]; // get row index + auto const d_str = strings_begin[row_idx]; // get row's string + auto const offset = out_idx - *out_row; // get string's char + return d_str.data()[offset]; + }; + + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(chars_bytes), + d_chars, + gather_chars_fn); + + return chars_column; +} /** * @brief Returns a new strings column using the specified indices to select @@ -107,29 +152,15 @@ std::unique_ptr gather( rmm::exec_policy(stream), d_out_offsets, d_out_offsets + output_count + 1, d_out_offsets); // build chars column - size_type const out_chars_bytes = static_cast(total_bytes); - auto out_chars_column = create_chars_child_column(output_count, 0, out_chars_bytes, stream, mr); - auto const d_out_chars = out_chars_column->mutable_view().template data(); - - // fill in chars cudf::device_span const d_out_offsets_span(d_out_offsets, output_count + 1); - auto const d_in_chars = (strings_count > 0) ? strings.chars().data() : nullptr; - auto gather_chars_fn = - [d_out_offsets_span, begin, d_in_offsets, d_in_chars] __device__(size_type out_char_idx) { - // find output row index for this output char index - auto const next_row_ptr = thrust::upper_bound( - thrust::seq, d_out_offsets_span.begin(), d_out_offsets_span.end(), out_char_idx); - auto const out_row_idx = thrust::distance(d_out_offsets_span.begin(), next_row_ptr) - 1; - auto const str_char_offset = out_char_idx - d_out_offsets_span[out_row_idx]; - auto const in_row_idx = begin[out_row_idx]; - auto const in_char_offset = d_in_offsets[in_row_idx] + str_char_offset; - return d_in_chars[in_char_offset]; - }; - thrust::transform(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(out_chars_bytes), - d_out_chars, - gather_chars_fn); + auto const d_strings = column_device_view::create(strings.parent(), stream); + auto out_chars_column = gather_chars(d_strings->begin(), + begin, + end, + d_out_offsets_span, + static_cast(total_bytes), + stream, + mr); return make_strings_column(output_count, std::move(out_offsets_column), diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 8e5a09634d4..07ece7ee3c7 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,7 @@ #include #include -#include +#include #include #include @@ -93,43 +94,42 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, (null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr}; // build chars column - auto chars_column = - strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); - auto d_chars = chars_column->mutable_view().template data(); - - // use a character-parallel kernel for long string lengths - auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); - if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) { - auto d_offsets = offsets_column->view().template data(); - // this algorithm is based on a similar one used in strings::detail::gather() - auto copy_chars = [begin, d_offsets, strings_count, d_chars] __device__(size_type out_idx) { - auto const next_row = - thrust::upper_bound(thrust::seq, d_offsets, d_offsets + strings_count + 1, out_idx); - auto const out_row = thrust::distance(d_offsets, next_row) - 1; - // get the corresponding pair - string_index_pair const in_row = begin[out_row]; - if (in_row.first && in_row.second) { - auto const offset = out_idx - d_offsets[out_row]; - d_chars[out_idx] = in_row.first[offset]; - } - }; - thrust::for_each_n(rmm::exec_policy(stream), - thrust::counting_iterator(0), - static_cast(bytes), - copy_chars); - } else { - // this approach is 2-3x faster for large number smaller string lengths - auto copy_chars = [d_chars] __device__(auto item) { - string_index_pair const str = thrust::get<0>(item); - size_type const offset = thrust::get<1>(item); - if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); - }; - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_zip_iterator(thrust::make_tuple( - begin, offsets_column->view().template begin())), - strings_count, - copy_chars); - } + std::unique_ptr chars_column = [&] { + // use a character-parallel kernel for long string lengths + auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); + if (avg_bytes_per_row > 0) { // FACTORY_BYTES_PER_ROW_THRESHOLD + auto const d_offsets = + device_span{offsets_column->view().template data(), + static_cast(offsets_column->size())}; + auto const str_begin = thrust::make_transform_iterator(begin, [] __device__(auto ip) { + return string_view{ip.first, ip.second}; + }); + + return gather_chars(str_begin, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings_count), + d_offsets, + static_cast(bytes), + stream, + mr); + } else { + // this approach is 2-3x faster for large number smaller string lengths + auto chars_column = + strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); + auto d_chars = chars_column->mutable_view().template data(); + auto copy_chars = [d_chars] __device__(auto item) { + string_index_pair const str = thrust::get<0>(item); + size_type const offset = thrust::get<1>(item); + if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); + }; + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_zip_iterator(thrust::make_tuple( + begin, offsets_column->view().template begin())), + strings_count, + copy_chars); + return chars_column; + } + }(); return make_strings_column(strings_count, std::move(offsets_column), From b8dddceae239be7f8de0fccc85d379a9d69a9081 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 12 Mar 2021 20:07:00 -0500 Subject: [PATCH 7/9] reinstate commented out threshold --- cpp/include/cudf/strings/detail/strings_column_factories.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 07ece7ee3c7..6a89f3a7d2d 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -97,7 +97,7 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, std::unique_ptr chars_column = [&] { // use a character-parallel kernel for long string lengths auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); - if (avg_bytes_per_row > 0) { // FACTORY_BYTES_PER_ROW_THRESHOLD + if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) { auto const d_offsets = device_span{offsets_column->view().template data(), static_cast(offsets_column->size())}; From 3ea82a40b7b80a3acee53f92df01d85020180df6 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Tue, 16 Mar 2021 12:33:34 -0400 Subject: [PATCH 8/9] change comment wording --- cpp/include/cudf/strings/detail/strings_column_factories.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 6a89f3a7d2d..932f7eb0926 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -113,7 +113,7 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, stream, mr); } else { - // this approach is 2-3x faster for large number smaller string lengths + // this approach is 2-3x faster for a large number of smaller string lengths auto chars_column = strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr); auto d_chars = chars_column->mutable_view().template data(); From 6163360876d8e2878b261127450fb6e2b9522cf5 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Wed, 17 Mar 2021 17:19:10 -0400 Subject: [PATCH 9/9] resurrect is_unsigned_iterator --- cpp/include/cudf/detail/gather.cuh | 3 ++- cpp/include/cudf/utilities/traits.hpp | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index eee8c1941b8..87f5c9251c7 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -83,7 +83,8 @@ struct gather_bitmask_functor { auto col = input.column(mask_idx); if (Op != gather_bitmask_op::DONT_CHECK) { - bool out_of_range = (row_idx < 0 || row_idx >= col.size()); + bool out_of_range = is_signed_iterator() ? (row_idx < 0 || row_idx >= col.size()) + : row_idx >= col.size(); if (out_of_range) { if (Op == gather_bitmask_op::PASSTHROUGH) { return bit_is_set(masks[mask_idx], bit_idx); diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index e045476ea77..1e0d45d081d 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -224,6 +224,18 @@ constexpr inline bool is_unsigned(data_type type) return cudf::type_dispatcher(type, is_unsigned_impl{}); } +/** + * @brief Indicates whether the `Iterator` value type is unsigned. + * + * @tparam Iterator The type to verify + * @return true if the iterator's value type is unsigned + */ +template +constexpr inline bool is_signed_iterator() +{ + return std::is_signed::value_type>::value; +} + /** * @brief Indicates whether the type `T` is a floating point type. *