From bcb3de6861f709e23bbee1eac81d78e8cf692add Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 29 Apr 2021 13:19:13 -0400 Subject: [PATCH 01/12] fix compile warnings --- cpp/benchmarks/string/filter_benchmark.cpp | 13 +++---------- cpp/benchmarks/string/find_benchmark.cpp | 14 +++----------- cpp/benchmarks/string/split_benchmark.cpp | 14 +++----------- cpp/benchmarks/string/string_bench_args.hpp | 2 +- cpp/benchmarks/text/normalize_benchmark.cpp | 2 +- 5 files changed, 11 insertions(+), 34 deletions(-) diff --git a/cpp/benchmarks/string/filter_benchmark.cpp b/cpp/benchmarks/string/filter_benchmark.cpp index d510ca9baed..0ae06d43fc1 100644 --- a/cpp/benchmarks/string/filter_benchmark.cpp +++ b/cpp/benchmarks/string/filter_benchmark.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "string_bench_args.hpp" + #include #include #include @@ -26,7 +28,6 @@ #include #include -#include #include enum FilterAPI { filter, filter_chars, strip }; @@ -69,15 +70,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) int const min_rowlen = 1 << 5; int const max_rowlen = 1 << 13; int const len_mult = 4; - for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { - for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { - // avoid generating combinations that exceed the cudf column limit - size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < std::numeric_limits::max()) { - b->Args({row_count, rowlen}); - } - } - } + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); } #define STRINGS_BENCHMARK_DEFINE(name) \ diff --git a/cpp/benchmarks/string/find_benchmark.cpp b/cpp/benchmarks/string/find_benchmark.cpp index fd7c515eb0b..49b89ce873f 100644 --- a/cpp/benchmarks/string/find_benchmark.cpp +++ b/cpp/benchmarks/string/find_benchmark.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "string_bench_args.hpp" + #include #include #include @@ -25,8 +27,6 @@ #include #include -#include - enum FindAPI { find, find_multi, contains, starts_with, ends_with }; class StringFindScalar : public cudf::benchmark { @@ -69,15 +69,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) int const min_rowlen = 1 << 5; int const max_rowlen = 1 << 13; int const len_mult = 4; - for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { - for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { - // avoid generating combinations that exceed the cudf column limit - size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < std::numeric_limits::max()) { - b->Args({row_count, rowlen}); - } - } - } + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); } #define STRINGS_BENCHMARK_DEFINE(name) \ diff --git a/cpp/benchmarks/string/split_benchmark.cpp b/cpp/benchmarks/string/split_benchmark.cpp index 0494fba7642..971821c0a62 100644 --- a/cpp/benchmarks/string/split_benchmark.cpp +++ b/cpp/benchmarks/string/split_benchmark.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "string_bench_args.hpp" + #include #include #include @@ -24,8 +26,6 @@ #include #include -#include - class StringSplit : public cudf::benchmark { }; @@ -64,15 +64,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) int const min_rowlen = 1 << 5; int const max_rowlen = 1 << 13; int const len_mult = 4; - for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { - for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { - // avoid generating combinations that exceed the cudf column limit - size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < std::numeric_limits::max()) { - b->Args({row_count, rowlen}); - } - } - } + generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); } #define STRINGS_BENCHMARK_DEFINE(name) \ diff --git a/cpp/benchmarks/string/string_bench_args.hpp b/cpp/benchmarks/string/string_bench_args.hpp index 05ed1bf5b33..92a46374438 100644 --- a/cpp/benchmarks/string/string_bench_args.hpp +++ b/cpp/benchmarks/string/string_bench_args.hpp @@ -48,7 +48,7 @@ inline void generate_string_bench_args(benchmark::internal::Benchmark* b, for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= rowlen_mult) { // avoid generating combinations that exceed the cudf column limit size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < std::numeric_limits::max()) { + if (total_chars < static_cast(std::numeric_limits::max())) { b->Args({row_count, rowlen}); } } diff --git a/cpp/benchmarks/text/normalize_benchmark.cpp b/cpp/benchmarks/text/normalize_benchmark.cpp index bb872fee0b3..f041547d021 100644 --- a/cpp/benchmarks/text/normalize_benchmark.cpp +++ b/cpp/benchmarks/text/normalize_benchmark.cpp @@ -60,7 +60,7 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { // avoid generating combinations that exceed the cudf column limit size_t total_chars = static_cast(row_count) * rowlen * 4; - if (total_chars < std::numeric_limits::max()) { + if (total_chars < static_cast(std::numeric_limits::max())) { b->Args({row_count, rowlen}); } } From bb3e4270a6cfb5b0109e79f1c50a31b5989c98f6 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 29 Apr 2021 13:19:33 -0400 Subject: [PATCH 02/12] add thrust::lower_bound workarounds --- cpp/src/dictionary/detail/concatenate.cu | 19 +++++++++++-- cpp/src/dictionary/set_keys.cu | 35 +++++++++++++++--------- 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index cdf086e3f4a..3649d5e4a7e 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -153,19 +153,34 @@ struct dispatch_compute_indices { })); auto new_keys_view = column_device_view::create(new_keys, stream); + + auto begin = new_keys_view->begin(); + auto end = new_keys_view->end(); + // create the indices output column auto result = make_numeric_column( all_indices.type(), all_indices.size(), mask_state::UNALLOCATED, stream, mr); auto result_itr = cudf::detail::indexalator_factory::make_output_iterator(result->mutable_view()); // new indices values are computed by matching the concatenated keys to the new key set +#ifdef NDEBUG // something wrong with this in Debug build thrust::lower_bound(rmm::exec_policy(stream), - new_keys_view->begin(), - new_keys_view->end(), + begin, + end, all_itr, all_itr + all_indices.size(), result_itr, thrust::less()); +#else + thrust::transform(rmm::exec_policy(stream), + all_itr, + all_itr + all_indices.size(), + result_itr, + [begin, end] __device__(auto key) { + auto itr = thrust::lower_bound(thrust::seq, begin, end, key); + return (size_type)thrust::distance(begin, itr); + }); +#endif return result; } diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index f3f1ffcfdab..791f6bc904a 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,7 @@ namespace cudf { namespace dictionary { namespace detail { namespace { + /** * @brief Type-dispatch functor for remapping the old indices to new values based on the new * key-set. @@ -55,16 +57,11 @@ struct dispatch_compute_indices { rmm::mr::device_memory_resource* mr) { auto dictionary_view = column_device_view::create(input.parent(), stream); - auto d_dictionary = *dictionary_view; - auto keys_view = column_device_view::create(input.keys(), stream); - auto dictionary_itr = thrust::make_permutation_iterator( - keys_view->begin(), - thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [d_dictionary] __device__(size_type idx) { - if (d_dictionary.is_null(idx)) return 0; - return static_cast(d_dictionary.element(idx)); - })); - auto new_keys_view = column_device_view::create(new_keys, stream); + auto dictionary_itr = make_dictionary_iterator(*dictionary_view); + auto new_keys_view = column_device_view::create(new_keys, stream); + + auto begin = new_keys_view->begin(); + auto end = new_keys_view->end(); // create output indices column auto result = make_numeric_column(get_indices_type_for_size(new_keys.size()), @@ -74,14 +71,26 @@ struct dispatch_compute_indices { mr); auto result_itr = cudf::detail::indexalator_factory::make_output_iterator(result->mutable_view()); +#ifdef NDEBUG // something wrong with this in Debug build thrust::lower_bound(rmm::exec_policy(stream), - new_keys_view->begin(), - new_keys_view->end(), + begin, + end, dictionary_itr, dictionary_itr + input.size(), result_itr, thrust::less()); +#else + thrust::transform(rmm::exec_policy(stream), + dictionary_itr, + dictionary_itr + input.size(), + result_itr, + [begin, end] __device__(auto key) { + auto itr = thrust::lower_bound(thrust::seq, begin, end, key); + return (size_type)thrust::distance(begin, itr); + }); +#endif result->set_null_count(0); + return result; } From 68cf55a8324e29a8f4041d758cce88a19615cb9b Mon Sep 17 00:00:00 2001 From: davidwendt Date: Tue, 4 May 2021 13:44:15 -0400 Subject: [PATCH 03/12] fix fixed-point result-type --- cpp/src/groupby/sort/group_single_pass_reduction_util.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index e5e93bbef47..67062658c39 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -65,7 +65,7 @@ struct reduce_functor { using OpType = cudf::detail::corresponding_operator_t; using ResultType = cudf::detail::target_type_t; - auto result_type = is_fixed_point() + auto result_type = is_fixed_point() ? data_type{type_to_id(), values.type().scale()} : data_type{type_to_id()}; From b777b0adb77a011e2ad4405c166b9326a3584204 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Tue, 4 May 2021 13:44:37 -0400 Subject: [PATCH 04/12] fix fixed-point hashing --- .../cudf/detail/utilities/hash_functions.cuh | 14 ++++++++++ cpp/include/cudf/table/row_operators.cuh | 27 ++++++++++--------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/hash_functions.cuh b/cpp/include/cudf/detail/utilities/hash_functions.cuh index 7f3c05134e2..888a892d003 100644 --- a/cpp/include/cudf/detail/utilities/hash_functions.cuh +++ b/cpp/include/cudf/detail/utilities/hash_functions.cuh @@ -549,6 +549,20 @@ hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(double c return this->compute_floating_point(key); } +template <> +hash_value_type CUDA_DEVICE_CALLABLE +MurmurHash3_32::operator()(numeric::decimal32 const& key) const +{ + return this->compute(key.value()); +} + +template <> +hash_value_type CUDA_DEVICE_CALLABLE +MurmurHash3_32::operator()(numeric::decimal64 const& key) const +{ + return this->compute(key.value()); +} + template <> hash_value_type CUDA_DEVICE_CALLABLE MurmurHash3_32::operator()(cudf::list_view const& key) const diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 61d714c5538..012a2814e16 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -475,17 +475,19 @@ class row_hasher { // Hash the first column w/ the seed auto const initial_hash = hash_combiner(hash_value_type{0}, - type_dispatcher(_table.column(0).type(), - element_hasher_with_seed{_seed}, - _table.column(0), - row_index)); + type_dispatcher( // + _table.column(0).type(), + element_hasher_with_seed{_seed}, + _table.column(0), + row_index)); // Hashes an element in a column auto hasher = [=](size_type column_index) { - return cudf::type_dispatcher(_table.column(column_index).type(), - element_hasher{}, - _table.column(column_index), - row_index); + return cudf::type_dispatcher( // + _table.column(column_index).type(), + element_hasher{}, + _table.column(column_index), + row_index); }; // Hash each element and combine all the hash values together @@ -528,10 +530,11 @@ class row_hasher_initial_values { // Hashes an element in a column and combines with an initial value auto hasher = [=](size_type column_index) { - auto hash_value = cudf::type_dispatcher(_table.column(column_index).type(), - element_hasher{}, - _table.column(column_index), - row_index); + auto hash_value = + cudf::type_dispatcher(_table.column(column_index).type(), // + element_hasher{}, + _table.column(column_index), + row_index); return hash_combiner(_initial_hash[column_index], hash_value); }; From a24cbb86d5cc297998672848a08914e697c2176f Mon Sep 17 00:00:00 2001 From: davidwendt Date: Tue, 4 May 2021 17:49:26 -0400 Subject: [PATCH 05/12] fix MD5 hash to use dispatch-storage type --- cpp/src/hash/hashing.cu | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu index 530849601de..fa25f940855 100644 --- a/cpp/src/hash/hashing.cu +++ b/cpp/src/hash/hashing.cu @@ -121,11 +121,12 @@ std::unique_ptr md5_hash(table_view const& input, MD5Hash hasher = MD5Hash{}; for (int col_index = 0; col_index < device_input.num_columns(); col_index++) { if (device_input.column(col_index).is_valid(row_index)) { - cudf::type_dispatcher(device_input.column(col_index).type(), - hasher, - device_input.column(col_index), - row_index, - &hash_state); + cudf::type_dispatcher( + device_input.column(col_index).type(), + hasher, + device_input.column(col_index), + row_index, + &hash_state); } } hasher.finalize(&hash_state, d_chars + (row_index * 32)); From cd5392d8b6049a5bca71adcf5ea1f56c8da3cb9f Mon Sep 17 00:00:00 2001 From: davidwendt Date: Tue, 25 May 2021 08:44:34 -0400 Subject: [PATCH 06/12] fix is_packable in concurrent_unordered_map --- cpp/src/hash/concurrent_unordered_map.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh index 2dfd15925d2..ebfc35aba84 100644 --- a/cpp/src/hash/concurrent_unordered_map.cuh +++ b/cpp/src/hash/concurrent_unordered_map.cuh @@ -73,7 +73,7 @@ template ::value and std::is_integral::value and - not std::is_void>::value; + not std::is_void>::value and sizeof(key_type) == sizeof(value_type); } /** From d5573b7467f58425c8c4ea982773fd2b71ebf14c Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 27 May 2021 16:50:43 -0400 Subject: [PATCH 07/12] fixed invalid access of padding bits --- cpp/src/hash/concurrent_unordered_map.cuh | 3 ++- cpp/tests/bitmask/bitmask_tests.cpp | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh index ebfc35aba84..b27076b7c87 100644 --- a/cpp/src/hash/concurrent_unordered_map.cuh +++ b/cpp/src/hash/concurrent_unordered_map.cuh @@ -73,7 +73,8 @@ template ::value and std::is_integral::value and - not std::is_void>::value and sizeof(key_type) == sizeof(value_type); + not std::is_void>::value and + std::has_unique_object_representations_v; } /** diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index 3fb12efcc93..2743607ccb4 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -384,8 +384,12 @@ void cleanEndWord(rmm::device_buffer &mask, int begin_bit, int end_bit) auto number_of_mask_words = cudf::num_bitmask_words(static_cast(end_bit - begin_bit)); auto number_of_bits = end_bit - begin_bit; if (number_of_bits % 32 != 0) { - auto end_mask = ptr[number_of_mask_words - 1]; - end_mask = end_mask & ((1 << (number_of_bits % 32)) - 1); + cudf::bitmask_type end_mask{}; + CUDA_TRY(cudaMemcpy( + &end_mask, ptr + number_of_mask_words - 1, sizeof(end_mask), cudaMemcpyDeviceToHost)); + end_mask = end_mask & ((1 << (number_of_bits % 32)) - 1); + CUDA_TRY(cudaMemcpy( + ptr + number_of_mask_words - 1, &end_mask, sizeof(end_mask), cudaMemcpyHostToDevice)); } } From 86b80d59a4b936cb3c1037d2a02e4fcd20c5e528 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Wed, 2 Jun 2021 08:50:13 -0400 Subject: [PATCH 08/12] fix merge conflict error --- cpp/tests/bitmask/bitmask_tests.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index e6d31252cfc..aca93c17b9f 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -384,11 +384,7 @@ void cleanEndWord(rmm::device_buffer &mask, int begin_bit, int end_bit) auto number_of_mask_words = cudf::num_bitmask_words(static_cast(end_bit - begin_bit)); auto number_of_bits = end_bit - begin_bit; if (number_of_bits % 32 != 0) { -<<<<<<< HEAD - cudf::bitmask_type end_mask{}; -======= cudf::bitmask_type end_mask = 0; ->>>>>>> branch-21.08 CUDA_TRY(cudaMemcpy( &end_mask, ptr + number_of_mask_words - 1, sizeof(end_mask), cudaMemcpyDeviceToHost)); end_mask = end_mask & ((1 << (number_of_bits % 32)) - 1); From ef251e8164321c175f090f046205a0c3a3caece0 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 3 Jun 2021 11:14:45 -0400 Subject: [PATCH 09/12] undo out-of-scope benchmark changes --- cpp/benchmarks/string/filter_benchmark.cpp | 13 ++++++++++--- cpp/benchmarks/string/find_benchmark.cpp | 14 +++++++++++--- cpp/benchmarks/string/split_benchmark.cpp | 14 +++++++++++--- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/cpp/benchmarks/string/filter_benchmark.cpp b/cpp/benchmarks/string/filter_benchmark.cpp index 0ae06d43fc1..97228122c42 100644 --- a/cpp/benchmarks/string/filter_benchmark.cpp +++ b/cpp/benchmarks/string/filter_benchmark.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "string_bench_args.hpp" - #include #include #include @@ -28,6 +26,7 @@ #include #include +#include #include enum FilterAPI { filter, filter_chars, strip }; @@ -70,7 +69,15 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) int const min_rowlen = 1 << 5; int const max_rowlen = 1 << 13; int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { + for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { + // avoid generating combinations that exceed the cudf column limit + size_t total_chars = static_cast(row_count) * rowlen; + if (total_chars < static_cast(std::numeric_limits::max())) { + b->Args({row_count, rowlen}); + } + } + } } #define STRINGS_BENCHMARK_DEFINE(name) \ diff --git a/cpp/benchmarks/string/find_benchmark.cpp b/cpp/benchmarks/string/find_benchmark.cpp index 49b89ce873f..8e570a55440 100644 --- a/cpp/benchmarks/string/find_benchmark.cpp +++ b/cpp/benchmarks/string/find_benchmark.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "string_bench_args.hpp" - #include #include #include @@ -27,6 +25,8 @@ #include #include +#include + enum FindAPI { find, find_multi, contains, starts_with, ends_with }; class StringFindScalar : public cudf::benchmark { @@ -69,7 +69,15 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) int const min_rowlen = 1 << 5; int const max_rowlen = 1 << 13; int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { + for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { + // avoid generating combinations that exceed the cudf column limit + size_t total_chars = static_cast(row_count) * rowlen; + if (total_chars < static_cast(std::numeric_limits::max())) { + b->Args({row_count, rowlen}); + } + } + } } #define STRINGS_BENCHMARK_DEFINE(name) \ diff --git a/cpp/benchmarks/string/split_benchmark.cpp b/cpp/benchmarks/string/split_benchmark.cpp index 971821c0a62..cab477754a6 100644 --- a/cpp/benchmarks/string/split_benchmark.cpp +++ b/cpp/benchmarks/string/split_benchmark.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "string_bench_args.hpp" - #include #include #include @@ -26,6 +24,8 @@ #include #include +#include + class StringSplit : public cudf::benchmark { }; @@ -64,7 +64,15 @@ static void generate_bench_args(benchmark::internal::Benchmark* b) int const min_rowlen = 1 << 5; int const max_rowlen = 1 << 13; int const len_mult = 4; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); + for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) { + for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) { + // avoid generating combinations that exceed the cudf column limit + size_t total_chars = static_cast(row_count) * rowlen; + if (total_chars < static_cast(std::numeric_limits::max())) { + b->Args({row_count, rowlen}); + } + } + } } #define STRINGS_BENCHMARK_DEFINE(name) \ From 06ea976dc004995e95eec43607983be3eff9e560 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 3 Jun 2021 17:56:39 -0400 Subject: [PATCH 10/12] fix cast statements --- cpp/src/dictionary/detail/concatenate.cu | 5 +++-- cpp/src/dictionary/set_keys.cu | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 619c03196f4..b87cb22649d 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -163,7 +163,8 @@ struct dispatch_compute_indices { auto result_itr = cudf::detail::indexalator_factory::make_output_iterator(result->mutable_view()); // new indices values are computed by matching the concatenated keys to the new key set -#ifdef NDEBUG // something wrong with this in Debug build + +#ifdef NDEBUG // something goes wrong here in a Debug build thrust::lower_bound(rmm::exec_policy(stream), begin, end, @@ -178,7 +179,7 @@ struct dispatch_compute_indices { result_itr, [begin, end] __device__(auto key) { auto itr = thrust::lower_bound(thrust::seq, begin, end, key); - return (size_type)thrust::distance(begin, itr); + return static_cast(thrust::distance(begin, itr)); }); #endif return result; diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 66d5f22c46a..21c8961c590 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -71,7 +71,8 @@ struct dispatch_compute_indices { mr); auto result_itr = cudf::detail::indexalator_factory::make_output_iterator(result->mutable_view()); -#ifdef NDEBUG // something wrong with this in Debug build + +#ifdef NDEBUG // something goes wrong here in a Debug build thrust::lower_bound(rmm::exec_policy(stream), begin, end, @@ -86,7 +87,7 @@ struct dispatch_compute_indices { result_itr, [begin, end] __device__(auto key) { auto itr = thrust::lower_bound(thrust::seq, begin, end, key); - return (size_type)thrust::distance(begin, itr); + return static_cast(thrust::distance(begin, itr)); }); #endif result->set_null_count(0); From 92021b33a166f65c0f8b86f9d09e931b06b0b154 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 4 Jun 2021 19:54:39 -0400 Subject: [PATCH 11/12] add issue link to comment on workaround --- cpp/src/dictionary/detail/concatenate.cu | 5 ++++- cpp/src/dictionary/set_keys.cu | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index b87cb22649d..8520a777eb2 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -164,7 +164,7 @@ struct dispatch_compute_indices { cudf::detail::indexalator_factory::make_output_iterator(result->mutable_view()); // new indices values are computed by matching the concatenated keys to the new key set -#ifdef NDEBUG // something goes wrong here in a Debug build +#ifdef NDEBUG thrust::lower_bound(rmm::exec_policy(stream), begin, end, @@ -173,6 +173,9 @@ struct dispatch_compute_indices { result_itr, thrust::less()); #else + // There is a problem with thrust::lower_bound and the output_indexalator + // https://github.com/NVIDIA/thrust/issues/1452 + // This is a workaround. thrust::transform(rmm::exec_policy(stream), all_itr, all_itr + all_indices.size(), diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index b8e8f817343..5ba0d9e591e 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -72,7 +72,7 @@ struct dispatch_compute_indices { auto result_itr = cudf::detail::indexalator_factory::make_output_iterator(result->mutable_view()); -#ifdef NDEBUG // something goes wrong here in a Debug build +#ifdef NDEBUG thrust::lower_bound(rmm::exec_policy(stream), begin, end, @@ -81,6 +81,9 @@ struct dispatch_compute_indices { result_itr, thrust::less()); #else + // There is a problem with thrust::lower_bound and the output_indexalator + // https://github.com/NVIDIA/thrust/issues/1452 + // This is a workaround. thrust::transform(rmm::exec_policy(stream), dictionary_itr, dictionary_itr + input.size(), From 70e49d33ea55185e577ed4d3649f488eb4d3b12e Mon Sep 17 00:00:00 2001 From: davidwendt Date: Tue, 8 Jun 2021 11:59:11 -0400 Subject: [PATCH 12/12] add nvbug to workaround comment --- cpp/src/dictionary/detail/concatenate.cu | 4 ++-- cpp/src/dictionary/set_keys.cu | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 8520a777eb2..a3cac6ac5c1 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -173,8 +173,8 @@ struct dispatch_compute_indices { result_itr, thrust::less()); #else - // There is a problem with thrust::lower_bound and the output_indexalator - // https://github.com/NVIDIA/thrust/issues/1452 + // There is a problem with thrust::lower_bound and the output_indexalator. + // https://github.com/NVIDIA/thrust/issues/1452; thrust team created nvbug 3322776 // This is a workaround. thrust::transform(rmm::exec_policy(stream), all_itr, diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index 5ba0d9e591e..51ca6f5d962 100644 --- a/cpp/src/dictionary/set_keys.cu +++ b/cpp/src/dictionary/set_keys.cu @@ -82,7 +82,7 @@ struct dispatch_compute_indices { thrust::less()); #else // There is a problem with thrust::lower_bound and the output_indexalator - // https://github.com/NVIDIA/thrust/issues/1452 + // https://github.com/NVIDIA/thrust/issues/1452; thrust team created nvbug 3322776 // This is a workaround. thrust::transform(rmm::exec_policy(stream), dictionary_itr,