From f4fe9037accfceaa7316ab3fa1c5a6e7f51e1702 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 13 Mar 2024 15:39:36 -0400 Subject: [PATCH 1/5] Use offsetalator in cudf::strings::detail::merge --- cpp/CMakeLists.txt | 1 + cpp/include/cudf/strings/detail/merge.hpp | 47 +++++++++++++++++ cpp/src/merge/merge.cu | 10 ++-- .../merge.cuh => src/strings/merge/merge.cu} | 51 +++++++------------ 4 files changed, 69 insertions(+), 40 deletions(-) create mode 100644 cpp/include/cudf/strings/detail/merge.hpp rename cpp/{include/cudf/strings/detail/merge.cuh => src/strings/merge/merge.cu} (66%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4f64c094ead..695b24345ed 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -583,6 +583,7 @@ add_library( src/strings/filling/fill.cu src/strings/filter_chars.cu src/strings/like.cu + src/strings/merge/merge.cu src/strings/padding.cu src/strings/regex/regcomp.cpp src/strings/regex/regexec.cpp diff --git a/cpp/include/cudf/strings/detail/merge.hpp b/cpp/include/cudf/strings/detail/merge.hpp new file mode 100644 index 00000000000..054ade7f69c --- /dev/null +++ b/cpp/include/cudf/strings/detail/merge.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +namespace cudf { +namespace strings { +namespace detail { +/** + * @brief Merges two strings columns + * + * Caller must set the validity mask in the output column. + * + * @param lhs First column + * @param rhs Second column + * @param row_order Indexes for each column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column + */ +std::unique_ptr merge(strings_column_view const& lhs, + strings_column_view const& rhs, + cudf::detail::index_vector const& row_order, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 8be503025bd..dedb2acb9da 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -433,12 +433,8 @@ std::unique_ptr column_merger::operator()( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - auto column = strings::detail::merge(strings_column_view(lcol), - strings_column_view(rcol), - row_order_.begin(), - row_order_.end(), - stream, - mr); + auto column = strings::detail::merge( + strings_column_view(lcol), strings_column_view(rcol), row_order_, stream, mr); if (lcol.has_nulls() || rcol.has_nulls()) { auto merged_view = column->mutable_view(); materialize_bitmask( diff --git a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/src/strings/merge/merge.cu similarity index 66% rename from cpp/include/cudf/strings/detail/merge.cuh rename to cpp/src/strings/merge/merge.cu index f05e957783f..9bc5d3d334f 100644 --- a/cpp/include/cudf/strings/detail/merge.cuh +++ b/cpp/src/strings/merge/merge.cu @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#pragma once #include #include #include -#include #include +#include +#include #include #include #include @@ -36,62 +36,47 @@ namespace cudf { namespace strings { namespace detail { -/** - * @brief Merges two strings columns. - * - * Caller must set the validity mask in the output column. - * - * @tparam row_order_iterator This must be an iterator for type thrust::tuple. - * - * @param lhs First column. - * @param rhs Second column. - * @param row_order Indexes for each column. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. - */ -template std::unique_ptr merge(strings_column_view const& lhs, strings_column_view const& rhs, - row_order_iterator begin, - row_order_iterator end, + cudf::detail::index_vector const& row_order, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { using cudf::detail::side; - size_type strings_count = static_cast(std::distance(begin, end)); - if (strings_count == 0) return make_empty_column(type_id::STRING); + if (row_order.is_empty()) { return make_empty_column(type_id::STRING); } + auto const strings_count = static_cast(row_order.size()); - auto lhs_column = column_device_view::create(lhs.parent(), stream); - auto d_lhs = *lhs_column; - auto rhs_column = column_device_view::create(rhs.parent(), stream); - auto d_rhs = *rhs_column; + auto const lhs_column = column_device_view::create(lhs.parent(), stream); + auto const d_lhs = *lhs_column; + auto const rhs_column = column_device_view::create(rhs.parent(), stream); + auto const d_rhs = *rhs_column; // caller will set the null mask - rmm::device_buffer null_mask{0, stream, mr}; - size_type null_count = lhs.null_count() + rhs.null_count(); - if (null_count > 0) - null_mask = cudf::detail::create_null_mask(strings_count, mask_state::ALL_VALID, stream, mr); + auto const null_count = lhs.null_count() + rhs.null_count(); + auto null_mask = (null_count > 0) ? cudf::detail::create_null_mask( + strings_count, mask_state::ALL_VALID, stream, mr) + : rmm::device_buffer{}; // build offsets column auto offsets_transformer = cuda::proclaim_return_type([d_lhs, d_rhs] __device__(auto index_pair) { auto const [side, index] = index_pair; - if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return 0; + if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) { return 0; } auto d_str = side == side::LEFT ? d_lhs.element(index) : d_rhs.element(index); return d_str.size_bytes(); }); + auto const begin = row_order.begin(); auto offsets_transformer_itr = thrust::make_transform_iterator(begin, offsets_transformer); - auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column( + auto [offsets_column, bytes] = cudf::strings::detail::make_offsets_child_column( offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_offsets = offsets_column->view().template data(); + auto d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); // create the chars column rmm::device_uvector chars(bytes, stream, mr); auto d_chars = chars.data(); thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), + thrust::counting_iterator(0), strings_count, [d_lhs, d_rhs, begin, d_offsets, d_chars] __device__(size_type idx) { auto const [side, index] = begin[idx]; From 5ad6dda0a9a7e5366f3563c7d64f103a481c55dc Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 14 Mar 2024 12:58:56 -0400 Subject: [PATCH 2/5] add large strings test --- .../cudf/strings/detail/strings_children.cuh | 36 +++++++++++-------- cpp/tests/merge/merge_string_test.cpp | 36 +++++++++++++++++++ 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index 49c4be88ca5..d77bdbe6b27 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -153,32 +153,38 @@ std::pair, int64_t> make_offsets_child_column( data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); auto d_offsets = offsets_column->mutable_view().template data(); - // The number of offsets is strings_count+1 so to build the offsets from the sizes - // using exclusive-scan technically requires strings_count+1 input values even though - // the final input value is never used. - // The input iterator is wrapped here to allow the 'last value' to be safely read. - auto map_fn = cuda::proclaim_return_type( - [begin, strings_count] __device__(size_type idx) -> size_type { - return idx < strings_count ? static_cast(begin[idx]) : size_type{0}; - }); - auto input_itr = cudf::detail::make_counting_transform_iterator(0, map_fn); + // First, store the sizes into a temporary vector. + // This helps minimize calls to the input iterators which may have expensive + // logic to compute each size. This can also speed up the scan-to-offsets + // since the scan uses aggressive inlining on the iterator calls. + // Also, if this requires int64 offsets, the sizes can be reused. + auto sizes = rmm::device_uvector(strings_count + 1, stream); + // The number of offsets is strings_count+1 so the sizes occupy all except the + // last entry which is filled in by the scan and technically should never be read. + thrust::copy(rmm::exec_policy_nosync(stream), begin, end, sizes.begin()); // Use the sizes-to-offsets iterator to compute the total number of elements - auto const total_elements = - cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream); + auto const total_bytes = + cudf::detail::sizes_to_offsets(sizes.begin(), sizes.end(), d_offsets, stream); // TODO: replace exception with if-statement when enabling creating INT64 offsets - CUDF_EXPECTS(total_elements <= size_type_max, + CUDF_EXPECTS(total_bytes <= size_type_max, "Size of output exceeds the character size limit", std::overflow_error); - // if (total_elements >= get_offset64_threshold()) { + // auto const threshold = get_offset64_threshold(); + // if (!is_large_strings_enabled()) { + // CUDF_EXPECTS( + // total_bytes < threshold, "Size of output exceeds the column size limit", + // std::overflow_error); + // } + // if (total_bytes >= get_offset64_threshold()) { // // recompute as int64 offsets when above the threshold // offsets_column = make_numeric_column( // data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); // auto d_offsets64 = offsets_column->mutable_view().template data(); - // sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets64, stream); + // cudf::detail::sizes_to_offsets(sizes.begin(), sizes.end(), d_offsets64, stream); // } - return std::pair(std::move(offsets_column), total_elements); + return std::pair(std::move(offsets_column), total_bytes); } } // namespace detail diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index 28179a7341c..e3c467405a9 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -411,3 +411,39 @@ TYPED_TEST(MergeStringTest, Merge2StringKeyNullColumns) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_column_view2, output_column_view2); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_column_view3, output_column_view3); } + +class MergeLargeStringsTest : public cudf::test::BaseFixture {}; + +TEST_F(MergeLargeStringsTest, DISABLED_MergeLargeStrings) +{ + // CUDF_TEST_ENABLE_LARGE_STRINGS(); waiting on PR 15195 + auto itr = thrust::constant_iterator( + "abcdefghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXY"); // 50 bytes + auto input = cudf::test::strings_column_wrapper(itr, itr + 5'000'000); // 250MB + std::vector input_views; + for (int i = 0; i < 10; ++i) { // 2500MB > 2GB + input_views.push_back(cudf::table_view({input})); + } + std::vector column_order{cudf::order::ASCENDING}; + std::vector null_precedence{cudf::null_order::AFTER}; + + auto result = cudf::merge(input_views, {0}, column_order, null_precedence); + auto sv = cudf::strings_column_view(result->view().column(0)); + EXPECT_EQ(sv.size(), 50'000'000); + EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64}); + + // verify results in sections + auto splits = std::vector({5'000'000, + 10'000'000, + 15'000'000, + 20'000'000, + 25'000'000, + 30'000'000, + 35'000'000, + 40'000'000, + 45'000'000}); + auto sliced = cudf::split(sv.parent(), splits); + for (auto c : sliced) { + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, input); + } +} From 809700a40df541663bfb3d62beeae40a68e74045 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 20 Mar 2024 13:57:22 -0400 Subject: [PATCH 3/5] use gather factory for merge results --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/merge/merge_strings.cpp | 64 +++++++++++++++++++ .../cudf/strings/detail/strings_children.cuh | 25 ++++---- cpp/src/strings/merge/merge.cu | 61 ++++++------------ cpp/tests/merge/merge_string_test.cpp | 2 +- 5 files changed, 99 insertions(+), 55 deletions(-) create mode 100644 cpp/benchmarks/merge/merge_strings.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index c82e475dece..0c0e5cf6abe 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -235,7 +235,7 @@ ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp) # ################################################################################################## # * merge benchmark ------------------------------------------------------------------------------- ConfigureBench(MERGE_BENCH merge/merge.cpp) -ConfigureNVBench(MERGE_NVBENCH merge/merge_structs.cpp merge/merge_lists.cpp) +ConfigureNVBench(MERGE_NVBENCH merge/merge_lists.cpp merge/merge_structs.cpp merge/merge_strings.cpp) # ################################################################################################## # * null_mask benchmark --------------------------------------------------------------------------- diff --git a/cpp/benchmarks/merge/merge_strings.cpp b/cpp/benchmarks/merge/merge_strings.cpp new file mode 100644 index 00000000000..3d0f1865490 --- /dev/null +++ b/cpp/benchmarks/merge/merge_strings.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include + +void nvbench_merge_strings(nvbench::state& state) +{ + auto stream = cudf::get_default_stream(); + + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + if (static_cast(2 * num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = + data_profile_builder() + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .no_validity(); + auto const source_tables = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, table_profile); + + auto const sorted_lhs = cudf::sort(cudf::table_view({source_tables->view().column(0)})); + auto const sorted_rhs = cudf::sort(cudf::table_view({source_tables->view().column(1)})); + auto const lhs = sorted_lhs->view().column(0); + auto const rhs = sorted_rhs->view().column(0); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = cudf::strings_column_view(lhs).chars_size(stream) + + cudf::strings_column_view(rhs).chars_size(stream); + state.add_global_memory_reads(chars_size); // all bytes are read + state.add_global_memory_writes(chars_size); // all bytes are written + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + [[maybe_unused]] auto result = cudf::merge( + {cudf::table_view({lhs}), cudf::table_view({rhs})}, {0}, {cudf::order::ASCENDING}); + }); +} + +NVBENCH_BENCH(nvbench_merge_strings) + .set_name("merge_strings") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index d77bdbe6b27..4ef157eb483 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -153,25 +153,25 @@ std::pair, int64_t> make_offsets_child_column( data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); auto d_offsets = offsets_column->mutable_view().template data(); - // First, store the sizes into a temporary vector. - // This helps minimize calls to the input iterators which may have expensive - // logic to compute each size. This can also speed up the scan-to-offsets - // since the scan uses aggressive inlining on the iterator calls. - // Also, if this requires int64 offsets, the sizes can be reused. - auto sizes = rmm::device_uvector(strings_count + 1, stream); - // The number of offsets is strings_count+1 so the sizes occupy all except the - // last entry which is filled in by the scan and technically should never be read. - thrust::copy(rmm::exec_policy_nosync(stream), begin, end, sizes.begin()); + // The number of offsets is strings_count+1 so to build the offsets from the sizes + // using exclusive-scan technically requires strings_count+1 input values even though + // the final input value is never used. + // The input iterator is wrapped here to allow the 'last value' to be safely read. + auto map_fn = cuda::proclaim_return_type( + [begin, strings_count] __device__(size_type idx) -> size_type { + return idx < strings_count ? static_cast(begin[idx]) : size_type{0}; + }); + auto input_itr = cudf::detail::make_counting_transform_iterator(0, map_fn); // Use the sizes-to-offsets iterator to compute the total number of elements auto const total_bytes = - cudf::detail::sizes_to_offsets(sizes.begin(), sizes.end(), d_offsets, stream); + cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream); // TODO: replace exception with if-statement when enabling creating INT64 offsets CUDF_EXPECTS(total_bytes <= size_type_max, "Size of output exceeds the character size limit", std::overflow_error); // auto const threshold = get_offset64_threshold(); - // if (!is_large_strings_enabled()) { + // if (!is_large_strings_enabled()) { implemented in PR 15195 // CUDF_EXPECTS( // total_bytes < threshold, "Size of output exceeds the column size limit", // std::overflow_error); @@ -181,7 +181,8 @@ std::pair, int64_t> make_offsets_child_column( // offsets_column = make_numeric_column( // data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); // auto d_offsets64 = offsets_column->mutable_view().template data(); - // cudf::detail::sizes_to_offsets(sizes.begin(), sizes.end(), d_offsets64, stream); + // cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets64, + // stream); // } return std::pair(std::move(offsets_column), total_bytes); diff --git a/cpp/src/strings/merge/merge.cu b/cpp/src/strings/merge/merge.cu index 9bc5d3d334f..9c75ee0e0c3 100644 --- a/cpp/src/strings/merge/merge.cu +++ b/cpp/src/strings/merge/merge.cu @@ -16,11 +16,8 @@ #include #include -#include -#include -#include #include -#include +#include #include #include @@ -28,9 +25,8 @@ #include #include -#include #include -#include +#include #include namespace cudf { @@ -51,43 +47,26 @@ std::unique_ptr merge(strings_column_view const& lhs, auto const rhs_column = column_device_view::create(rhs.parent(), stream); auto const d_rhs = *rhs_column; - // caller will set the null mask - auto const null_count = lhs.null_count() + rhs.null_count(); - auto null_mask = (null_count > 0) ? cudf::detail::create_null_mask( - strings_count, mask_state::ALL_VALID, stream, mr) - : rmm::device_buffer{}; + auto const begin = row_order.begin(); - // build offsets column - auto offsets_transformer = - cuda::proclaim_return_type([d_lhs, d_rhs] __device__(auto index_pair) { - auto const [side, index] = index_pair; - if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) { return 0; } - auto d_str = - side == side::LEFT ? d_lhs.element(index) : d_rhs.element(index); - return d_str.size_bytes(); - }); - auto const begin = row_order.begin(); - auto offsets_transformer_itr = thrust::make_transform_iterator(begin, offsets_transformer); - auto [offsets_column, bytes] = cudf::strings::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); + // build vector of strings + rmm::device_uvector indices(strings_count, stream); + thrust::transform(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings_count), + indices.begin(), + [d_lhs, d_rhs, begin] __device__(size_type idx) { + auto const [side, index] = begin[idx]; + if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) { + return string_index_pair{nullptr, 0}; + } + auto d_str = side == side::LEFT ? d_lhs.element(index) + : d_rhs.element(index); + return string_index_pair{d_str.data(), d_str.size_bytes()}; + }); - // create the chars column - rmm::device_uvector chars(bytes, stream, mr); - auto d_chars = chars.data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::counting_iterator(0), - strings_count, - [d_lhs, d_rhs, begin, d_offsets, d_chars] __device__(size_type idx) { - auto const [side, index] = begin[idx]; - if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return; - auto d_str = side == side::LEFT ? d_lhs.element(index) - : d_rhs.element(index); - memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes()); - }); - - return make_strings_column( - strings_count, std::move(offsets_column), chars.release(), null_count, std::move(null_mask)); + // convert vector into strings column + return make_strings_column(indices.begin(), indices.end(), stream, mr); } } // namespace detail diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index e3c467405a9..f5b8dfd96f4 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -416,7 +416,7 @@ class MergeLargeStringsTest : public cudf::test::BaseFixture {}; TEST_F(MergeLargeStringsTest, DISABLED_MergeLargeStrings) { - // CUDF_TEST_ENABLE_LARGE_STRINGS(); waiting on PR 15195 + // CUDF_TEST_ENABLE_LARGE_STRINGS(); implemented in PR 15195 auto itr = thrust::constant_iterator( "abcdefghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXY"); // 50 bytes auto input = cudf::test::strings_column_wrapper(itr, itr + 5'000'000); // 250MB From 81af79a07622c9ae52002ec6c1c268132bcc958d Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 20 Mar 2024 13:57:46 -0400 Subject: [PATCH 4/5] fix cmake format --- cpp/benchmarks/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 0c0e5cf6abe..59387638299 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -235,7 +235,9 @@ ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp) # ################################################################################################## # * merge benchmark ------------------------------------------------------------------------------- ConfigureBench(MERGE_BENCH merge/merge.cpp) -ConfigureNVBench(MERGE_NVBENCH merge/merge_lists.cpp merge/merge_structs.cpp merge/merge_strings.cpp) +ConfigureNVBench( + MERGE_NVBENCH merge/merge_lists.cpp merge/merge_structs.cpp merge/merge_strings.cpp +) # ################################################################################################## # * null_mask benchmark --------------------------------------------------------------------------- From a42e5dbffcafe0085405b8eff184e49c7b23938e Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 20 Mar 2024 14:49:09 -0400 Subject: [PATCH 5/5] fix example code (until 15346 is merged) --- cpp/examples/strings/custom_optimized.cu | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/examples/strings/custom_optimized.cu b/cpp/examples/strings/custom_optimized.cu index cefa3346150..62ca19a5ca9 100644 --- a/cpp/examples/strings/custom_optimized.cu +++ b/cpp/examples/strings/custom_optimized.cu @@ -153,8 +153,12 @@ std::unique_ptr redact_strings(cudf::column_view const& names, redact_kernel<<>>( *d_names, *d_visibilities, offsets.data(), chars.data()); - // create column from offsets and chars vectors (no copy is performed) - auto result = cudf::make_strings_column(names.size(), std::move(offsets), chars.release(), {}, 0); + // create column from offsets vector (move only) + auto offsets_column = std::make_unique(std::move(offsets), rmm::device_buffer{}, 0); + + // create column for chars vector (no copy is performed) + auto result = cudf::make_strings_column( + names.size(), std::move(offsets_column), chars.release(), 0, rmm::device_buffer{}); // wait for all of the above to finish stream.synchronize();