From f71aabd4358e97804024cd8a7557ca4a30d3df79 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 22 Mar 2024 09:04:00 -0400 Subject: [PATCH 1/8] Large strings support in cudf::merge --- cpp/CMakeLists.txt | 1 + cpp/benchmarks/CMakeLists.txt | 4 +- cpp/benchmarks/merge/merge_strings.cpp | 64 ++++++++++ cpp/include/cudf/strings/detail/merge.cuh | 110 ------------------ cpp/include/cudf/strings/detail/merge.hpp | 47 ++++++++ .../cudf/strings/detail/strings_children.cuh | 17 ++- cpp/src/merge/merge.cu | 10 +- cpp/src/strings/merge/merge.cu | 74 ++++++++++++ cpp/tests/merge/merge_string_test.cpp | 36 ++++++ 9 files changed, 240 insertions(+), 123 deletions(-) create mode 100644 cpp/benchmarks/merge/merge_strings.cpp delete mode 100644 cpp/include/cudf/strings/detail/merge.cuh create mode 100644 cpp/include/cudf/strings/detail/merge.hpp create mode 100644 cpp/src/strings/merge/merge.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 12837c69e59..e267025817c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -584,6 +584,7 @@ add_library( src/strings/filling/fill.cu src/strings/filter_chars.cu src/strings/like.cu + src/strings/merge/merge.cu src/strings/padding.cu src/strings/regex/regcomp.cpp src/strings/regex/regexec.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index c82e475dece..59387638299 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -235,7 +235,9 @@ ConfigureNVBench(HASHING_NVBENCH hashing/hash.cpp) # ################################################################################################## # * merge benchmark ------------------------------------------------------------------------------- ConfigureBench(MERGE_BENCH merge/merge.cpp) -ConfigureNVBench(MERGE_NVBENCH merge/merge_structs.cpp merge/merge_lists.cpp) +ConfigureNVBench( + MERGE_NVBENCH merge/merge_lists.cpp merge/merge_structs.cpp merge/merge_strings.cpp +) # ################################################################################################## # * null_mask benchmark --------------------------------------------------------------------------- diff --git a/cpp/benchmarks/merge/merge_strings.cpp b/cpp/benchmarks/merge/merge_strings.cpp new file mode 100644 index 00000000000..3d0f1865490 --- /dev/null +++ b/cpp/benchmarks/merge/merge_strings.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include + +void nvbench_merge_strings(nvbench::state& state) +{ + auto stream = cudf::get_default_stream(); + + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + if (static_cast(2 * num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } + + data_profile const table_profile = + data_profile_builder() + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .no_validity(); + auto const source_tables = create_random_table( + {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, table_profile); + + auto const sorted_lhs = cudf::sort(cudf::table_view({source_tables->view().column(0)})); + auto const sorted_rhs = cudf::sort(cudf::table_view({source_tables->view().column(1)})); + auto const lhs = sorted_lhs->view().column(0); + auto const rhs = sorted_rhs->view().column(0); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + auto chars_size = cudf::strings_column_view(lhs).chars_size(stream) + + cudf::strings_column_view(rhs).chars_size(stream); + state.add_global_memory_reads(chars_size); // all bytes are read + state.add_global_memory_writes(chars_size); // all bytes are written + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + [[maybe_unused]] auto result = cudf::merge( + {cudf::table_view({lhs}), cudf::table_view({rhs})}, {0}, {cudf::order::ASCENDING}); + }); +} + +NVBENCH_BENCH(nvbench_merge_strings) + .set_name("merge_strings") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) + .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); diff --git a/cpp/include/cudf/strings/detail/merge.cuh b/cpp/include/cudf/strings/detail/merge.cuh deleted file mode 100644 index f05e957783f..00000000000 --- a/cpp/include/cudf/strings/detail/merge.cuh +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -namespace cudf { -namespace strings { -namespace detail { -/** - * @brief Merges two strings columns. - * - * Caller must set the validity mask in the output column. - * - * @tparam row_order_iterator This must be an iterator for type thrust::tuple. - * - * @param lhs First column. - * @param rhs Second column. - * @param row_order Indexes for each column. - * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned column's device memory. - * @return New strings column. - */ -template -std::unique_ptr merge(strings_column_view const& lhs, - strings_column_view const& rhs, - row_order_iterator begin, - row_order_iterator end, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - using cudf::detail::side; - size_type strings_count = static_cast(std::distance(begin, end)); - if (strings_count == 0) return make_empty_column(type_id::STRING); - - auto lhs_column = column_device_view::create(lhs.parent(), stream); - auto d_lhs = *lhs_column; - auto rhs_column = column_device_view::create(rhs.parent(), stream); - auto d_rhs = *rhs_column; - - // caller will set the null mask - rmm::device_buffer null_mask{0, stream, mr}; - size_type null_count = lhs.null_count() + rhs.null_count(); - if (null_count > 0) - null_mask = cudf::detail::create_null_mask(strings_count, mask_state::ALL_VALID, stream, mr); - - // build offsets column - auto offsets_transformer = - cuda::proclaim_return_type([d_lhs, d_rhs] __device__(auto index_pair) { - auto const [side, index] = index_pair; - if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return 0; - auto d_str = - side == side::LEFT ? d_lhs.element(index) : d_rhs.element(index); - return d_str.size_bytes(); - }); - auto offsets_transformer_itr = thrust::make_transform_iterator(begin, offsets_transformer); - auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column( - offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto d_offsets = offsets_column->view().template data(); - - // create the chars column - rmm::device_uvector chars(bytes, stream, mr); - auto d_chars = chars.data(); - thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - strings_count, - [d_lhs, d_rhs, begin, d_offsets, d_chars] __device__(size_type idx) { - auto const [side, index] = begin[idx]; - if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) return; - auto d_str = side == side::LEFT ? d_lhs.element(index) - : d_rhs.element(index); - memcpy(d_chars + d_offsets[idx], d_str.data(), d_str.size_bytes()); - }); - - return make_strings_column( - strings_count, std::move(offsets_column), chars.release(), null_count, std::move(null_mask)); -} - -} // namespace detail -} // namespace strings -} // namespace cudf diff --git a/cpp/include/cudf/strings/detail/merge.hpp b/cpp/include/cudf/strings/detail/merge.hpp new file mode 100644 index 00000000000..054ade7f69c --- /dev/null +++ b/cpp/include/cudf/strings/detail/merge.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +namespace cudf { +namespace strings { +namespace detail { +/** + * @brief Merges two strings columns + * + * Caller must set the validity mask in the output column. + * + * @param lhs First column + * @param rhs Second column + * @param row_order Indexes for each column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New strings column + */ +std::unique_ptr merge(strings_column_view const& lhs, + strings_column_view const& rhs, + cudf::detail::index_vector const& row_order, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index 49c4be88ca5..4ef157eb483 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -163,22 +163,29 @@ std::pair, int64_t> make_offsets_child_column( }); auto input_itr = cudf::detail::make_counting_transform_iterator(0, map_fn); // Use the sizes-to-offsets iterator to compute the total number of elements - auto const total_elements = + auto const total_bytes = cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream); // TODO: replace exception with if-statement when enabling creating INT64 offsets - CUDF_EXPECTS(total_elements <= size_type_max, + CUDF_EXPECTS(total_bytes <= size_type_max, "Size of output exceeds the character size limit", std::overflow_error); - // if (total_elements >= get_offset64_threshold()) { + // auto const threshold = get_offset64_threshold(); + // if (!is_large_strings_enabled()) { implemented in PR 15195 + // CUDF_EXPECTS( + // total_bytes < threshold, "Size of output exceeds the column size limit", + // std::overflow_error); + // } + // if (total_bytes >= get_offset64_threshold()) { // // recompute as int64 offsets when above the threshold // offsets_column = make_numeric_column( // data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); // auto d_offsets64 = offsets_column->mutable_view().template data(); - // sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets64, stream); + // cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets64, + // stream); // } - return std::pair(std::move(offsets_column), total_elements); + return std::pair(std::move(offsets_column), total_bytes); } } // namespace detail diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 8be503025bd..dedb2acb9da 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -433,12 +433,8 @@ std::unique_ptr column_merger::operator()( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - auto column = strings::detail::merge(strings_column_view(lcol), - strings_column_view(rcol), - row_order_.begin(), - row_order_.end(), - stream, - mr); + auto column = strings::detail::merge( + strings_column_view(lcol), strings_column_view(rcol), row_order_, stream, mr); if (lcol.has_nulls() || rcol.has_nulls()) { auto merged_view = column->mutable_view(); materialize_bitmask( diff --git a/cpp/src/strings/merge/merge.cu b/cpp/src/strings/merge/merge.cu new file mode 100644 index 00000000000..9c75ee0e0c3 --- /dev/null +++ b/cpp/src/strings/merge/merge.cu @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace cudf { +namespace strings { +namespace detail { +std::unique_ptr merge(strings_column_view const& lhs, + strings_column_view const& rhs, + cudf::detail::index_vector const& row_order, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + using cudf::detail::side; + if (row_order.is_empty()) { return make_empty_column(type_id::STRING); } + auto const strings_count = static_cast(row_order.size()); + + auto const lhs_column = column_device_view::create(lhs.parent(), stream); + auto const d_lhs = *lhs_column; + auto const rhs_column = column_device_view::create(rhs.parent(), stream); + auto const d_rhs = *rhs_column; + + auto const begin = row_order.begin(); + + // build vector of strings + rmm::device_uvector indices(strings_count, stream); + thrust::transform(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(strings_count), + indices.begin(), + [d_lhs, d_rhs, begin] __device__(size_type idx) { + auto const [side, index] = begin[idx]; + if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) { + return string_index_pair{nullptr, 0}; + } + auto d_str = side == side::LEFT ? d_lhs.element(index) + : d_rhs.element(index); + return string_index_pair{d_str.data(), d_str.size_bytes()}; + }); + + // convert vector into strings column + return make_strings_column(indices.begin(), indices.end(), stream, mr); +} + +} // namespace detail +} // namespace strings +} // namespace cudf diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index 28179a7341c..f5b8dfd96f4 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -411,3 +411,39 @@ TYPED_TEST(MergeStringTest, Merge2StringKeyNullColumns) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_column_view2, output_column_view2); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_column_view3, output_column_view3); } + +class MergeLargeStringsTest : public cudf::test::BaseFixture {}; + +TEST_F(MergeLargeStringsTest, DISABLED_MergeLargeStrings) +{ + // CUDF_TEST_ENABLE_LARGE_STRINGS(); implemented in PR 15195 + auto itr = thrust::constant_iterator( + "abcdefghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXY"); // 50 bytes + auto input = cudf::test::strings_column_wrapper(itr, itr + 5'000'000); // 250MB + std::vector input_views; + for (int i = 0; i < 10; ++i) { // 2500MB > 2GB + input_views.push_back(cudf::table_view({input})); + } + std::vector column_order{cudf::order::ASCENDING}; + std::vector null_precedence{cudf::null_order::AFTER}; + + auto result = cudf::merge(input_views, {0}, column_order, null_precedence); + auto sv = cudf::strings_column_view(result->view().column(0)); + EXPECT_EQ(sv.size(), 50'000'000); + EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64}); + + // verify results in sections + auto splits = std::vector({5'000'000, + 10'000'000, + 15'000'000, + 20'000'000, + 25'000'000, + 30'000'000, + 35'000'000, + 40'000'000, + 45'000'000}); + auto sliced = cudf::split(sv.parent(), splits); + for (auto c : sliced) { + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, input); + } +} From 31ed7c276fb38b3821ab5e2a8143ceff9fbf7291 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 22 Mar 2024 13:21:11 -0400 Subject: [PATCH 2/8] ensure empty!=null in indices output --- cpp/include/cudf/strings/detail/merge.hpp | 2 -- cpp/src/merge/merge.cu | 8 +------- cpp/src/strings/merge/merge.cu | 12 +++++++----- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/cpp/include/cudf/strings/detail/merge.hpp b/cpp/include/cudf/strings/detail/merge.hpp index 054ade7f69c..eeb7819757b 100644 --- a/cpp/include/cudf/strings/detail/merge.hpp +++ b/cpp/include/cudf/strings/detail/merge.hpp @@ -27,8 +27,6 @@ namespace detail { /** * @brief Merges two strings columns * - * Caller must set the validity mask in the output column. - * * @param lhs First column * @param rhs Second column * @param row_order Indexes for each column diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index dedb2acb9da..a878bc254e2 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -433,14 +433,8 @@ std::unique_ptr column_merger::operator()( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) const { - auto column = strings::detail::merge( + return strings::detail::merge( strings_column_view(lcol), strings_column_view(rcol), row_order_, stream, mr); - if (lcol.has_nulls() || rcol.has_nulls()) { - auto merged_view = column->mutable_view(); - materialize_bitmask( - lcol, rcol, merged_view.null_mask(), merged_view.size(), row_order_.data(), stream); - } - return column; } // specialization for dictionary diff --git a/cpp/src/strings/merge/merge.cu b/cpp/src/strings/merge/merge.cu index 9c75ee0e0c3..0c1cd2aaca1 100644 --- a/cpp/src/strings/merge/merge.cu +++ b/cpp/src/strings/merge/merge.cu @@ -56,13 +56,15 @@ std::unique_ptr merge(strings_column_view const& lhs, thrust::make_counting_iterator(strings_count), indices.begin(), [d_lhs, d_rhs, begin] __device__(size_type idx) { - auto const [side, index] = begin[idx]; - if (side == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) { + auto const [s, index] = begin[idx]; + if (s == side::LEFT ? d_lhs.is_null(index) : d_rhs.is_null(index)) { return string_index_pair{nullptr, 0}; } - auto d_str = side == side::LEFT ? d_lhs.element(index) - : d_rhs.element(index); - return string_index_pair{d_str.data(), d_str.size_bytes()}; + auto d_str = (s == side::LEFT) ? d_lhs.element(index) + : d_rhs.element(index); + return d_str.size_bytes() == 0 + ? string_index_pair{"", 0} // ensures empty != null + : string_index_pair{d_str.data(), d_str.size_bytes()}; }); // convert vector into strings column From a8d51e21ab96b2b04255f76067407770de775250 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 8 Apr 2024 11:36:10 -0400 Subject: [PATCH 3/8] enable large-strings test; fix factory --- .../cudf/strings/detail/strings_children.cuh | 30 +++++------- .../detail/strings_column_factories.cuh | 29 +++++------ cpp/tests/merge/merge_string_test.cpp | 49 +++++++++++-------- 3 files changed, 52 insertions(+), 56 deletions(-) diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index 4ef157eb483..847147197c6 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -166,24 +166,18 @@ std::pair, int64_t> make_offsets_child_column( auto const total_bytes = cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream); - // TODO: replace exception with if-statement when enabling creating INT64 offsets - CUDF_EXPECTS(total_bytes <= size_type_max, - "Size of output exceeds the character size limit", - std::overflow_error); - // auto const threshold = get_offset64_threshold(); - // if (!is_large_strings_enabled()) { implemented in PR 15195 - // CUDF_EXPECTS( - // total_bytes < threshold, "Size of output exceeds the column size limit", - // std::overflow_error); - // } - // if (total_bytes >= get_offset64_threshold()) { - // // recompute as int64 offsets when above the threshold - // offsets_column = make_numeric_column( - // data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - // auto d_offsets64 = offsets_column->mutable_view().template data(); - // cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets64, - // stream); - // } + auto const threshold = get_offset64_threshold(); + if (!is_large_strings_enabled()) { + CUDF_EXPECTS( + total_bytes < threshold, "Size of output exceeds the column size limit", std::overflow_error); + } + if (total_bytes >= get_offset64_threshold()) { + // recompute as int64 offsets when above the threshold + offsets_column = make_numeric_column( + data_type{type_id::INT64}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); + auto d_offsets64 = offsets_column->mutable_view().template data(); + cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets64, stream); + } return std::pair(std::move(offsets_column), total_bytes); } diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index 8e19f08a5cc..b220e6e0918 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -85,9 +85,10 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, return (item.first != nullptr ? static_cast(item.second) : size_type{0}); }); auto offsets_transformer_itr = thrust::make_transform_iterator(begin, offsets_transformer); - auto [offsets_column, bytes] = cudf::detail::make_offsets_child_column( + auto [offsets_column, bytes] = cudf::strings::detail::make_offsets_child_column( offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr); - auto offsets_view = offsets_column->view(); + auto const d_offsets = + cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); // create null mask auto validator = [] __device__(string_index_pair const item) { return item.first != nullptr; }; @@ -97,11 +98,10 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, (null_count > 0) ? std::move(new_nulls.first) : rmm::device_buffer{0, stream, mr}; // build chars column - auto chars_data = [offsets_view, bytes = bytes, begin, strings_count, null_count, stream, mr] { + auto chars_data = [d_offsets, bytes = bytes, begin, strings_count, null_count, stream, mr] { auto const avg_bytes_per_row = bytes / std::max(strings_count - null_count, 1); // use a character-parallel kernel for long string lengths if (avg_bytes_per_row > FACTORY_BYTES_PER_ROW_THRESHOLD) { - auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets_view); auto const str_begin = thrust::make_transform_iterator( begin, cuda::proclaim_return_type([] __device__(auto ip) { return string_view{ip.first, ip.second}; @@ -120,12 +120,11 @@ std::unique_ptr make_strings_column(IndexPairIterator begin, auto d_chars = chars_data.data(); auto copy_chars = [d_chars] __device__(auto item) { string_index_pair const str = thrust::get<0>(item); - size_type const offset = thrust::get<1>(item); + int64_t const offset = thrust::get<1>(item); if (str.first != nullptr) memcpy(d_chars + offset, str.first, str.second); }; thrust::for_each_n(rmm::exec_policy(stream), - thrust::make_zip_iterator( - thrust::make_tuple(begin, offsets_view.template begin())), + thrust::make_zip_iterator(thrust::make_tuple(begin, d_offsets)), strings_count, copy_chars); return chars_data; @@ -167,21 +166,15 @@ std::unique_ptr make_strings_column(CharIterator chars_begin, { CUDF_FUNC_RANGE(); size_type strings_count = thrust::distance(offsets_begin, offsets_end) - 1; - size_type bytes = std::distance(chars_begin, chars_end) * sizeof(char); - if (strings_count == 0) return make_empty_column(type_id::STRING); + if (strings_count == 0) { return make_empty_column(type_id::STRING); } + int64_t const bytes = std::distance(chars_begin, chars_end) * sizeof(char); CUDF_EXPECTS(bytes >= 0, "invalid offsets data"); // build offsets column -- this is the number of strings + 1 - auto offsets_column = make_numeric_column( - data_type{type_to_id()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr); - auto offsets_view = offsets_column->mutable_view(); - thrust::transform(rmm::exec_policy(stream), - offsets_begin, - offsets_end, - offsets_view.data(), - cuda::proclaim_return_type( - [] __device__(auto offset) { return static_cast(offset); })); + auto [offsets_column, computed_bytes] = + cudf::strings::detail::make_offsets_child_column(offsets_begin, offsets_end, stream, mr); + CUDF_EXPECTS(bytes == computed_bytes, "unexpected byte count"); // build chars column rmm::device_uvector chars_data(bytes, stream, mr); diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index f5b8dfd96f4..d3b8cade578 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -414,36 +414,45 @@ TYPED_TEST(MergeStringTest, Merge2StringKeyNullColumns) class MergeLargeStringsTest : public cudf::test::BaseFixture {}; -TEST_F(MergeLargeStringsTest, DISABLED_MergeLargeStrings) +TEST_F(MergeLargeStringsTest, MergeLargeStrings) { - // CUDF_TEST_ENABLE_LARGE_STRINGS(); implemented in PR 15195 + CUDF_TEST_ENABLE_LARGE_STRINGS(); auto itr = thrust::constant_iterator( - "abcdefghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXY"); // 50 bytes - auto input = cudf::test::strings_column_wrapper(itr, itr + 5'000'000); // 250MB - std::vector input_views; - for (int i = 0; i < 10; ++i) { // 2500MB > 2GB - input_views.push_back(cudf::table_view({input})); + "abcdefghijklmnopqrstuvwxyABCDEFGHIJKLMNOPQRSTUVWXY"); // 50 bytes + auto const input = cudf::test::strings_column_wrapper(itr, itr + 5'000'000); // 250MB + auto input_views = std::vector(); + auto const view = cudf::table_view({input}); + std::vector splits; + int const multiplier = 10; + for (int i = 0; i < multiplier; ++i) { // 2500MB > 2GB + input_views.push_back(view); + splits.push_back(view.num_rows() * (i + 1)); } - std::vector column_order{cudf::order::ASCENDING}; - std::vector null_precedence{cudf::null_order::AFTER}; + splits.pop_back(); // remove last entry + auto const column_order = std::vector{cudf::order::ASCENDING}; + auto const null_precedence = std::vector{cudf::null_order::AFTER}; auto result = cudf::merge(input_views, {0}, column_order, null_precedence); auto sv = cudf::strings_column_view(result->view().column(0)); - EXPECT_EQ(sv.size(), 50'000'000); + EXPECT_EQ(sv.size(), view.num_rows() * multiplier); EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64}); - // verify results in sections - auto splits = std::vector({5'000'000, - 10'000'000, - 15'000'000, - 20'000'000, - 25'000'000, - 30'000'000, - 35'000'000, - 40'000'000, - 45'000'000}); auto sliced = cudf::split(sv.parent(), splits); for (auto c : sliced) { CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, input); } + + // also test with large strings column as input + input_views.clear(); + input_views.push_back(view); // regular column + input_views.push_back(result->view()); // large column + result = cudf::merge(input_views, {0}, column_order, null_precedence); + sv = cudf::strings_column_view(result->view().column(0)); + EXPECT_EQ(sv.size(), view.num_rows() * (multiplier + 1)); + EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT64}); + splits.push_back(view.num_rows() * multiplier); + sliced = cudf::split(sv.parent(), splits); + for (auto c : sliced) { + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, input); + } } From 890d8d13d3e89a460842ae8938ec8c3cbeea19c8 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 8 Apr 2024 13:04:46 -0400 Subject: [PATCH 4/8] fix doxygen --- cpp/include/cudf/strings/detail/merge.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/strings/detail/merge.hpp b/cpp/include/cudf/strings/detail/merge.hpp index eeb7819757b..5b8d1011d46 100644 --- a/cpp/include/cudf/strings/detail/merge.hpp +++ b/cpp/include/cudf/strings/detail/merge.hpp @@ -29,7 +29,7 @@ namespace detail { * * @param lhs First column * @param rhs Second column - * @param row_order Indexes for each column + * @param row_order Indices for each column * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New strings column From 0d648b4ff3f60eec5f7eb51c36cfc6491d9f406a Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 10 Apr 2024 14:46:56 -0400 Subject: [PATCH 5/8] remove unneeded headers --- cpp/src/strings/merge/merge.cu | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/strings/merge/merge.cu b/cpp/src/strings/merge/merge.cu index 0c1cd2aaca1..cee198d3dbd 100644 --- a/cpp/src/strings/merge/merge.cu +++ b/cpp/src/strings/merge/merge.cu @@ -24,10 +24,8 @@ #include #include -#include #include #include -#include namespace cudf { namespace strings { From 1f4c22fc0dbaa645a630bbde82d104e623b6dca1 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 17 Apr 2024 10:16:33 -0400 Subject: [PATCH 6/8] fix mr parameter type --- cpp/src/strings/merge/merge.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/strings/merge/merge.cu b/cpp/src/strings/merge/merge.cu index cee198d3dbd..28e171f157e 100644 --- a/cpp/src/strings/merge/merge.cu +++ b/cpp/src/strings/merge/merge.cu @@ -34,7 +34,7 @@ std::unique_ptr merge(strings_column_view const& lhs, strings_column_view const& rhs, cudf::detail::index_vector const& row_order, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { using cudf::detail::side; if (row_order.is_empty()) { return make_empty_column(type_id::STRING); } From b2f5c3e5c11ec4d63bd7679a2b31ef44b81c18c4 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 18 Apr 2024 15:10:06 -0400 Subject: [PATCH 7/8] add 32-bit offsets test too --- cpp/include/cudf/strings/detail/merge.hpp | 8 ++------ cpp/tests/merge/merge_string_test.cpp | 12 ++++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/strings/detail/merge.hpp b/cpp/include/cudf/strings/detail/merge.hpp index 7d86a821175..35fd9c0593d 100644 --- a/cpp/include/cudf/strings/detail/merge.hpp +++ b/cpp/include/cudf/strings/detail/merge.hpp @@ -21,9 +21,7 @@ #include -namespace cudf { -namespace strings { -namespace detail { +namespace cudf ::strings ::detail { /** * @brief Merges two strings columns * @@ -40,6 +38,4 @@ std::unique_ptr merge(strings_column_view const& lhs, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -} // namespace detail -} // namespace strings -} // namespace cudf +} // namespace cudf::strings::detail diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index d3b8cade578..d7368d31944 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -455,4 +455,16 @@ TEST_F(MergeLargeStringsTest, MergeLargeStrings) for (auto c : sliced) { CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(c, input); } + + // also check merge still returns 32-bit offsets for regular columns + input_views.clear(); + input_views.push_back(view); + input_views.push_back(view); + result = cudf::merge(input_views, {0}, column_order, null_precedence); + sv = cudf::strings_column_view(result->view().column(0)); + EXPECT_EQ(sv.size(), view.num_rows() * 2); + EXPECT_EQ(sv.offsets().type(), cudf::data_type{cudf::type_id::INT32}); + sliced = cudf::split(sv.parent(), {view.num_rows()}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sliced[0], input); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sliced[1], input); } From 45853b93da0c0c9a066fd73035af2d54140ad186 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 19 Apr 2024 08:46:08 -0400 Subject: [PATCH 8/8] roll if-stmt into CUDF_EXPECTS --- cpp/include/cudf/strings/detail/strings_children.cuh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index f23ff1032f1..35812c0573d 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -168,10 +168,9 @@ std::pair, int64_t> make_offsets_child_column( cudf::detail::sizes_to_offsets(input_itr, input_itr + strings_count + 1, d_offsets, stream); auto const threshold = get_offset64_threshold(); - if (!is_large_strings_enabled()) { - CUDF_EXPECTS( - total_bytes < threshold, "Size of output exceeds the column size limit", std::overflow_error); - } + CUDF_EXPECTS(is_large_strings_enabled() || (total_bytes < threshold), + "Size of output exceeds the column size limit", + std::overflow_error); if (total_bytes >= get_offset64_threshold()) { // recompute as int64 offsets when above the threshold offsets_column = make_numeric_column(