From fe4c3d48a2f54956a71e2b0af4e4fbbdae7422c6 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 21 Apr 2023 13:02:14 -0700 Subject: [PATCH 1/8] Compute child null count in get_sliced_child. --- .../cudf/structs/structs_column_view.hpp | 10 +++++-- cpp/src/structs/structs_column_view.cpp | 26 +++++++++++-------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp index ffc035b36e3..d1ec6c8178f 100644 --- a/cpp/include/cudf/structs/structs_column_view.hpp +++ b/cpp/include/cudf/structs/structs_column_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,9 @@ #include #include +#include + +#include /** * @file @@ -87,9 +90,12 @@ class structs_column_view : public column_view { * @throw cudf::logic error if this is an empty column * * @param index The index of the child column to return + * @param stream The stream on which to perform the operation. Uses the default CUDF + * stream if none is specified. * @return The child column sliced relative to the parent's offset and size */ - [[nodiscard]] column_view get_sliced_child(int index) const; + [[nodiscard]] column_view get_sliced_child( + int index, rmm::cuda_stream_view stream = cudf::get_default_stream()) const; }; // class structs_column_view; /** @} */ // end of group } // namespace cudf diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp index 7d8c8837d2d..b0284e9cb96 100644 --- a/cpp/src/structs/structs_column_view.cpp +++ b/cpp/src/structs/structs_column_view.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,9 @@ */ #include +#include #include +#include #include namespace cudf { @@ -27,22 +29,24 @@ structs_column_view::structs_column_view(column_view const& rhs) : column_view{r column_view structs_column_view::parent() const { return *this; } -column_view structs_column_view::get_sliced_child(int index) const +column_view structs_column_view::get_sliced_child(int index, rmm::cuda_stream_view stream) const { std::vector children; children.reserve(child(index).num_children()); for (size_type i = 0; i < child(index).num_children(); i++) { children.push_back(child(index).child(i)); } - return column_view{child(index).type(), - size(), - child(index).head(), - child(index).null_mask(), - // TODO: could potentially compute the actual count here, but at - // the moment this interface doesn't take a stream. - UNKNOWN_NULL_COUNT, - offset(), - children}; + + return column_view{ + child(index).type(), + size(), + child(index).head(), + child(index).null_mask(), + child(index).null_count() + ? cudf::detail::null_count(child(index).null_mask(), offset(), offset() + size(), stream) + : 0, + offset(), + children}; } } // namespace cudf From 483b1e560612bf68e5ec89c13dc37b6eb16d16b7 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 24 Apr 2023 10:15:18 -0700 Subject: [PATCH 2/8] Set struct child null count when superimposing --- cpp/src/structs/utilities.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index b4d5f3457ce..bc24add5e75 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -270,11 +270,12 @@ std::unique_ptr superimpose_nulls_no_sanitize(bitmask_type const* null_m auto content = input->release(); // Build new children columns. - std::for_each( - content.children.begin(), content.children.end(), [current_mask, stream, mr](auto& child) { - child = superimpose_nulls_no_sanitize( - current_mask, cudf::UNKNOWN_NULL_COUNT, std::move(child), stream, mr); - }); + std::for_each(content.children.begin(), + content.children.end(), + [current_mask, new_null_count, stream, mr](auto& child) { + child = superimpose_nulls_no_sanitize( + current_mask, new_null_count, std::move(child), stream, mr); + }); // Replace the children columns. return cudf::make_structs_column(num_rows, From 56a914cb1deb256422b74dae678b0ad128e3f355 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 24 Apr 2023 10:21:18 -0700 Subject: [PATCH 3/8] Compute null count when copying list slice --- cpp/src/lists/copying/copying.cu | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/src/lists/copying/copying.cu b/cpp/src/lists/copying/copying.cu index be316bd644e..1585981f23d 100644 --- a/cpp/src/lists/copying/copying.cu +++ b/cpp/src/lists/copying/copying.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "cudf/types.hpp" #include #include #include @@ -81,10 +82,13 @@ std::unique_ptr copy_slice(lists_column_view const& lists, // Compute the null mask of the result: auto null_mask = cudf::detail::copy_bitmask(lists.null_mask(), start, end, stream, mr); + auto null_count = cudf::detail::null_count( + static_cast(null_mask.data()), 0, end - start, stream); + return make_lists_column(lists_count, std::move(offsets), std::move(child), - cudf::UNKNOWN_NULL_COUNT, + null_count, std::move(null_mask), stream, mr); From 24737144d42b98b6f2821f572aee79259e108983 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 24 Apr 2023 10:29:50 -0700 Subject: [PATCH 4/8] Compute null count when copying string slice --- cpp/src/strings/copying/copying.cu | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/src/strings/copying/copying.cu b/cpp/src/strings/copying/copying.cu index 23406444cfd..e6796c2209b 100644 --- a/cpp/src/strings/copying/copying.cu +++ b/cpp/src/strings/copying/copying.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,10 +73,13 @@ std::unique_ptr copy_slice(strings_column_view const& strings, auto null_mask = cudf::detail::copy_bitmask( strings.null_mask(), offsets_offset, offsets_offset + strings_count, stream, mr); + auto null_count = cudf::detail::null_count( + static_cast(null_mask.data()), 0, strings_count, stream); + return make_strings_column(strings_count, std::move(offsets_column), std::move(chars_column), - UNKNOWN_NULL_COUNT, + null_count, std::move(null_mask)); } From 8604adbcfece5f2fbf3897d6882e092870b7b338 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 24 Apr 2023 10:36:03 -0700 Subject: [PATCH 5/8] Fix include --- cpp/src/lists/copying/copying.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/lists/copying/copying.cu b/cpp/src/lists/copying/copying.cu index 1585981f23d..0e4b631d56b 100644 --- a/cpp/src/lists/copying/copying.cu +++ b/cpp/src/lists/copying/copying.cu @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "cudf/types.hpp" #include #include #include #include #include #include +#include #include #include From 6f3b0f6eb6a2ebc4a17a3f6b93a935d479c50e14 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 25 Apr 2023 15:50:45 -0700 Subject: [PATCH 6/8] Forward streams to all internal calls to the function --- cpp/include/cudf/detail/structs/utilities.hpp | 2 +- cpp/src/copying/concatenate.cu | 2 +- cpp/src/copying/contiguous_split.cu | 30 ++++--- cpp/src/structs/copying/concatenate.cu | 2 +- cpp/src/structs/utilities.cpp | 8 +- cpp/tests/structs/utilities_tests.cpp | 85 ++++++++++++------- cpp/tests/utilities/column_utilities.cu | 10 +-- 7 files changed, 85 insertions(+), 54 deletions(-) diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 5fcc331a382..c0a79142cef 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -79,7 +79,7 @@ struct temporary_nullable_data { * @return New column with concatenated results. */ std::vector> extract_ordered_struct_children( - host_span struct_cols); + host_span struct_cols, rmm::cuda_stream_view stream); /** * @brief Check whether the specified column is of type LIST, or any LISTs in its descendent diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index ad7068eb106..c42cc5c69f9 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -420,7 +420,7 @@ void traverse_children::operator()(host_span setup_source_buf_info(InputIter begin, InputIter end, src_buf_info* head, src_buf_info* current, + rmm::cuda_stream_view stream, int offset_stack_pos = 0, int parent_offset_index = -1, int offset_depth = 0); @@ -449,7 +450,8 @@ struct buf_info_functor { src_buf_info* current, int offset_stack_pos, int parent_offset_index, - int offset_depth) + int offset_depth, + rmm::cuda_stream_view stream) { if (col.nullable()) { std::tie(current, offset_stack_pos) = @@ -491,7 +493,8 @@ std::pair buf_info_functor::operator() buf_info_functor::operator() buf_info_functor::operator() buf_info_functor::operator() buf_info_functor::operator() sliced_children; sliced_children.reserve(scv.num_children()); - std::transform(thrust::make_counting_iterator(0), - thrust::make_counting_iterator(scv.num_children()), - std::back_inserter(sliced_children), - [&scv](size_type child_index) { return scv.get_sliced_child(child_index); }); + std::transform( + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(scv.num_children()), + std::back_inserter(sliced_children), + [&scv, &stream](size_type child_index) { return scv.get_sliced_child(child_index, stream); }); return setup_source_buf_info(sliced_children.begin(), sliced_children.end(), head, current, + stream, offset_stack_pos, parent_offset_index, offset_depth); @@ -634,6 +642,7 @@ std::pair setup_source_buf_info(InputIter begin, InputIter end, src_buf_info* head, src_buf_info* current, + rmm::cuda_stream_view stream, int offset_stack_pos, int parent_offset_index, int offset_depth) @@ -645,7 +654,8 @@ std::pair setup_source_buf_info(InputIter begin, current, offset_stack_pos, parent_offset_index, - offset_depth); + offset_depth, + stream); }); return {current, offset_stack_pos}; } @@ -1044,7 +1054,7 @@ std::vector contiguous_split(cudf::table_view const& input, std::copy(splits.begin(), splits.end(), std::next(h_indices)); // setup source buf info - setup_source_buf_info(input.begin(), input.end(), h_src_buf_info, h_src_buf_info); + setup_source_buf_info(input.begin(), input.end(), h_src_buf_info, h_src_buf_info, stream); // HtoD indices and source buf info to device CUDF_CUDA_TRY(cudaMemcpyAsync( diff --git a/cpp/src/structs/copying/concatenate.cu b/cpp/src/structs/copying/concatenate.cu index 105d84ac14a..19552b2dc03 100644 --- a/cpp/src/structs/copying/concatenate.cu +++ b/cpp/src/structs/copying/concatenate.cu @@ -42,7 +42,7 @@ std::unique_ptr concatenate(host_span columns, rmm::mr::device_memory_resource* mr) { // get ordered children - auto ordered_children = extract_ordered_struct_children(columns); + auto ordered_children = extract_ordered_struct_children(columns, stream); // concatenate them std::vector> children; diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index bc24add5e75..ea244260161 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -36,7 +36,7 @@ namespace cudf::structs::detail { * @copydoc cudf::structs::detail::extract_ordered_struct_children */ std::vector> extract_ordered_struct_children( - host_span struct_cols) + host_span struct_cols, rmm::cuda_stream_view stream) { auto const num_children = struct_cols[0].num_children(); auto const num_cols = static_cast(struct_cols.size()); @@ -56,7 +56,7 @@ std::vector> extract_ordered_struct_children( "Mismatch in number of children during struct concatenate"); CUDF_EXPECTS(struct_cols[0].child(child_index).type() == scv.child(child_index).type(), "Mismatch in child types during struct concatenate"); - children.push_back(scv.get_sliced_child(child_index)); + children.push_back(scv.get_sliced_child(child_index, stream)); } result.push_back(std::move(children)); @@ -161,7 +161,7 @@ struct table_flattener { if (not null_precedence.empty()) { flat_null_precedence.push_back(col_null_order); } } for (decltype(col.num_children()) i = 0; i < col.num_children(); ++i) { - auto const& child = col.get_sliced_child(i); + auto const& child = col.get_sliced_child(i, stream); if (child.type().id() == type_id::STRUCT) { flatten_struct_column(structs_column_view{child}, col_order, col_null_order); } else { @@ -305,7 +305,7 @@ std::pair push_down_nulls_no_sanitize( // Function to rewrite child null mask. auto const child_with_new_mask = [&](auto const& child_idx) { - auto child = structs_view.get_sliced_child(child_idx); + auto child = structs_view.get_sliced_child(child_idx, stream); // If struct is not nullable, child null mask is retained. NOOP. if (not structs_view.nullable()) { return child; } diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index 79be0757162..183056563fc 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "cudf_test/default_stream.hpp" #include #include #include @@ -120,12 +121,12 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) auto expected_nums_col_1 = cudf::column(nums_col); auto expected_structs_col = cudf::test::fixed_width_column_wrapper{{1, 1, 1, 1, 1, 1, 1}}; - auto expected_nums_col_2 = - cudf::column(static_cast(structs_col).get_sliced_child(0)); - auto expected_strings_col = - cudf::column(static_cast(structs_col).get_sliced_child(1)); - auto expected = cudf::table_view{ - {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; + auto expected_nums_col_2 = cudf::column(static_cast(structs_col) + .get_sliced_child(0, cudf::get_default_stream())); + auto expected_strings_col = cudf::column(static_cast(structs_col) + .get_sliced_child(1, cudf::get_default_stream())); + auto expected = cudf::table_view{ + {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; auto flattened_table = cudf::structs::detail::flatten_nested_columns(table, @@ -153,12 +154,12 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) auto expected_nums_col_1 = cudf::column(nums_col); auto expected_structs_col = cudf::test::fixed_width_column_wrapper{ {1, 1, 0, 1, 1, 1, 1}, cudf::test::iterators::null_at(2)}; - auto expected_nums_col_2 = - cudf::column(static_cast(structs_col).get_sliced_child(0)); - auto expected_strings_col = - cudf::column(static_cast(structs_col).get_sliced_child(1)); - auto expected = cudf::table_view{ - {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; + auto expected_nums_col_2 = cudf::column(static_cast(structs_col) + .get_sliced_child(0, cudf::get_default_stream())); + auto expected_strings_col = cudf::column(static_cast(structs_col) + .get_sliced_child(1, cudf::get_default_stream())); + auto expected = cudf::table_view{ + {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; auto flattened_table = cudf::structs::detail::flatten_nested_columns(table, @@ -191,12 +192,17 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) auto expected_nums_col_1 = cudf::column(nums_col); auto expected_structs_col_1 = cudf::test::fixed_width_column_wrapper{{1, 1, 1, 1, 1, 1, 1}}; auto expected_nums_col_2 = - cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(0, cudf::get_default_stream())); auto expected_structs_col_2 = cudf::test::fixed_width_column_wrapper{{1, 1, 1, 1, 1, 1, 1}}; - auto expected_nums_col_3 = cudf::column( - static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); - auto expected_strings_col = cudf::column( - static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected_nums_col_3 = + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(1, cudf::get_default_stream()) + .child(0)); + auto expected_strings_col = + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(1, cudf::get_default_stream()) + .child(1)); auto expected = cudf::table_view{{expected_nums_col_1, expected_structs_col_1, expected_nums_col_2, @@ -235,13 +241,18 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) auto expected_nums_col_1 = cudf::column(nums_col); auto expected_structs_col_1 = cudf::test::fixed_width_column_wrapper{{1, 1, 1, 1, 1, 1, 1}}; auto expected_nums_col_2 = - cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(0, cudf::get_default_stream())); auto expected_structs_col_2 = cudf::test::fixed_width_column_wrapper{ {1, 1, 0, 1, 1, 1, 1}, cudf::test::iterators::null_at(2)}; - auto expected_nums_col_3 = cudf::column( - static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); - auto expected_strings_col = cudf::column( - static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected_nums_col_3 = + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(1, cudf::get_default_stream()) + .child(0)); + auto expected_strings_col = + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(1, cudf::get_default_stream()) + .child(1)); auto expected = cudf::table_view{{expected_nums_col_1, expected_structs_col_1, expected_nums_col_2, @@ -281,13 +292,18 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) auto expected_structs_col_1 = cudf::test::fixed_width_column_wrapper{ {1, 1, 1, 1, 0, 1, 1}, cudf::test::iterators::null_at(4)}; auto expected_nums_col_2 = - cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(0, cudf::get_default_stream())); auto expected_structs_col_2 = cudf::test::fixed_width_column_wrapper{ {1, 1, 1, 1, 0, 1, 1}, cudf::test::iterators::null_at(4)}; - auto expected_nums_col_3 = cudf::column( - static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); - auto expected_strings_col = cudf::column( - static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected_nums_col_3 = + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(1, cudf::get_default_stream()) + .child(0)); + auto expected_strings_col = + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(1, cudf::get_default_stream()) + .child(1)); auto expected = cudf::table_view{{expected_nums_col_1, expected_structs_col_1, expected_nums_col_2, @@ -327,13 +343,18 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) auto expected_structs_col_1 = cudf::test::fixed_width_column_wrapper{ {1, 1, 1, 1, 0, 1, 1}, cudf::test::iterators::null_at(4)}; auto expected_nums_col_2 = - cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(0, cudf::get_default_stream())); auto expected_structs_col_2 = cudf::test::fixed_width_column_wrapper{{1, 1, 0, 1, 0, 1, 1}, {1, 1, 0, 1, 0, 1, 1}}; - auto expected_nums_col_3 = cudf::column( - static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); - auto expected_strings_col = cudf::column( - static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected_nums_col_3 = + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(1, cudf::get_default_stream()) + .child(0)); + auto expected_strings_col = + cudf::column(static_cast(struct_of_structs_col) + .get_sliced_child(1, cudf::get_default_stream()) + .child(1)); auto expected = cudf::table_view{{expected_nums_col_1, expected_structs_col_1, expected_nums_col_2, diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index f391c9cfb57..cef9c143c9d 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -355,8 +355,8 @@ struct column_property_comparator { structs_column_view r_scv(rhs); for (size_type i = 0; i < lhs.num_children(); i++) { - column_view lhs_child = l_scv.get_sliced_child(i); - column_view rhs_child = r_scv.get_sliced_child(i); + column_view lhs_child = l_scv.get_sliced_child(i, cudf::get_default_stream()); + column_view rhs_child = r_scv.get_sliced_child(i, cudf::get_default_stream()); if (!cudf::type_dispatcher(lhs_child.type(), column_property_comparator{}, lhs_child, @@ -746,8 +746,8 @@ struct column_comparator_impl { structs_column_view r_scv(rhs); for (size_type i = 0; i < lhs.num_children(); i++) { - column_view lhs_child = l_scv.get_sliced_child(i); - column_view rhs_child = r_scv.get_sliced_child(i); + column_view lhs_child = l_scv.get_sliced_child(i, cudf::get_default_stream()); + column_view rhs_child = r_scv.get_sliced_child(i, cudf::get_default_stream()); if (!cudf::type_dispatcher(lhs_child.type(), column_comparator{}, lhs_child, @@ -1205,7 +1205,7 @@ struct column_view_printer { iter + view.num_children(), std::ostream_iterator(out_stream, "\n"), [&](size_type index) { - auto child = view.get_sliced_child(index); + auto child = view.get_sliced_child(index, cudf::get_default_stream()); // non-nested types don't typically display their null masks, so do it here for convenience. return (!is_nested(child.type()) && child.nullable() From 55e49d7e1b13fd70ca3c4938c517ba71420d5a2a Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 25 Apr 2023 16:05:50 -0700 Subject: [PATCH 7/8] Forward streams to all internal calls to get_sliced_child --- cpp/include/cudf/detail/gather.cuh | 4 ++-- cpp/src/io/json/write_json.cu | 4 ++-- cpp/src/io/parquet/writer_impl.cu | 2 +- cpp/src/merge/merge.cu | 8 ++++++-- cpp/src/reductions/scan/scan_inclusive.cu | 4 ++-- cpp/src/reshape/interleave_columns.cu | 6 +++--- cpp/src/transform/row_bit_count.cu | 2 +- 7 files changed, 17 insertions(+), 13 deletions(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 5460a0e5a76..955f9914632 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -461,8 +461,8 @@ struct column_gatherer_impl { std::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(column.num_children()), std::back_inserter(sliced_children), - [structs_view = structs_column_view{column}](auto const idx) { - return structs_view.get_sliced_child(idx); + [&stream, structs_view = structs_column_view{column}](auto const idx) { + return structs_view.get_sliced_child(idx, stream); }); std::vector> output_struct_members; diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index aedcafb7aee..817093c8899 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -473,8 +473,8 @@ struct column_to_strings_fn { operator()(column_view const& column, host_span children_names) const { auto const child_it = cudf::detail::make_counting_transform_iterator( - 0, [structs_view = structs_column_view{column}](auto const child_idx) { - return structs_view.get_sliced_child(child_idx); + 0, [&stream = stream_, structs_view = structs_column_view{column}](auto const child_idx) { + return structs_view.get_sliced_child(child_idx, stream); }); auto col_string = operator()(child_it, child_it + column.num_children(), children_names); col_string->set_null_mask(cudf::detail::copy_bitmask(column, stream_, mr_), diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 79581c0d21c..61145007b59 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -101,7 +101,7 @@ size_t column_size(column_view const& column, rmm::cuda_stream_view stream) auto const scol = structs_column_view(column); size_t ret = 0; for (int i = 0; i < scol.num_children(); i++) { - ret += column_size(scol.get_sliced_child(i), stream); + ret += column_size(scol.get_sliced_child(i, stream), stream); } return ret; } else if (column.type().id() == type_id::LIST) { diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index 83ee6793efb..9fc58504d0b 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -364,8 +364,12 @@ std::unique_ptr column_merger::operator()( auto it = cudf::detail::make_counting_transform_iterator( 0, [&, merger = column_merger{row_order_}](size_type i) { - return cudf::type_dispatcher( - lhs.child(i).type(), merger, lhs.get_sliced_child(i), rhs.get_sliced_child(i), stream, mr); + return cudf::type_dispatcher(lhs.child(i).type(), + merger, + lhs.get_sliced_child(i, stream), + rhs.get_sliced_child(i, stream), + stream, + mr); }); auto merged_children = std::vector>(it, it + lhs.num_children()); diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 1b83b5b1105..3c7645dc8d7 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -173,8 +173,8 @@ struct scan_functor { // handle input in case it is a sliced view. auto const input_children = [&] { auto const it = cudf::detail::make_counting_transform_iterator( - 0, [structs_view = structs_column_view{input}, stream](auto const child_idx) { - return structs_view.get_sliced_child(child_idx); + 0, [structs_view = structs_column_view{input}, &stream](auto const child_idx) { + return structs_view.get_sliced_child(child_idx, stream); }); return std::vector(it, it + input.num_children()); }(); diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 43807e19477..015bdd02eca 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -93,9 +93,9 @@ struct interleave_columns_impl> output_struct_members; for (size_type child_idx = 0; child_idx < num_children; ++child_idx) { // Collect children columns from the input structs columns at index `child_idx`. - auto const child_iter = - thrust::make_transform_iterator(structs_columns.begin(), [child_idx](auto const& col) { - return structs_column_view(col).get_sliced_child(child_idx); + auto const child_iter = thrust::make_transform_iterator( + structs_columns.begin(), [&stream = stream, child_idx](auto const& col) { + return structs_column_view(col).get_sliced_child(child_idx, stream); }); auto children = std::vector(child_iter, child_iter + num_columns); diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index b982a010e6e..1507a8ce7c6 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -246,7 +246,7 @@ struct flatten_functor { structs_column_view scv(col); auto iter = cudf::detail::make_counting_transform_iterator( - 0, [&scv](auto i) { return scv.get_sliced_child(i); }); + 0, [&scv, &stream](auto i) { return scv.get_sliced_child(i, stream); }); flatten_hierarchy(iter, iter + scv.num_children(), out, From 8e8af24889b40f80f004d6549421bb9627bf2709 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 25 Apr 2023 16:10:00 -0700 Subject: [PATCH 8/8] Format --- cpp/include/cudf/structs/structs_column_view.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp index d1ec6c8178f..6a9f2890177 100644 --- a/cpp/include/cudf/structs/structs_column_view.hpp +++ b/cpp/include/cudf/structs/structs_column_view.hpp @@ -91,7 +91,7 @@ class structs_column_view : public column_view { * * @param index The index of the child column to return * @param stream The stream on which to perform the operation. Uses the default CUDF - * stream if none is specified. + * stream if none is specified. * @return The child column sliced relative to the parent's offset and size */ [[nodiscard]] column_view get_sliced_child(