From 8e1ef05b2b96775ce7e1a2f22894ec7a8ebb65a4 Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Thu, 16 Nov 2023 16:43:29 -0500 Subject: [PATCH] Change `nullable()` to `has_nulls()` in `cudf::detail::gather` (#14363) In https://github.com/rapidsai/cudf/pull/13795, we found out that `nullable()` causes severe perf degradation for the nested-type case when the input is read from file via `cudf::io::read_json`. This is because the JSON reader adds a null mask for columns that don't have NULLs. This change is a no-overhead replacement that checks the actual null count instead of checking if a null mask is present. This PR also solves a bug in quantile/median groupby where NULLs were being [set](https://github.com/rapidsai/cudf/blob/8deb3dd7573000e7d87f18a9e2bbe39cf2932e10/cpp/src/groupby/sort/group_quantiles.cu#L73) but the null count was not updated. Authors: - Divye Gala (https://github.com/divyegala) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/14363 --- cpp/include/cudf/detail/gather.cuh | 22 ++++++---- cpp/include/cudf/detail/null_mask.hpp | 17 ++++++++ cpp/include/cudf/table/table_view.hpp | 17 ++++++++ cpp/src/bitmask/null_mask.cu | 15 +++++++ cpp/src/groupby/sort/group_quantiles.cu | 17 +++++--- cpp/tests/join/join_tests.cpp | 56 ------------------------- 6 files changed, 75 insertions(+), 69 deletions(-) diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 955f9914632..c9975ef2199 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -673,14 +673,20 @@ std::unique_ptr gather(table_view const& source_table, mr)); } - auto const nullable = bounds_policy == out_of_bounds_policy::NULLIFY || - std::any_of(source_table.begin(), source_table.end(), [](auto const& col) { - return col.nullable(); - }); - if (nullable) { - auto const op = bounds_policy == out_of_bounds_policy::NULLIFY ? gather_bitmask_op::NULLIFY - : gather_bitmask_op::DONT_CHECK; - gather_bitmask(source_table, gather_map_begin, destination_columns, op, stream, mr); + auto needs_new_bitmask = bounds_policy == out_of_bounds_policy::NULLIFY || + cudf::has_nested_nullable_columns(source_table); + if (needs_new_bitmask) { + needs_new_bitmask = needs_new_bitmask || cudf::has_nested_nulls(source_table); + if (needs_new_bitmask) { + auto const op = bounds_policy == out_of_bounds_policy::NULLIFY + ? gather_bitmask_op::NULLIFY + : gather_bitmask_op::DONT_CHECK; + gather_bitmask(source_table, gather_map_begin, destination_columns, op, stream, mr); + } else { + for (size_type i = 0; i < source_table.num_columns(); ++i) { + set_all_valid_null_masks(source_table.column(i), *destination_columns[i], stream, mr); + } + } } return std::make_unique
(std::move(destination_columns)); diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 8c10bbe416f..74e2ccd2ea1 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -259,6 +260,22 @@ cudf::size_type inplace_bitmask_and(device_span dest_mask, size_type mask_size_bits, rmm::cuda_stream_view stream); +/** + * @brief Recursively set valid null masks for all children. + * + * This function applies all valid null masks to the output column if input column satisfies + * `nullable() == true` condition + * + * @param input input column to check for nullability + * @param output output column to mirror nullability of input + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +void set_all_valid_null_masks(column_view const& input, + column& output, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + } // namespace detail } // namespace cudf diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index b90b2dac012..5d9c930d137 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -336,6 +336,23 @@ inline bool has_nested_nulls(table_view const& input) }); } +/** + * @brief Returns True if the table has a nullable column at any level of the column hierarchy + * + * @param input The table to check for nullable columns + * @return True if the table has nullable columns at any level of the column hierarchy, false + * otherwise + */ +inline bool has_nested_nullable_columns(table_view const& input) +{ + return std::any_of(input.begin(), input.end(), [](auto const& col) { + return col.nullable() || + std::any_of(col.child_begin(), col.child_end(), [](auto const& child_col) { + return has_nested_nullable_columns(table_view{{child_col}}); + }); + }); +} + /** * @brief The function to collect all nullable columns at all nested levels in a given table. * diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index 3ff56eabe1e..1a1cbb17d15 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -510,6 +510,21 @@ std::pair bitmask_or(table_view const& view, return std::pair(std::move(null_mask), 0); } +void set_all_valid_null_masks(column_view const& input, + column& output, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + if (input.nullable()) { + auto mask = detail::create_null_mask(output.size(), mask_state::ALL_VALID, stream, mr); + output.set_null_mask(std::move(mask), 0); + + for (size_type i = 0; i < input.num_children(); ++i) { + set_all_valid_null_masks(input.child(i), output.child(i), stream, mr); + } + } +} + } // namespace detail // Create a bitmask from a specific range diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index a9edcfecbf7..a456d4b5964 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -49,6 +49,7 @@ struct calculate_quantile_fn { double const* d_quantiles; size_type num_quantiles; interpolation interpolation; + size_type* null_count; __device__ void operator()(size_type i) { @@ -68,11 +69,13 @@ struct calculate_quantile_fn { thrust::for_each_n(thrust::seq, thrust::make_counting_iterator(0), num_quantiles, - [d_result = d_result, segment_size, offset](size_type j) { - if (segment_size == 0) + [d_result = d_result, segment_size, offset, this](size_type j) { + if (segment_size == 0) { d_result.set_null(offset + j); - else + atomicAdd(this->null_count, 1); + } else { d_result.set_valid(offset + j); + } }); } }; @@ -104,6 +107,7 @@ struct quantiles_functor { auto values_view = column_device_view::create(values, stream); auto group_size_view = column_device_view::create(group_sizes, stream); auto result_view = mutable_column_device_view::create(result->mutable_view(), stream); + auto null_count = rmm::device_scalar(0, stream, mr); // For each group, calculate quantile if (!cudf::is_dictionary(values.type())) { @@ -118,7 +122,8 @@ struct quantiles_functor { group_offsets.data(), quantile.data(), static_cast(quantile.size()), - interpolation}); + interpolation, + null_count.data()}); } else { auto values_iter = cudf::dictionary::detail::make_dictionary_iterator(*values_view); thrust::for_each_n(rmm::exec_policy(stream), @@ -131,9 +136,11 @@ struct quantiles_functor { group_offsets.data(), quantile.data(), static_cast(quantile.size()), - interpolation}); + interpolation, + null_count.data()}); } + result->set_null_count(null_count.value(stream)); return result; } diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 089db315748..a416df0c7c3 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -1941,62 +1941,6 @@ TEST_F(JoinTest, FullJoinWithStructsAndNulls) CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result); } -TEST_F(JoinTest, Repro_StructsWithoutNullsPushedDown) -{ - // When joining on a STRUCT column, if the parent nulls are not reflected in - // the children, the join might produce incorrect results. - // - // In this test, a fact table of structs is joined against a dimension table. - // Both tables must match (only) on the NULL row. This will fail if the fact table's - // nulls are not pushed down into its children. - using ints = column_wrapper; - using structs = cudf::test::structs_column_wrapper; - using namespace cudf::test::iterators; - - auto make_table = [](auto&& col) { - auto columns = CVector{}; - columns.push_back(std::move(col)); - return cudf::table{std::move(columns)}; - }; - - auto const fact_table = [make_table] { - auto fact_ints = ints{0, 1, 2, 3, 4}; - auto fact_structs = structs{{fact_ints}, no_nulls()}.release(); - // Now set struct validity to invalidate index#3. - cudf::detail::set_null_mask( - fact_structs->mutable_view().null_mask(), 3, 4, false, cudf::get_default_stream()); - // Struct row#3 is null, but Struct.child has a non-null value. - return make_table(std::move(fact_structs)); - }(); - - auto const dimension_table = [make_table] { - auto dim_ints = ints{999}; - auto dim_structs = structs{{dim_ints}, null_at(0)}; - return make_table(dim_structs.release()); - }(); - - auto const result = inner_join(fact_table.view(), dimension_table.view(), {0}, {0}); - EXPECT_EQ(result->num_rows(), 1); // The null STRUCT rows should match. - - // Note: Join result might not have nulls pushed down, since it's an output of gather(). - // Must superimpose parent nulls before comparisons. - auto [superimposed_results, _] = cudf::structs::detail::push_down_nulls( - *result, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - - auto const expected = [] { - auto fact_ints = ints{0}; - auto fact_structs = structs{{fact_ints}, null_at(0)}; - auto dim_ints = ints{0}; - auto dim_structs = structs{{dim_ints}, null_at(0)}; - auto columns = CVector{}; - columns.push_back(fact_structs.release()); - columns.push_back(dim_structs.release()); - return cudf::table{std::move(columns)}; - }(); - - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(superimposed_results, expected); -} - using lcw = cudf::test::lists_column_wrapper; using cudf::test::iterators::null_at;