Skip to content
/ cudf Public
forked from rapidsai/cudf

Commit

Permalink
Change nullable() to has_nulls() in cudf::detail::gather (rapid…
Browse files Browse the repository at this point in the history
…sai#14363)

In rapidsai#13795, we found out that `nullable()` causes severe perf degradation for the nested-type case when the input is read from file via `cudf::io::read_json`. This is because the JSON reader adds a null mask for columns that don't have NULLs. This change is a no-overhead replacement that checks the actual null count instead of checking if a null mask is present.

This PR also solves a bug in quantile/median groupby where NULLs were being [set](https://github.com/rapidsai/cudf/blob/8deb3dd7573000e7d87f18a9e2bbe39cf2932e10/cpp/src/groupby/sort/group_quantiles.cu#L73) but the null count was not updated.

Authors:
  - Divye Gala (https://github.com/divyegala)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)

URL: rapidsai#14363
  • Loading branch information
divyegala authored Nov 16, 2023
1 parent 53127de commit 8e1ef05
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 69 deletions.
22 changes: 14 additions & 8 deletions cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -673,14 +673,20 @@ std::unique_ptr<table> gather(table_view const& source_table,
mr));
}

auto const nullable = bounds_policy == out_of_bounds_policy::NULLIFY ||
std::any_of(source_table.begin(), source_table.end(), [](auto const& col) {
return col.nullable();
});
if (nullable) {
auto const op = bounds_policy == out_of_bounds_policy::NULLIFY ? gather_bitmask_op::NULLIFY
: gather_bitmask_op::DONT_CHECK;
gather_bitmask(source_table, gather_map_begin, destination_columns, op, stream, mr);
auto needs_new_bitmask = bounds_policy == out_of_bounds_policy::NULLIFY ||
cudf::has_nested_nullable_columns(source_table);
if (needs_new_bitmask) {
needs_new_bitmask = needs_new_bitmask || cudf::has_nested_nulls(source_table);
if (needs_new_bitmask) {
auto const op = bounds_policy == out_of_bounds_policy::NULLIFY
? gather_bitmask_op::NULLIFY
: gather_bitmask_op::DONT_CHECK;
gather_bitmask(source_table, gather_map_begin, destination_columns, op, stream, mr);
} else {
for (size_type i = 0; i < source_table.num_columns(); ++i) {
set_all_valid_null_masks(source_table.column(i), *destination_columns[i], stream, mr);
}
}
}

return std::make_unique<table>(std::move(destination_columns));
Expand Down
17 changes: 17 additions & 0 deletions cpp/include/cudf/detail/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/span.hpp>
Expand Down Expand Up @@ -259,6 +260,22 @@ cudf::size_type inplace_bitmask_and(device_span<bitmask_type> dest_mask,
size_type mask_size_bits,
rmm::cuda_stream_view stream);

/**
* @brief Recursively set valid null masks for all children.
*
* This function applies all valid null masks to the output column if input column satisfies
* `nullable() == true` condition
*
* @param input input column to check for nullability
* @param output output column to mirror nullability of input
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
*/
void set_all_valid_null_masks(column_view const& input,
column& output,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail

} // namespace cudf
17 changes: 17 additions & 0 deletions cpp/include/cudf/table/table_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,23 @@ inline bool has_nested_nulls(table_view const& input)
});
}

/**
* @brief Returns True if the table has a nullable column at any level of the column hierarchy
*
* @param input The table to check for nullable columns
* @return True if the table has nullable columns at any level of the column hierarchy, false
* otherwise
*/
inline bool has_nested_nullable_columns(table_view const& input)
{
return std::any_of(input.begin(), input.end(), [](auto const& col) {
return col.nullable() ||
std::any_of(col.child_begin(), col.child_end(), [](auto const& child_col) {
return has_nested_nullable_columns(table_view{{child_col}});
});
});
}

/**
* @brief The function to collect all nullable columns at all nested levels in a given table.
*
Expand Down
15 changes: 15 additions & 0 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,21 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
return std::pair(std::move(null_mask), 0);
}

void set_all_valid_null_masks(column_view const& input,
column& output,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (input.nullable()) {
auto mask = detail::create_null_mask(output.size(), mask_state::ALL_VALID, stream, mr);
output.set_null_mask(std::move(mask), 0);

for (size_type i = 0; i < input.num_children(); ++i) {
set_all_valid_null_masks(input.child(i), output.child(i), stream, mr);
}
}
}

} // namespace detail

// Create a bitmask from a specific range
Expand Down
17 changes: 12 additions & 5 deletions cpp/src/groupby/sort/group_quantiles.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ struct calculate_quantile_fn {
double const* d_quantiles;
size_type num_quantiles;
interpolation interpolation;
size_type* null_count;

__device__ void operator()(size_type i)
{
Expand All @@ -68,11 +69,13 @@ struct calculate_quantile_fn {
thrust::for_each_n(thrust::seq,
thrust::make_counting_iterator(0),
num_quantiles,
[d_result = d_result, segment_size, offset](size_type j) {
if (segment_size == 0)
[d_result = d_result, segment_size, offset, this](size_type j) {
if (segment_size == 0) {
d_result.set_null(offset + j);
else
atomicAdd(this->null_count, 1);
} else {
d_result.set_valid(offset + j);
}
});
}
};
Expand Down Expand Up @@ -104,6 +107,7 @@ struct quantiles_functor {
auto values_view = column_device_view::create(values, stream);
auto group_size_view = column_device_view::create(group_sizes, stream);
auto result_view = mutable_column_device_view::create(result->mutable_view(), stream);
auto null_count = rmm::device_scalar<cudf::size_type>(0, stream, mr);

// For each group, calculate quantile
if (!cudf::is_dictionary(values.type())) {
Expand All @@ -118,7 +122,8 @@ struct quantiles_functor {
group_offsets.data(),
quantile.data(),
static_cast<size_type>(quantile.size()),
interpolation});
interpolation,
null_count.data()});
} else {
auto values_iter = cudf::dictionary::detail::make_dictionary_iterator<T>(*values_view);
thrust::for_each_n(rmm::exec_policy(stream),
Expand All @@ -131,9 +136,11 @@ struct quantiles_functor {
group_offsets.data(),
quantile.data(),
static_cast<size_type>(quantile.size()),
interpolation});
interpolation,
null_count.data()});
}

result->set_null_count(null_count.value(stream));
return result;
}

Expand Down
56 changes: 0 additions & 56 deletions cpp/tests/join/join_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1941,62 +1941,6 @@ TEST_F(JoinTest, FullJoinWithStructsAndNulls)
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*sorted_gold, *sorted_result);
}

TEST_F(JoinTest, Repro_StructsWithoutNullsPushedDown)
{
// When joining on a STRUCT column, if the parent nulls are not reflected in
// the children, the join might produce incorrect results.
//
// In this test, a fact table of structs is joined against a dimension table.
// Both tables must match (only) on the NULL row. This will fail if the fact table's
// nulls are not pushed down into its children.
using ints = column_wrapper<int32_t>;
using structs = cudf::test::structs_column_wrapper;
using namespace cudf::test::iterators;

auto make_table = [](auto&& col) {
auto columns = CVector{};
columns.push_back(std::move(col));
return cudf::table{std::move(columns)};
};

auto const fact_table = [make_table] {
auto fact_ints = ints{0, 1, 2, 3, 4};
auto fact_structs = structs{{fact_ints}, no_nulls()}.release();
// Now set struct validity to invalidate index#3.
cudf::detail::set_null_mask(
fact_structs->mutable_view().null_mask(), 3, 4, false, cudf::get_default_stream());
// Struct row#3 is null, but Struct.child has a non-null value.
return make_table(std::move(fact_structs));
}();

auto const dimension_table = [make_table] {
auto dim_ints = ints{999};
auto dim_structs = structs{{dim_ints}, null_at(0)};
return make_table(dim_structs.release());
}();

auto const result = inner_join(fact_table.view(), dimension_table.view(), {0}, {0});
EXPECT_EQ(result->num_rows(), 1); // The null STRUCT rows should match.

// Note: Join result might not have nulls pushed down, since it's an output of gather().
// Must superimpose parent nulls before comparisons.
auto [superimposed_results, _] = cudf::structs::detail::push_down_nulls(
*result, cudf::get_default_stream(), rmm::mr::get_current_device_resource());

auto const expected = [] {
auto fact_ints = ints{0};
auto fact_structs = structs{{fact_ints}, null_at(0)};
auto dim_ints = ints{0};
auto dim_structs = structs{{dim_ints}, null_at(0)};
auto columns = CVector{};
columns.push_back(fact_structs.release());
columns.push_back(dim_structs.release());
return cudf::table{std::move(columns)};
}();

CUDF_TEST_EXPECT_TABLES_EQUIVALENT(superimposed_results, expected);
}

using lcw = cudf::test::lists_column_wrapper<int32_t>;
using cudf::test::iterators::null_at;

Expand Down

0 comments on commit 8e1ef05

Please sign in to comment.