Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle nullptr return value from bitmask_or in distinct_count #13590

Merged
merged 2 commits into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions cpp/src/stream_compaction/distinct_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -150,15 +150,24 @@ cudf::size_type distinct_count(table_view const& keys,
stream.value()};

auto const iter = thrust::counting_iterator<cudf::size_type>(0);
// when nulls are equal, insert non-null rows only to improve efficiency
// when nulls are equal, we skip hashing any row that has a null
// in every column to improve efficiency.
if (nulls_equal == null_equality::EQUAL and has_nulls) {
thrust::counting_iterator<size_type> stencil(0);
// We must consider a row if any of its column entries is valid,
// hence OR together the validities of the columns.
auto const [row_bitmask, null_count] =
cudf::detail::bitmask_or(keys, stream, rmm::mr::get_current_device_resource());
row_validity pred{static_cast<bitmask_type const*>(row_bitmask.data())};

return key_set.insert_if(iter, iter + num_rows, stencil, pred, stream.value()) +
static_cast<cudf::size_type>(null_count > 0);
// Unless all columns have a null mask, row_bitmask will be
// null, and null_count will be zero. Equally, unless there is
// some row which is null in all columns, null_count will be
// zero. So, it is only when null_count is not zero that we need
// to do a filtered insertion.
if (null_count > 0) {
row_validity pred{static_cast<bitmask_type const*>(row_bitmask.data())};
return key_set.insert_if(iter, iter + num_rows, stencil, pred, stream.value()) + 1;
}
}
// otherwise, insert all
return key_set.insert(iter, iter + num_rows, stream.value());
Expand Down
10 changes: 10 additions & 0 deletions cpp/tests/stream_compaction/distinct_count_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,16 @@ TEST_F(DistinctCount, TableWithNull)
EXPECT_EQ(10, cudf::distinct_count(input, null_equality::UNEQUAL));
}

TEST_F(DistinctCount, TableWithSomeNull)
{
cudf::test::fixed_width_column_wrapper<int32_t> col1{{1, 2, 3, 4, 5, 6}, {1, 0, 1, 0, 1, 0}};
cudf::test::fixed_width_column_wrapper<int32_t> col2{{1, 1, 1, 1, 1, 1}};
cudf::table_view input{{col1, col2}};

EXPECT_EQ(4, cudf::distinct_count(input, null_equality::EQUAL));
EXPECT_EQ(6, cudf::distinct_count(input, null_equality::UNEQUAL));
}

TEST_F(DistinctCount, EmptyColumnedTable)
{
std::vector<cudf::column_view> cols{};
Expand Down