Skip to content

Commit

Permalink
Configurable NaN handling in device_row_comparators (#10870)
Browse files Browse the repository at this point in the history
Further splitting up #9452 -- split off at the suggestion of @bdice 

Related to #10781 and #4760 -- issues and discussions related to NaN comparison behavior.

Authors:
  - Ryan Lee (https://github.com/rwlee)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Jake Hemstad (https://github.com/jrhemstad)
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: #10870
  • Loading branch information
rwlee authored Jun 3, 2022
1 parent fe9a4f8 commit 00f02a4
Show file tree
Hide file tree
Showing 10 changed files with 534 additions and 81 deletions.
369 changes: 295 additions & 74 deletions cpp/include/cudf/table/experimental/row_operators.cuh

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ std::unique_ptr<table> groupby(table_view const& keys,
auto preprocessed_keys = cudf::experimental::row::hash::preprocessed_table::create(keys, stream);
auto const comparator = cudf::experimental::row::equality::self_comparator{preprocessed_keys};
auto const row_hash = cudf::experimental::row::hash::row_hasher{std::move(preprocessed_keys)};
auto const d_key_equal = comparator.device_comparator(has_null, null_keys_are_equal);
auto const d_key_equal = comparator.equal_to(has_null, null_keys_are_equal);
auto const d_row_hash = row_hash.device_hasher(has_null);

size_type constexpr unused_key{std::numeric_limits<size_type>::max()};
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/reductions/scan/rank_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ std::unique_ptr<column> rank_generator(column_view const& order_by,
{
auto comp = cudf::experimental::row::equality::self_comparator(table_view{{order_by}}, stream);
auto const device_comparator =
comp.device_comparator(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))});
comp.equal_to(nullate::DYNAMIC{has_nested_nulls(table_view({order_by}))});
auto ranks = make_fixed_width_column(
data_type{type_to_id<size_type>()}, order_by.size(), mask_state::UNALLOCATED, stream, mr);
auto mutable_ranks = ranks->mutable_view();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/search/contains_nested.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ bool contains_nested_element(column_view const& haystack,

auto const comparator =
cudf::experimental::row::equality::two_table_comparator(haystack_tv, needle_tv, stream);
auto const d_comp = comparator.device_comparator(nullate::DYNAMIC{has_nulls});
auto const d_comp = comparator.equal_to(nullate::DYNAMIC{has_nulls});

auto const begin = cudf::experimental::row::lhs_iterator(0);
auto const end = begin + haystack.size();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/search/search_ordered.cu
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ std::unique_ptr<column> search_ordered(table_view const& haystack,
auto const comparator = cudf::experimental::row::lexicographic::two_table_comparator(
matched_haystack, matched_needles, column_order, null_precedence, stream);
auto const has_nulls = has_nested_nulls(matched_haystack) or has_nested_nulls(matched_needles);
auto const d_comparator = comparator.device_comparator(nullate::DYNAMIC{has_nulls});
auto const d_comparator = comparator.less(nullate::DYNAMIC{has_nulls});

auto const haystack_it = cudf::experimental::row::lhs_iterator(0);
auto const needles_it = cudf::experimental::row::rhs_iterator(0);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/sort/sort_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ std::unique_ptr<column> sorted_order(table_view input,

auto comp =
experimental::row::lexicographic::self_comparator(input, column_order, null_precedence, stream);
auto comparator = comp.device_comparator(nullate::DYNAMIC{has_nested_nulls(input)});
auto comparator = comp.less(nullate::DYNAMIC{has_nested_nulls(input)});

if (stable) {
thrust::stable_sort(rmm::exec_policy(stream),
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/stream_compaction/distinct.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ std::unique_ptr<table> distinct(table_view const& input,
experimental::compaction_hash hash_key(row_hash.device_hasher(has_null));

cudf::experimental::row::equality::self_comparator row_equal(preprocessed_keys);
auto key_equal = row_equal.device_comparator(has_null, nulls_equal);
auto key_equal = row_equal.equal_to(has_null, nulls_equal);

auto iter = cudf::detail::make_counting_transform_iterator(
0, [] __device__(size_type i) { return cuco::make_pair(i, i); });
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ ConfigureTest(TRANSPOSE_TEST transpose/transpose_test.cpp)
# * table tests -----------------------------------------------------------------------------------
ConfigureTest(
TABLE_TEST table/table_tests.cpp table/table_view_tests.cu table/row_operators_tests.cpp
table/experimental_row_operator_tests.cu
)

# ##################################################################################################
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/groupby/lists_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ inline void test_hash_based_sum_agg(column_view const& keys,

auto const null_keys_are_equal =
include_null_keys == null_policy::INCLUDE ? null_equality::EQUAL : null_equality::UNEQUAL;
auto row_equal = comparator.device_comparator(nullate::DYNAMIC{true}, null_keys_are_equal);
auto row_equal = comparator.equal_to(nullate::DYNAMIC{true}, null_keys_are_equal);
auto func = match_expected_fn{num_rows, row_equal};

// For each row in expected table `t[0, num_rows)`, there must be a match
Expand Down
231 changes: 231 additions & 0 deletions cpp/tests/table/experimental_row_operator_tests.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/column/column_view.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/table/table_view.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>

#include <cmath>
#include <vector>

using namespace cudf::test;
using namespace cudf::experimental::row;

/**
 * @brief Typed test fixture for the row-operator tests; it adds no members of its own on top of
 * `cudf::test::BaseFixture`.
 *
 * @tparam T Element type the typed suite is instantiated with
 */
template <typename T>
struct TypedTableViewTest : cudf::test::BaseFixture {
};

// Type list for TypedTableViewTest: IntegralTypesNotBool concatenated with FloatingPointTypes.
using NumericTypesNotBool = Concat<IntegralTypesNotBool, FloatingPointTypes>;
// Register TypedTableViewTest as a typed suite over NumericTypesNotBool.
TYPED_TEST_SUITE(TypedTableViewTest, NumericTypesNotBool);

/**
 * @brief Applies a lexicographic "less" row comparator to each row of `input` paired with itself.
 *
 * The comparator is invoked with the same index on both sides (row `i` versus row `i`), and the
 * boolean outcome for each row is written to the returned column.
 *
 * @tparam PhysicalElementComparator Type of the element comparator forwarded to `less()`
 * @param input Table whose rows are compared
 * @param column_order Per-column ordering handed to the lexicographic comparator
 * @param comparator Element comparator instance forwarded to `less()`
 * @return BOOL8 column holding one comparison result per input row
 */
template <typename PhysicalElementComparator>
auto self_comparison(cudf::table_view input,
                     std::vector<cudf::order> const& column_order,
                     PhysicalElementComparator comparator)
{
  rmm::cuda_stream_view stream{};

  auto const num_rows = input.num_rows();

  // An empty brace list is passed for the null-precedence argument.
  auto const row_comparator = lexicographic::self_comparator{input, column_order, {}, stream};
  auto const row_less       = row_comparator.less(cudf::nullate::NO{}, comparator);

  auto result = cudf::make_numeric_column(
    cudf::data_type(cudf::type_id::BOOL8), num_rows, cudf::mask_state::UNALLOCATED);
  auto* const d_result = result->mutable_view().data<bool>();

  // The same counting sequence feeds both operands, so every row is compared with itself.
  auto const first = thrust::make_counting_iterator(0);
  thrust::transform(rmm::exec_policy(stream), first, first + num_rows, first, d_result, row_less);

  return result;
}

/**
 * @brief Applies a lexicographic "less" row comparator to row `i` of `lhs` and row `i` of `rhs`.
 *
 * One result is produced per row of `lhs`; the right-hand iterator is advanced in lockstep.
 *
 * @tparam PhysicalElementComparator Type of the element comparator forwarded to `less()`
 * @param lhs Left-hand table
 * @param rhs Right-hand table
 * @param column_order Per-column ordering handed to the lexicographic comparator
 * @param comparator Element comparator instance forwarded to `less()`
 * @return BOOL8 column with `lhs.num_rows()` comparison results
 */
template <typename PhysicalElementComparator>
auto two_table_comparison(cudf::table_view lhs,
                          cudf::table_view rhs,
                          std::vector<cudf::order> const& column_order,
                          PhysicalElementComparator comparator)
{
  rmm::cuda_stream_view stream{};

  auto const num_rows = lhs.num_rows();

  // An empty brace list is passed for the null-precedence argument.
  auto const row_comparator =
    lexicographic::two_table_comparator{lhs, rhs, column_order, {}, stream};
  auto const row_less = row_comparator.less(cudf::nullate::NO{}, comparator);

  // Separate left/right index iterators, both starting at row 0.
  auto const left_begin  = cudf::experimental::row::lhs_iterator(0);
  auto const right_begin = cudf::experimental::row::rhs_iterator(0);

  auto result = cudf::make_numeric_column(
    cudf::data_type(cudf::type_id::BOOL8), num_rows, cudf::mask_state::UNALLOCATED);
  auto* const d_result = result->mutable_view().data<bool>();

  thrust::transform(
    rmm::exec_policy(stream), left_begin, left_begin + num_rows, right_begin, d_result, row_less);

  return result;
}

template <typename PhysicalElementComparator>
auto self_equality(cudf::table_view input,
std::vector<cudf::order> const& column_order,
PhysicalElementComparator comparator)
{
rmm::cuda_stream_view stream{};

auto const table_comparator = equality::self_comparator{input, stream};
auto const equal_comparator =
table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator);

auto output = cudf::make_numeric_column(
cudf::data_type(cudf::type_id::BOOL8), input.num_rows(), cudf::mask_state::UNALLOCATED);

thrust::transform(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.num_rows()),
thrust::make_counting_iterator(0),
output->mutable_view().data<bool>(),
equal_comparator);
return output;
}

template <typename PhysicalElementComparator>
auto two_table_equality(cudf::table_view lhs,
cudf::table_view rhs,
std::vector<cudf::order> const& column_order,
PhysicalElementComparator comparator)
{
rmm::cuda_stream_view stream{};

auto const table_comparator = equality::two_table_comparator{lhs, rhs, stream};
auto const equal_comparator =
table_comparator.equal_to(cudf::nullate::NO{}, cudf::null_equality::EQUAL, comparator);
auto const lhs_it = cudf::experimental::row::lhs_iterator(0);
auto const rhs_it = cudf::experimental::row::rhs_iterator(0);

auto output = cudf::make_numeric_column(
cudf::data_type(cudf::type_id::BOOL8), lhs.num_rows(), cudf::mask_state::UNALLOCATED);

thrust::transform(rmm::exec_policy(stream),
lhs_it,
lhs_it + lhs.num_rows(),
rhs_it,
output->mutable_view().data<bool>(),
equal_comparator);
return output;
}

// Two single-column tables compared row by row in DESCENDING order. The expected mask is true
// where the left value is larger than the right one.
TYPED_TEST(TypedTableViewTest, TestLexicographicalComparatorTwoTables)
{
  using T = TypeParam;

  auto const left_column  = fixed_width_column_wrapper<T>{{1, 2, 3, 4}};
  auto const right_column = fixed_width_column_wrapper<T>{{0, 1, 4, 3}};
  auto const left_table   = cudf::table_view{{left_column}};
  auto const right_table  = cudf::table_view{{right_column}};
  auto const descending   = std::vector{cudf::order::DESCENDING};

  // Both element comparators are expected to produce this same result for this input.
  auto const expected = fixed_width_column_wrapper<bool>{{1, 1, 0, 1}};

  auto const physical_result = two_table_comparison(
    left_table, right_table, descending, lexicographic::physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, physical_result->view());

  auto const sorting_result = two_table_comparison(
    left_table, right_table, descending, lexicographic::sorting_physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorting_result->view());
}

// Each row of a single table is compared with itself, so "less" is expected to be false for
// every row under both element comparators.
TYPED_TEST(TypedTableViewTest, TestLexicographicalComparatorSameTable)
{
  using T = TypeParam;

  auto const values     = fixed_width_column_wrapper<T>{{1, 2, 3, 4}};
  auto const table      = cudf::table_view{{values}};
  auto const descending = std::vector{cudf::order::DESCENDING};

  auto const expected = fixed_width_column_wrapper<bool>{{0, 0, 0, 0}};

  auto const physical_result =
    self_comparison(table, descending, lexicographic::physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, physical_result->view());

  auto const sorting_result =
    self_comparison(table, descending, lexicographic::sorting_physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorting_result->view());
}

/**
 * @brief Typed test fixture for the NaN-handling comparator tests; it adds no members of its own
 * on top of `cudf::test::BaseFixture`.
 *
 * @tparam T Element type the typed suite is instantiated with
 */
template <typename T>
struct NaNTableViewTest : cudf::test::BaseFixture {
};

// Register NaNTableViewTest as a typed suite over FloatingPointTypes.
TYPED_TEST_SUITE(NaNTableViewTest, FloatingPointTypes);

// Covers the four NaN/non-NaN pairings (NaN-NaN, NaN-1, 1-NaN, 1-1) in DESCENDING order and
// checks that the two lexicographic element comparators give different answers for NaN versus 1.
TYPED_TEST(NaNTableViewTest, TestLexicographicalComparatorTwoTableNaNCase)
{
  using T = TypeParam;

  auto const left_column  = fixed_width_column_wrapper<T>{{T(NAN), T(NAN), T(1), T(1)}};
  auto const right_column = fixed_width_column_wrapper<T>{{T(NAN), T(1), T(NAN), T(1)}};
  auto const descending   = std::vector{cudf::order::DESCENDING};

  auto const left_table  = cudf::table_view{{left_column}};
  auto const right_table = cudf::table_view{{right_column}};

  // physical_element_comparator: no pairing is expected to compare as "less".
  auto const physical_expected = fixed_width_column_wrapper<bool>{{0, 0, 0, 0}};
  auto const physical_result   = two_table_comparison(
    left_table, right_table, descending, lexicographic::physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(physical_expected, physical_result->view());

  // sorting_physical_element_comparator: only (NaN, 1) is expected to compare as "less".
  auto const sorting_expected = fixed_width_column_wrapper<bool>{{0, 1, 0, 0}};
  auto const sorting_result   = two_table_comparison(
    left_table, right_table, descending, lexicographic::sorting_physical_element_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(sorting_expected, sorting_result->view());
}

// Covers the four NaN/non-NaN pairings (NaN-NaN, NaN-1, 1-NaN, 1-1) and checks that the two
// equality element comparators give different answers for the NaN-NaN pairing.
TYPED_TEST(NaNTableViewTest, TestEqualityComparatorTwoTableNaNCase)
{
  using T = TypeParam;

  auto const left_column  = fixed_width_column_wrapper<T>{{T(NAN), T(NAN), T(1), T(1)}};
  auto const right_column = fixed_width_column_wrapper<T>{{T(NAN), T(1), T(NAN), T(1)}};
  auto const descending   = std::vector{cudf::order::DESCENDING};

  auto const left_table  = cudf::table_view{{left_column}};
  auto const right_table = cudf::table_view{{right_column}};

  // physical_equality_comparator: only the (1, 1) pairing is expected to be equal.
  auto const physical_expected = fixed_width_column_wrapper<bool>{{0, 0, 0, 1}};
  auto const physical_result   = two_table_equality(
    left_table, right_table, descending, equality::physical_equality_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(physical_expected, physical_result->view());

  // nan_equal_physical_equality_comparator: (NaN, NaN) is additionally expected to be equal.
  auto const nan_equal_expected = fixed_width_column_wrapper<bool>{{1, 0, 0, 1}};
  auto const nan_equal_result   = two_table_equality(
    left_table, right_table, descending, equality::nan_equal_physical_equality_comparator{});
  CUDF_TEST_EXPECT_COLUMNS_EQUAL(nan_equal_expected, nan_equal_result->view());
}

0 comments on commit 00f02a4

Please sign in to comment.