Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update tests/column_utilities to use experimental::equality row comparator #12777

Merged
Merged
Changes from 30 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
4a8085a
building equality::self_comparator
divyegala Feb 2, 2023
f71d161
two table comp
divyegala Feb 2, 2023
3ca298c
copyright years
divyegala Feb 2, 2023
7c167a7
centralizing repeated logic
divyegala Feb 2, 2023
0ceb79e
address review to create functors
divyegala Feb 3, 2023
37e7326
updating has_nested_columns docs
divyegala Feb 3, 2023
b44f603
Merge remote-tracking branch 'upstream/branch-23.04' into equality-co…
divyegala Feb 3, 2023
c2ff1fc
address review for underscore prefixes in structs
divyegala Feb 7, 2023
c2ca8ee
Merge remote-tracking branch 'upstream/branch-23.04' into equality-co…
divyegala Feb 7, 2023
ffdf10c
Merge remote-tracking branch 'upstream/branch-23.04' into equality-co…
divyegala Feb 8, 2023
53e918f
add rank
divyegala Feb 8, 2023
65e2bce
fix compile times for rank
divyegala Feb 8, 2023
c6bc7f5
Merge remote-tracking branch 'upstream/branch-23.04' into equality-co…
divyegala Feb 8, 2023
1344e33
Apply suggestions from code review
divyegala Feb 11, 2023
4123379
address review
divyegala Feb 11, 2023
26f38b3
Merge remote-tracking branch 'upstream/branch-23.04' into equality-co…
divyegala Feb 11, 2023
9d0f7a6
address review, mark members of functors as private
divyegala Feb 11, 2023
fe41be8
removing partitioning
divyegala Feb 11, 2023
02dd5c5
simplify lists/contains since it already has a nested-type dispatch m…
divyegala Feb 12, 2023
5db4d03
Merge branch 'branch-23.04' into equality-comp-fast-path
divyegala Feb 13, 2023
03d754d
passing tests
divyegala Feb 14, 2023
22d5f90
copyright year
divyegala Feb 14, 2023
52ee8a3
successful compilation
divyegala Feb 14, 2023
4c1f6a1
merge upstream
divyegala Mar 8, 2023
47fea0c
Merge branch 'branch-23.04' into column_utilities-row-comparator
divyegala Mar 10, 2023
5e3791e
Merge remote-tracking branch 'upstream/branch-23.04' into column_util…
divyegala Mar 16, 2023
4f2c80c
address review
divyegala Mar 16, 2023
d094524
Merge branch 'branch-23.04' into column_utilities-row-comparator
divyegala Mar 16, 2023
f6f3325
fix equivalence
divyegala Mar 16, 2023
1611126
Merge branch 'column_utilities-row-comparator' of github.com:divyegal…
divyegala Mar 16, 2023
1fb8e31
fix failing arrow test
divyegala Mar 16, 2023
7d03fdc
get all tests to pass
divyegala Mar 20, 2023
121e45b
Merge remote-tracking branch 'upstream/branch-23.04' into column_util…
divyegala Mar 20, 2023
611f7ae
address review help to solve test failures
divyegala Mar 21, 2023
e526c00
Merge remote-tracking branch 'upstream/branch-23.04' into column_util…
divyegala Mar 21, 2023
4f77dd7
fix purge non empty null test
divyegala Mar 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 64 additions & 52 deletions cpp/tests/utilities/column_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <cudf/strings/convert/convert_datetime.hpp>
#include <cudf/structs/struct_view.hpp>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/bit.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -371,55 +371,54 @@ struct column_property_comparator {
}
};

template <typename DeviceComparator>
class corresponding_rows_unequal {
public:
corresponding_rows_unequal(table_device_view d_lhs,
table_device_view d_rhs,
column_device_view lhs_row_indices_,
corresponding_rows_unequal(column_device_view lhs_row_indices_,
column_device_view rhs_row_indices_,
size_type /*fp_ulps*/)
: comp(cudf::nullate::YES{}, d_lhs, d_rhs, cudf::null_equality::EQUAL),
lhs_row_indices(lhs_row_indices_),
rhs_row_indices(rhs_row_indices_)
size_type /*fp_ulps*/,
DeviceComparator comp_,
column_device_view /*lhs*/,
column_device_view /*rhs*/)
: lhs_row_indices(lhs_row_indices_), rhs_row_indices(rhs_row_indices_), comp(comp_)
{
}

cudf::row_equality_comparator<cudf::nullate::YES> comp;

__device__ bool operator()(size_type index)
{
return !comp(lhs_row_indices.element<size_type>(index),
rhs_row_indices.element<size_type>(index));
using cudf::experimental::row::lhs_index_type;
using cudf::experimental::row::rhs_index_type;

return !comp(lhs_index_type{lhs_row_indices.element<size_type>(index)},
rhs_index_type{rhs_row_indices.element<size_type>(index)});
}

column_device_view lhs_row_indices;
column_device_view rhs_row_indices;
DeviceComparator comp;
};

template <typename DeviceComparator>
class corresponding_rows_not_equivalent {
table_device_view d_lhs;
table_device_view d_rhs;

column_device_view lhs_row_indices;
column_device_view rhs_row_indices;

size_type const fp_ulps;
column_device_view lhs;
column_device_view rhs;

public:
corresponding_rows_not_equivalent(table_device_view d_lhs,
table_device_view d_rhs,
column_device_view lhs_row_indices_,
corresponding_rows_not_equivalent(column_device_view lhs_row_indices_,
column_device_view rhs_row_indices_,
size_type fp_ulps_)
: d_lhs(d_lhs),
d_rhs(d_rhs),
comp(cudf::nullate::YES{}, d_lhs, d_rhs, null_equality::EQUAL),
lhs_row_indices(lhs_row_indices_),
size_type fp_ulps_,
DeviceComparator /*comp*/,
column_device_view lhs_,
column_device_view rhs_)
: lhs_row_indices(lhs_row_indices_),
rhs_row_indices(rhs_row_indices_),
fp_ulps(fp_ulps_)
fp_ulps(fp_ulps_),
lhs(lhs_),
rhs(rhs_)
{
CUDF_EXPECTS(d_lhs.num_columns() == 1 and d_rhs.num_columns() == 1,
"Unsupported number of columns");
}

struct typed_element_not_equivalent {
Expand Down Expand Up @@ -459,23 +458,15 @@ class corresponding_rows_not_equivalent {
}
};

cudf::row_equality_comparator<cudf::nullate::YES> comp;

__device__ bool operator()(size_type index)
{
auto const lhs_index = lhs_row_indices.element<size_type>(index);
auto const rhs_index = rhs_row_indices.element<size_type>(index);

cudf::experimental::row::equality::nan_equal_physical_equality_comparator comp;
if (not comp(lhs_index, rhs_index)) {
auto lhs_col = this->d_lhs.column(0);
auto rhs_col = this->d_rhs.column(0);
return type_dispatcher(lhs_col.type(),
typed_element_not_equivalent{},
lhs_col,
rhs_col,
lhs_index,
rhs_index,
fp_ulps);
return type_dispatcher(
lhs.type(), typed_element_not_equivalent{}, lhs, rhs, lhs_index, rhs_index, fp_ulps);
divyegala marked this conversation as resolved.
Show resolved Hide resolved
}
return false;
}
Expand Down Expand Up @@ -536,28 +527,49 @@ struct column_comparator_impl {
size_type fp_ulps,
int depth)
{
auto d_lhs = cudf::table_device_view::create(table_view{{lhs}});
auto d_rhs = cudf::table_device_view::create(table_view{{rhs}});

auto d_lhs_row_indices = cudf::column_device_view::create(lhs_row_indices);
auto d_rhs_row_indices = cudf::column_device_view::create(rhs_row_indices);

using ComparatorType = std::conditional_t<check_exact_equality,
corresponding_rows_unequal,
corresponding_rows_not_equivalent>;
auto d_lhs = cudf::column_device_view::create(lhs);
auto d_rhs = cudf::column_device_view::create(rhs);

auto differences = rmm::device_uvector<int>(
lhs.size(), cudf::get_default_stream()); // worst case: everything different
auto input_iter = thrust::make_counting_iterator(0);
auto diff_iter = thrust::copy_if(
rmm::exec_policy(cudf::get_default_stream()),
input_iter,
input_iter + lhs_row_indices.size(),
differences.begin(),
ComparatorType(*d_lhs, *d_rhs, *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps));

differences.resize(thrust::distance(differences.begin(), diff_iter),
cudf::get_default_stream()); // shrink back down
auto const comparator_helper = [&](auto const device_comparator) {
using ComparatorType =
std::conditional_t<check_exact_equality,
corresponding_rows_unequal<decltype(device_comparator)>,
corresponding_rows_not_equivalent<decltype(device_comparator)>>;

auto diff_iter = thrust::copy_if(
rmm::exec_policy(cudf::get_default_stream()),
input_iter,
input_iter + lhs_row_indices.size(),
differences.begin(),
ComparatorType(
*d_lhs_row_indices, *d_rhs_row_indices, fp_ulps, device_comparator, *d_lhs, *d_rhs));

differences.resize(thrust::distance(differences.begin(), diff_iter),
cudf::get_default_stream()); // shrink back down
divyegala marked this conversation as resolved.
Show resolved Hide resolved
};

if constexpr (check_exact_equality) {
auto lhs_tview = table_view{{lhs}};
auto rhs_tview = table_view{{rhs}};

auto const comparator = cudf::experimental::row::equality::two_table_comparator{
lhs_tview, rhs_tview, cudf::get_default_stream()};
auto const has_nulls = cudf::has_nested_nulls(lhs_tview) or cudf::has_nested_nulls(rhs_tview);

auto const device_comparator = comparator.equal_to<false>(cudf::nullate::DYNAMIC{has_nulls});
comparator_helper(device_comparator);
} else {
// equivalence can be checked between column of different types,
// but the new comparator does not support that
comparator_helper(int{0});
}
divyegala marked this conversation as resolved.
Show resolved Hide resolved

if (not differences.is_empty()) {
if (verbosity != debug_output_level::QUIET) {
Expand Down