Skip to content

Commit

Permalink
Update tests/column_utilities to use experimental::equality row c…
Browse files Browse the repository at this point in the history
…omparator (#12777)

This PR is a part of #11844

Authors:
  - Divye Gala (https://github.com/divyegala)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)

URL: #12777
  • Loading branch information
divyegala authored Mar 22, 2023
1 parent b2a65e8 commit 913302a
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 58 deletions.
13 changes: 12 additions & 1 deletion cpp/include/cudf/utilities/type_checks.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -36,4 +36,15 @@ namespace cudf {
*/
bool column_types_equal(column_view const& lhs, column_view const& rhs);

/**
* @brief Compare the type IDs of two `column_view`s
*
* This function returns true if the type ID of `lhs` equals that of `rhs`.
* Only the type ID of each column's type is compared, not the complete type:
* - For fixed point types, the scale is ignored.
*
* NOTE(review): any further per-type checks (e.g. for nested or dictionary
* columns) are defined by the implementation and are not described here.
*
* @param lhs The first `column_view` to compare
* @param rhs The second `column_view` to compare
* @return true if the column types are equivalent, false otherwise
*/
bool column_types_equivalent(column_view const& lhs, column_view const& rhs);

} // namespace cudf
4 changes: 3 additions & 1 deletion cpp/src/copying/purge_nonempty_nulls.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,6 +38,8 @@ bool has_nonempty_null_rows(cudf::column_view const& input, rmm::cuda_stream_vie
{
if (not input.has_nulls()) { return false; } // No nulls => no dirty rows.

if ((input.size() == input.null_count()) && (input.num_children() == 0)) { return false; }

// Cross-reference nullmask and offsets.
auto const type = input.type().id();
auto const offsets = (type == type_id::STRING) ? (strings_column_view{input}).offsets()
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/table/row_operators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ void check_shape_compatibility(table_view const& lhs, table_view const& rhs)
CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(),
"Cannot compare tables with different number of columns");
for (size_type i = 0; i < lhs.num_columns(); ++i) {
CUDF_EXPECTS(column_types_equal(lhs.column(i), rhs.column(i)),
CUDF_EXPECTS(column_types_equivalent(lhs.column(i), rhs.column(i)),
"Cannot compare tables with different column types");
}
}
Expand Down
8 changes: 7 additions & 1 deletion cpp/src/utilities/type_checks.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -69,4 +69,10 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs)
return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs);
}

// Returns true when `lhs` and `rhs` have equivalent types.
//
// The two type ids are compared first. Only when they match is the decision
// handed to `columns_equal_fn`, dispatched on the type of `lhs`; the
// short-circuit guarantees the dispatch never runs for mismatched ids.
bool column_types_equivalent(column_view const& lhs, column_view const& rhs)
{
  return lhs.type().id() == rhs.type().id() and
         type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs);
}

} // namespace cudf
6 changes: 3 additions & 3 deletions cpp/tests/copying/get_value_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -812,7 +812,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid)
// col fields
cudf::test::fixed_width_column_wrapper<TypeParam> f1{1, 2, 3};
cudf::test::strings_column_wrapper f2{"aa", "bbb", "c"};
cudf::test::dictionary_column_wrapper<TypeParam, uint32_t> f3{42, 42, 24};
cudf::test::dictionary_column_wrapper<TypeParam, int32_t> f3{42, 42, 24};
LCW f4{LCW{8, 8, 8}, LCW{9, 9}, LCW{10}};

cudf::test::structs_column_wrapper col{f1, f2, f3, f4};
Expand All @@ -824,7 +824,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid)
// expect fields
cudf::test::fixed_width_column_wrapper<TypeParam> ef1{3};
cudf::test::strings_column_wrapper ef2{"c"};
cudf::test::dictionary_column_wrapper<int32_t, TypeParam> ef3{24};
cudf::test::dictionary_column_wrapper<TypeParam, int32_t> ef3{24};
LCW ef4{LCW{10}};

cudf::table_view expect_data{{ef1, ef2, ef3, ef4}};
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/interop/from_arrow_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -264,7 +264,7 @@ TEST_F(FromArrowTest, DictionaryIndicesType)
auto arrow_table = arrow::Table::Make(schema, {array1, array2, array3});

std::vector<std::unique_ptr<cudf::column>> columns;
auto col = cudf::test::fixed_width_column_wrapper<int32_t>({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1});
auto col = cudf::test::fixed_width_column_wrapper<int64_t>({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1});
columns.emplace_back(std::move(cudf::dictionary::encode(col)));
columns.emplace_back(std::move(cudf::dictionary::encode(col)));
columns.emplace_back(std::move(cudf::dictionary::encode(col)));
Expand Down
109 changes: 61 additions & 48 deletions cpp/tests/utilities/column_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <cudf/strings/convert/convert_datetime.hpp>
#include <cudf/structs/struct_view.hpp>
#include <cudf/structs/structs_column_view.hpp>
#include <cudf/table/row_operators.cuh>
#include <cudf/table/experimental/row_operators.cuh>
#include <cudf/table/table_device_view.cuh>
#include <cudf/utilities/bit.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand All @@ -46,6 +46,7 @@
#include <thrust/iterator/transform_iterator.h>
#include <thrust/logical.h>
#include <thrust/reduce.h>
#include <thrust/remove.h>
#include <thrust/scan.h>
#include <thrust/scatter.h>
#include <thrust/sequence.h>
Expand Down Expand Up @@ -371,55 +372,56 @@ struct column_property_comparator {
}
};

template <typename DeviceComparator>
class corresponding_rows_unequal {
public:
corresponding_rows_unequal(table_device_view d_lhs,
table_device_view d_rhs,
column_device_view lhs_row_indices_,
corresponding_rows_unequal(column_device_view lhs_row_indices_,
column_device_view rhs_row_indices_,
size_type /*fp_ulps*/)
: comp(cudf::nullate::YES{}, d_lhs, d_rhs, cudf::null_equality::EQUAL),
lhs_row_indices(lhs_row_indices_),
rhs_row_indices(rhs_row_indices_)
size_type /*fp_ulps*/,
DeviceComparator comp_,
column_device_view /*lhs*/,
column_device_view /*rhs*/)
: lhs_row_indices(lhs_row_indices_), rhs_row_indices(rhs_row_indices_), comp(comp_)
{
}

cudf::row_equality_comparator<cudf::nullate::YES> comp;

__device__ bool operator()(size_type index)
{
return !comp(lhs_row_indices.element<size_type>(index),
rhs_row_indices.element<size_type>(index));
using cudf::experimental::row::lhs_index_type;
using cudf::experimental::row::rhs_index_type;

return !comp(lhs_index_type{lhs_row_indices.element<size_type>(index)},
rhs_index_type{rhs_row_indices.element<size_type>(index)});
}

column_device_view lhs_row_indices;
column_device_view rhs_row_indices;
DeviceComparator comp;
};

template <typename DeviceComparator>
class corresponding_rows_not_equivalent {
table_device_view d_lhs;
table_device_view d_rhs;

column_device_view lhs_row_indices;
column_device_view rhs_row_indices;

size_type const fp_ulps;
DeviceComparator comp;
column_device_view lhs;
column_device_view rhs;

public:
corresponding_rows_not_equivalent(table_device_view d_lhs,
table_device_view d_rhs,
column_device_view lhs_row_indices_,
corresponding_rows_not_equivalent(column_device_view lhs_row_indices_,
column_device_view rhs_row_indices_,
size_type fp_ulps_)
: d_lhs(d_lhs),
d_rhs(d_rhs),
comp(cudf::nullate::YES{}, d_lhs, d_rhs, null_equality::EQUAL),
lhs_row_indices(lhs_row_indices_),
size_type fp_ulps_,
DeviceComparator comp_,
column_device_view lhs_,
column_device_view rhs_)
: lhs_row_indices(lhs_row_indices_),
rhs_row_indices(rhs_row_indices_),
fp_ulps(fp_ulps_)
fp_ulps(fp_ulps_),
comp(comp_),
lhs(lhs_),
rhs(rhs_)
{
CUDF_EXPECTS(d_lhs.num_columns() == 1 and d_rhs.num_columns() == 1,
"Unsupported number of columns");
}

struct typed_element_not_equivalent {
Expand Down Expand Up @@ -459,23 +461,17 @@ class corresponding_rows_not_equivalent {
}
};

cudf::row_equality_comparator<cudf::nullate::YES> comp;

__device__ bool operator()(size_type index)
{
using cudf::experimental::row::lhs_index_type;
using cudf::experimental::row::rhs_index_type;

auto const lhs_index = lhs_row_indices.element<size_type>(index);
auto const rhs_index = rhs_row_indices.element<size_type>(index);

if (not comp(lhs_index, rhs_index)) {
auto lhs_col = this->d_lhs.column(0);
auto rhs_col = this->d_rhs.column(0);
return type_dispatcher(lhs_col.type(),
typed_element_not_equivalent{},
lhs_col,
rhs_col,
lhs_index,
rhs_index,
fp_ulps);
if (not comp(lhs_index_type{lhs_index}, rhs_index_type{rhs_index})) {
return type_dispatcher(
lhs.type(), typed_element_not_equivalent{}, lhs, rhs, lhs_index, rhs_index, fp_ulps);
}
return false;
}
Expand Down Expand Up @@ -536,25 +532,42 @@ struct column_comparator_impl {
size_type fp_ulps,
int depth)
{
auto d_lhs = cudf::table_device_view::create(table_view{{lhs}});
auto d_rhs = cudf::table_device_view::create(table_view{{rhs}});

auto d_lhs_row_indices = cudf::column_device_view::create(lhs_row_indices);
auto d_rhs_row_indices = cudf::column_device_view::create(rhs_row_indices);

using ComparatorType = std::conditional_t<check_exact_equality,
corresponding_rows_unequal,
corresponding_rows_not_equivalent>;
auto d_lhs = cudf::column_device_view::create(lhs);
auto d_rhs = cudf::column_device_view::create(rhs);

auto lhs_tview = table_view{{lhs}};
auto rhs_tview = table_view{{rhs}};

auto const comparator = cudf::experimental::row::equality::two_table_comparator{
lhs_tview, rhs_tview, cudf::get_default_stream()};
auto const has_nulls = cudf::has_nested_nulls(lhs_tview) or cudf::has_nested_nulls(rhs_tview);

auto const device_comparator = comparator.equal_to<false>(cudf::nullate::DYNAMIC{has_nulls});

using ComparatorType =
std::conditional_t<check_exact_equality,
corresponding_rows_unequal<decltype(device_comparator)>,
corresponding_rows_not_equivalent<decltype(device_comparator)>>;

auto differences = rmm::device_uvector<int>(
lhs.size(), cudf::get_default_stream()); // worst case: everything different
lhs_row_indices.size(), cudf::get_default_stream()); // worst case: everything different
auto input_iter = thrust::make_counting_iterator(0);
auto diff_iter = thrust::copy_if(

thrust::transform(
rmm::exec_policy(cudf::get_default_stream()),
input_iter,
input_iter + lhs_row_indices.size(),
differences.begin(),
ComparatorType(*d_lhs, *d_rhs, *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps));
ComparatorType(
*d_lhs_row_indices, *d_rhs_row_indices, fp_ulps, device_comparator, *d_lhs, *d_rhs));

auto diff_iter = thrust::remove(rmm::exec_policy(cudf::get_default_stream()),
differences.begin(),
differences.end(),
0); // remove the zero entries

differences.resize(thrust::distance(differences.begin(), diff_iter),
cudf::get_default_stream()); // shrink back down
Expand Down
3 changes: 2 additions & 1 deletion cpp/tests/utilities_tests/type_check_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -147,6 +147,7 @@ TEST_F(ColumnTypeCheckTest, DifferentFixedWidth)
fixed_point_column_wrapper<int32_t> rhs5({10000}, numeric::scale_type{0});

EXPECT_FALSE(column_types_equal(lhs5, rhs5));
EXPECT_TRUE(column_types_equivalent(lhs5, rhs5));

// Different rep, same scale
fixed_point_column_wrapper<int32_t> lhs6({10000}, numeric::scale_type{-1});
Expand Down

0 comments on commit 913302a

Please sign in to comment.