From 913302aefc49db832da2d2d0b053016812805a4b Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 21 Mar 2023 23:08:46 -0400 Subject: [PATCH] Update `tests/column_utilities` to use `experimental::equality` row comparator (#12777) This PR is a part of #11844 Authors: - Divye Gala (https://github.com/divyegala) Approvers: - David Wendt (https://github.com/davidwendt) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/12777 --- cpp/include/cudf/utilities/type_checks.hpp | 13 ++- cpp/src/copying/purge_nonempty_nulls.cu | 4 +- cpp/src/table/row_operators.cu | 2 +- cpp/src/utilities/type_checks.cpp | 8 +- cpp/tests/copying/get_value_tests.cpp | 6 +- cpp/tests/interop/from_arrow_test.cpp | 4 +- cpp/tests/utilities/column_utilities.cu | 109 ++++++++++-------- .../utilities_tests/type_check_tests.cpp | 3 +- 8 files changed, 91 insertions(+), 58 deletions(-) diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 4fa712fe7c3..b925fc8ae92 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,4 +36,15 @@ namespace cudf { */ bool column_types_equal(column_view const& lhs, column_view const& rhs); +/** + * @brief Compare the type IDs of two `column_view`s + * This function returns true if the type of `lhs` equals that of `rhs`. + * - For fixed point types, the scale is ignored. + * + * @param lhs The first `column_view` to compare + * @param rhs The second `column_view` to compare + * @return true if column types match + */ +bool column_types_equivalent(column_view const& lhs, column_view const& rhs); + } // namespace cudf diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu index 5bdf10c8af6..20a8ce986aa 100644 --- a/cpp/src/copying/purge_nonempty_nulls.cu +++ b/cpp/src/copying/purge_nonempty_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,6 +38,8 @@ bool has_nonempty_null_rows(cudf::column_view const& input, rmm::cuda_stream_vie { if (not input.has_nulls()) { return false; } // No nulls => no dirty rows. + if ((input.size() == input.null_count()) && (input.num_children() == 0)) { return false; } + // Cross-reference nullmask and offsets. auto const type = input.type().id(); auto const offsets = (type == type_id::STRING) ? (strings_column_view{input}).offsets() diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index ae49ad17e53..0c6747f2d12 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -334,7 +334,7 @@ void check_shape_compatibility(table_view const& lhs, table_view const& rhs) CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Cannot compare tables with different number of columns"); for (size_type i = 0; i < lhs.num_columns(); ++i) { - CUDF_EXPECTS(column_types_equal(lhs.column(i), rhs.column(i)), + CUDF_EXPECTS(column_types_equivalent(lhs.column(i), rhs.column(i)), "Cannot compare tables with different column types"); } } diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index d297148de45..d6f5c65593a 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,4 +69,10 @@ bool column_types_equal(column_view const& lhs, column_view const& rhs) return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } +bool column_types_equivalent(column_view const& lhs, column_view const& rhs) +{ + if (lhs.type().id() != rhs.type().id()) { return false; } + return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); +} + } // namespace cudf diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp index 1c51eab1f94..a35bbab0176 100644 --- a/cpp/tests/copying/get_value_tests.cpp +++ b/cpp/tests/copying/get_value_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -812,7 +812,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid) // col fields cudf::test::fixed_width_column_wrapper f1{1, 2, 3}; cudf::test::strings_column_wrapper f2{"aa", "bbb", "c"}; - cudf::test::dictionary_column_wrapper f3{42, 42, 24}; + cudf::test::dictionary_column_wrapper f3{42, 42, 24}; LCW f4{LCW{8, 8, 8}, LCW{9, 9}, LCW{10}}; cudf::test::structs_column_wrapper col{f1, f2, f3, f4}; @@ -824,7 +824,7 @@ TYPED_TEST(StructGetValueTestTyped, mixed_types_valid) // expect fields cudf::test::fixed_width_column_wrapper ef1{3}; cudf::test::strings_column_wrapper ef2{"c"}; - cudf::test::dictionary_column_wrapper ef3{24}; + cudf::test::dictionary_column_wrapper ef3{24}; LCW ef4{LCW{10}}; cudf::table_view expect_data{{ef1, ef2, ef3, ef4}}; diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index d2b159fc208..3f4d5bcf20f 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -264,7 +264,7 @@ TEST_F(FromArrowTest, DictionaryIndicesType) auto arrow_table = arrow::Table::Make(schema, {array1, array2, array3}); std::vector> columns; - auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1}); + auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1}); columns.emplace_back(std::move(cudf::dictionary::encode(col))); columns.emplace_back(std::move(cudf::dictionary::encode(col))); columns.emplace_back(std::move(cudf::dictionary::encode(col))); diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 6c441539621..3a94aac1cc9 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -371,55 +372,56 @@ struct column_property_comparator { } }; +template class corresponding_rows_unequal { public: - corresponding_rows_unequal(table_device_view d_lhs, - table_device_view d_rhs, - column_device_view lhs_row_indices_, + corresponding_rows_unequal(column_device_view lhs_row_indices_, column_device_view rhs_row_indices_, - size_type /*fp_ulps*/) - : comp(cudf::nullate::YES{}, d_lhs, d_rhs, cudf::null_equality::EQUAL), - lhs_row_indices(lhs_row_indices_), - rhs_row_indices(rhs_row_indices_) + size_type /*fp_ulps*/, + DeviceComparator comp_, + column_device_view /*lhs*/, + column_device_view /*rhs*/) + : lhs_row_indices(lhs_row_indices_), rhs_row_indices(rhs_row_indices_), comp(comp_) { } - cudf::row_equality_comparator comp; - __device__ bool operator()(size_type index) { - return !comp(lhs_row_indices.element(index), - rhs_row_indices.element(index)); + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; + + return !comp(lhs_index_type{lhs_row_indices.element(index)}, + rhs_index_type{rhs_row_indices.element(index)}); } column_device_view lhs_row_indices; column_device_view rhs_row_indices; + DeviceComparator comp; }; +template class corresponding_rows_not_equivalent { - table_device_view d_lhs; - table_device_view d_rhs; - column_device_view lhs_row_indices; column_device_view rhs_row_indices; - size_type const fp_ulps; + DeviceComparator comp; + column_device_view lhs; + column_device_view rhs; public: - corresponding_rows_not_equivalent(table_device_view d_lhs, - table_device_view d_rhs, - column_device_view lhs_row_indices_, + corresponding_rows_not_equivalent(column_device_view lhs_row_indices_, column_device_view rhs_row_indices_, - size_type fp_ulps_) - : d_lhs(d_lhs), - d_rhs(d_rhs), - comp(cudf::nullate::YES{}, d_lhs, d_rhs, null_equality::EQUAL), - lhs_row_indices(lhs_row_indices_), + size_type fp_ulps_, + DeviceComparator comp_, + column_device_view lhs_, + column_device_view rhs_) + : lhs_row_indices(lhs_row_indices_), rhs_row_indices(rhs_row_indices_), - fp_ulps(fp_ulps_) + fp_ulps(fp_ulps_), + comp(comp_), + lhs(lhs_), + rhs(rhs_) { - CUDF_EXPECTS(d_lhs.num_columns() == 1 and d_rhs.num_columns() == 1, - "Unsupported number of columns"); } struct typed_element_not_equivalent { @@ -459,23 +461,17 @@ class corresponding_rows_not_equivalent { } }; - cudf::row_equality_comparator comp; - __device__ bool operator()(size_type index) { + using cudf::experimental::row::lhs_index_type; + using cudf::experimental::row::rhs_index_type; + auto const lhs_index = lhs_row_indices.element(index); auto const rhs_index = rhs_row_indices.element(index); - if (not comp(lhs_index, rhs_index)) { - auto lhs_col = this->d_lhs.column(0); - auto rhs_col = this->d_rhs.column(0); - return type_dispatcher(lhs_col.type(), - typed_element_not_equivalent{}, - lhs_col, - rhs_col, - lhs_index, - rhs_index, - fp_ulps); + if (not comp(lhs_index_type{lhs_index}, rhs_index_type{rhs_index})) { + return type_dispatcher( + lhs.type(), typed_element_not_equivalent{}, lhs, rhs, lhs_index, rhs_index, fp_ulps); } return false; } @@ -536,25 +532,42 @@ struct column_comparator_impl { size_type fp_ulps, int depth) { - auto d_lhs = cudf::table_device_view::create(table_view{{lhs}}); - auto d_rhs = cudf::table_device_view::create(table_view{{rhs}}); - auto d_lhs_row_indices = cudf::column_device_view::create(lhs_row_indices); auto d_rhs_row_indices = cudf::column_device_view::create(rhs_row_indices); - using ComparatorType = std::conditional_t; + auto d_lhs = cudf::column_device_view::create(lhs); + auto d_rhs = cudf::column_device_view::create(rhs); + + auto lhs_tview = table_view{{lhs}}; + auto rhs_tview = table_view{{rhs}}; + + auto const comparator = cudf::experimental::row::equality::two_table_comparator{ + lhs_tview, rhs_tview, cudf::get_default_stream()}; + auto const has_nulls = cudf::has_nested_nulls(lhs_tview) or cudf::has_nested_nulls(rhs_tview); + + auto const device_comparator = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}); + + using ComparatorType = + std::conditional_t, + corresponding_rows_not_equivalent>; auto differences = rmm::device_uvector( - lhs.size(), cudf::get_default_stream()); // worst case: everything different + lhs_row_indices.size(), cudf::get_default_stream()); // worst case: everything different auto input_iter = thrust::make_counting_iterator(0); - auto diff_iter = thrust::copy_if( + + thrust::transform( rmm::exec_policy(cudf::get_default_stream()), input_iter, input_iter + lhs_row_indices.size(), differences.begin(), - ComparatorType(*d_lhs, *d_rhs, *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps)); + ComparatorType( + *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps, device_comparator, *d_lhs, *d_rhs)); + + auto diff_iter = thrust::remove(rmm::exec_policy(cudf::get_default_stream()), + differences.begin(), + differences.end(), + 0); // remove the zero entries differences.resize(thrust::distance(differences.begin(), diff_iter), cudf::get_default_stream()); // shrink back down diff --git a/cpp/tests/utilities_tests/type_check_tests.cpp b/cpp/tests/utilities_tests/type_check_tests.cpp index 84a2d15d477..f65c3652dc9 100644 --- a/cpp/tests/utilities_tests/type_check_tests.cpp +++ b/cpp/tests/utilities_tests/type_check_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -147,6 +147,7 @@ TEST_F(ColumnTypeCheckTest, DifferentFixedWidth) fixed_point_column_wrapper rhs5({10000}, numeric::scale_type{0}); EXPECT_FALSE(column_types_equal(lhs5, rhs5)); + EXPECT_TRUE(column_types_equivalent(lhs5, rhs5)); // Different rep, same scale fixed_point_column_wrapper lhs6({10000}, numeric::scale_type{-1});