Skip to content

Commit

Permalink
Add more unit tests for cudf::distinct for nested types with sliced…
Browse files Browse the repository at this point in the history
… input (#10860)

This adds more nested-type tests for `cudf::distinct`, covering `List<Struct<...>>` and `Struct<List<...>>` columns, with sliced input columns.

Partially addresses #10742.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - David Wendt (https://github.com/davidwendt)

URL: #10860
  • Loading branch information
ttnghia authored May 16, 2022
1 parent 09b7045 commit db07df1
Showing 1 changed file with 142 additions and 6 deletions.
148 changes: 142 additions & 6 deletions cpp/tests/stream_compaction/distinct_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,20 @@
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/iterator_utilities.hpp>
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/copying.hpp>
#include <cudf/sorting.hpp>
#include <cudf/stream_compaction.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <algorithm>
#include <cmath>

Expand Down Expand Up @@ -111,6 +112,35 @@ TEST_F(Distinct, NonNullTable)
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, sorted_result->view());
}

// Checks cudf::distinct on a sliced, null-free table with two key columns.
// Rows outside the slice hold a placeholder value and must not affect the result.
TEST_F(Distinct, SlicedNonNullTable)
{
  using int32s_col = cudf::test::fixed_width_column_wrapper<int32_t>;
  using floats_col = cudf::test::fixed_width_column_wrapper<float>;

  // Placeholder for rows that the slice below excludes.
  auto constexpr dont_care = int32_t{0};

  // Payload columns followed by the two key columns; only rows [2, 8) are used.
  int32s_col const payload_a{dont_care, dont_care, 6, 6, 3, 5, 8, 5, dont_care};
  floats_col const payload_b{dont_care, dont_care, 6, 6, 3, 4, 9, 4, dont_care};
  int32s_col const key_a{dont_care, dont_care, 20, 20, 20, 19, 21, 9, dont_care};
  int32s_col const key_b{dont_care, dont_care, 19, 19, 20, 20, 9, 21, dont_care};

  cudf::table_view const full_table{{payload_a, payload_b, key_a, key_b}};
  auto const sliced_table = cudf::slice(full_table, {2, 8})[0];

  // Column indices of the key columns within the table.
  std::vector<cudf::size_type> const key_indices{2, 3};

  // Run distinct, then order the output by its key columns so that the
  // comparison below does not depend on the output order of distinct.
  auto const distinct_rows = cudf::distinct(sliced_table, key_indices);
  auto const ordered_rows  = cudf::sort_by_key(
    distinct_rows->view(), distinct_rows->select(key_indices.begin(), key_indices.end()));

  // Expected rows, listed in ascending order of the keys.
  int32s_col const want_payload_a{5, 5, 6, 3, 8};
  floats_col const want_payload_b{4, 4, 6, 3, 9};
  int32s_col const want_key_a{9, 19, 20, 20, 21};
  int32s_col const want_key_b{21, 20, 19, 20, 9};
  cudf::table_view const expected{{want_payload_a, want_payload_b, want_key_a, want_key_b}};

  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, ordered_rows->view());
}

TEST_F(Distinct, WithNull)
{
cudf::test::fixed_width_column_wrapper<int32_t> col{{5, 4, 4, 1, 8, 1}, {1, 0, 1, 1, 1, 1}};
Expand Down Expand Up @@ -160,6 +190,28 @@ TEST_F(Distinct, BasicList)
CUDF_TEST_EXPECT_TABLES_EQUAL(expect, *sorted_result);
}

// Checks cudf::distinct when the key is a sliced lists column.
// An index column travels with the lists so the output can be put in a known order.
TEST_F(Distinct, BasicSlicedLists)
{
  using int32s_col = cudf::test::fixed_width_column_wrapper<int32_t>;
  using lists_col  = cudf::test::lists_column_wrapper<int32_t>;

  // Placeholder for index rows that the slice below excludes.
  auto constexpr dont_care = int32_t{0};

  // Equal lists carry equal index values, so whichever duplicate is kept
  // yields the same output row.
  int32s_col const row_ids{dont_care, dont_care, 1, 2, 1, 3, 4, 5, 5, 6, 4, 4, dont_care};
  lists_col const list_keys{
    {0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {5, 5}};

  auto const full_table   = cudf::table_view({row_ids, list_keys});
  auto const sliced_table = cudf::slice(full_table, {2, 12})[0];

  // Deduplicate on the lists column (index 1), then sort by the index column (index 0).
  auto const distinct_rows = cudf::distinct(sliced_table, {1});
  auto const ordered_rows  = cudf::sort_by_key(*distinct_rows, distinct_rows->select({0}));

  int32s_col const want_ids{1, 2, 3, 4, 5, 6};
  lists_col const want_lists{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
  auto const expected = cudf::table_view({want_ids, want_lists});

  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *ordered_rows);
}

TEST_F(Distinct, NullableList)
{
using LCW = cudf::test::lists_column_wrapper<uint64_t>;
Expand Down Expand Up @@ -245,6 +297,66 @@ TEST_F(Distinct, ListOfStruct)
CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *sorted_result);
}

// Checks cudf::distinct when the key is a sliced LIST<STRUCT<int32, string>> column.
TEST_F(Distinct, SlicedListsOfStructs)
{
// Constructing a lists column whose elements are structs with two fields (int32, string).
// Each line below is one list row. The trailing `==` / `!=` marks whether that row
// is equal to the row that follows it. Rows marked "Don't care" fall outside the
// slice taken further down.
// 0. [] == <- Don't care
// 1. [] != <- Don't care
// 2. Null == <- Don't care
// 3. Null != <- Don't care
// 4. [Null, Null] != <- Don't care
// 5. [Null] == <- Don't care
// 6. [Null] == <- Don't care
// 7. [Null] != <- Don't care
// 8. [{Null, Null}] !=
// 9. [{1,'a'}, {2,'b'}] !=
// 10. [{0,'a'}, {2,'b'}] !=
// 11. [{0,'a'}, {2,'c'}] ==
// 12. [{0,'a'}, {2,'c'}] !=
// 13. [{0,Null}] ==
// 14. [{0,Null}] !=
// 15. [{Null, 'b'}] == <- Don't care
// 16. [{Null, 'b'}] <- Don't care

using int32s_col = cudf::test::fixed_width_column_wrapper<int32_t>;
using strings_col = cudf::test::strings_column_wrapper;
using structs_col = cudf::test::structs_column_wrapper;
using cudf::test::iterators::nulls_at;

// Child structs column with 18 rows. Struct rows 0-4 are null at the struct level;
// the two field columns carry their own nulls at the listed positions.
auto const structs = [] {
auto child1 =
int32s_col{{-1, -1, 0, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, 1, 2}, nulls_at({5, 16, 17})};
auto child2 = strings_col{
{"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"},
nulls_at({5, 14, 15})};
return structs_col{{child1, child2}, nulls_at({0, 1, 2, 3, 4})};
}();

// 18 offsets delimit the 17 list rows; list row i covers struct rows
// [offsets[i], offsets[i + 1]).
auto const offsets = int32s_col{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18};
// Validity of the 17 list rows: rows 2 and 3 are null lists.
auto const lists_nullmask = std::vector<bool>{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
auto const nullmask_buf =
cudf::test::detail::make_null_mask(lists_nullmask.begin(), lists_nullmask.end());
// The lists column is assembled by hand as a column_view over the pieces above.
// It is given a raw pointer into `nullmask_buf`, so that buffer (and, presumably,
// `offsets` and `structs`) must stay alive for as long as `lists` is used.
auto const lists = cudf::column_view(cudf::data_type(cudf::type_id::LIST),
17,
nullptr,
static_cast<cudf::bitmask_type const*>(nullmask_buf.data()),
cudf::UNKNOWN_NULL_COUNT,
0,
{offsets, structs});

// Index column used to order the output. List rows that compare equal share the
// same index value (e.g. rows 11 and 12 -> 8, rows 13 and 14 -> 9).
auto const idx = int32s_col{1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10};
auto const input_original = cudf::table_view({idx, lists});
// Keep only rows 8 through 14, i.e. the rows not marked "Don't care" above.
auto const input = cudf::slice(input_original, {8, 15})[0];

// Deduplicate on the lists column (index 1), then sort by the index column (index 0).
auto const result = cudf::distinct(input, {1});
auto const sorted_result = cudf::sort_by_key(*result, result->select({0}));

// Expected rows, given as row positions in the unsliced table: one row from each
// group of equal lists within the slice.
auto const exp_map = cudf::test::fixed_width_column_wrapper<cudf::size_type>{8, 9, 10, 11, 13};
auto const expected_table = cudf::gather(input_original, exp_map);

// NOTE(review): EQUIVALENT rather than EQUAL is used here, presumably to tolerate
// non-semantic differences between the gathered and the computed tables - confirm.
CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*expected_table, *sorted_result);
}

TEST_F(Distinct, StructOfStruct)
{
using FWCW = cudf::test::fixed_width_column_wrapper<int>;
Expand Down Expand Up @@ -303,6 +415,30 @@ TEST_F(Distinct, StructOfStruct)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(sliced_expect->get_column(1), sorted_sliced_result->get_column(1));
}

// Checks cudf::distinct when the key is a sliced STRUCT<LIST<int32>> column.
//
// cudf::distinct does not guarantee the order of its output rows, so the result
// cannot be compared against a fixed row order directly. An index column is
// carried alongside the structs and the output is sorted by it before the
// comparison, in the same way as the other sliced-input tests in this file.
TEST_F(Distinct, SlicedStructsOfLists)
{
  using int32s_col  = cudf::test::fixed_width_column_wrapper<int32_t>;
  using lists_col   = cudf::test::lists_column_wrapper<int32_t>;
  using structs_col = cudf::test::structs_column_wrapper;

  // Placeholder for index rows that the slice below excludes.
  auto constexpr dont_care = int32_t{0};

  // Equal structs carry equal index values, so whichever duplicate is kept
  // produces the same output row.
  auto const idx     = int32s_col{dont_care, dont_care, 1, 2, 1, 3, 4, 5, 5, 6, 4, 4, dont_care};
  auto const structs = [] {
    auto child = lists_col{
      {0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {5, 5}};
    return structs_col{{child}};
  }();

  auto const input_original = cudf::table_view({idx, structs});
  // Only rows [2, 12) take part in the test.
  auto const input = cudf::slice(input_original, {2, 12})[0];

  auto const exp_idx          = int32s_col{1, 2, 3, 4, 5, 6};
  auto const expected_structs = [] {
    auto child = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
    return structs_col{{child}};
  }();
  auto const expected = cudf::table_view({exp_idx, expected_structs});

  // Deduplicate on the structs column (index 1), then sort by the index column
  // (index 0) to obtain a deterministic row order.
  auto const result        = cudf::distinct(input, {1});
  auto const sorted_result = cudf::sort_by_key(*result, result->select({0}));

  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *sorted_result);
}

TEST_F(Distinct, StructWithNullElement)
{
using FWCW = cudf::test::fixed_width_column_wrapper<int>;
Expand Down

0 comments on commit db07df1

Please sign in to comment.