Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more unit tests for cudf::distinct for nested types with sliced input #10860

Merged
merged 5 commits into from
May 16, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 142 additions & 6 deletions cpp/tests/stream_compaction/distinct_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,20 @@
* limitations under the License.
*/

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/iterator_utilities.hpp>
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <cudf/copying.hpp>
#include <cudf/sorting.hpp>
#include <cudf/stream_compaction.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>
#include <cudf_test/table_utilities.hpp>
#include <cudf_test/type_lists.hpp>

#include <algorithm>
#include <cmath>

Expand Down Expand Up @@ -111,6 +112,35 @@ TEST_F(Distinct, NonNullTable)
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, sorted_result->view());
}

TEST_F(Distinct, SlicedNonNullTable)
{
  // Checks cudf::distinct on a table view that is a slice of a larger table:
  // the rows outside the slice hold filler values and must not appear in the output.
  using ints_column   = cudf::test::fixed_width_column_wrapper<int32_t>;
  using floats_column = cudf::test::fixed_width_column_wrapper<float>;
  constexpr int32_t filler{0};

  // Nine rows each; only rows 2..7 survive the slice below.
  ints_column const payload_a{filler, filler, 6, 6, 3, 5, 8, 5, filler};
  floats_column const payload_b{filler, filler, 6, 6, 3, 4, 9, 4, filler};
  ints_column const key_a{filler, filler, 20, 20, 20, 19, 21, 9, filler};
  ints_column const key_b{filler, filler, 19, 19, 20, 20, 9, 21, filler};

  cudf::table_view const full_table{{payload_a, payload_b, key_a, key_b}};
  auto const sliced_table = cudf::slice(full_table, {2, 8})[0];

  // Columns 2 and 3 together form the key used for de-duplication.
  std::vector<cudf::size_type> const key_indices{2, 3};

  // Rows are listed in ascending key order, matching the sort applied to the result below.
  ints_column const want_payload_a{{5, 5, 6, 3, 8}};
  floats_column const want_payload_b{{4, 4, 6, 3, 9}};
  ints_column const want_key_a{{9, 19, 20, 20, 21}};
  ints_column const want_key_b{{21, 20, 19, 20, 9}};
  cudf::table_view const want{{want_payload_a, want_payload_b, want_key_a, want_key_b}};

  auto const deduplicated = cudf::distinct(sliced_table, key_indices);

  // Sort the output by its key columns so the comparison does not depend on output order.
  auto const output_keys = deduplicated->select(key_indices.begin(), key_indices.end());
  auto const ordered     = cudf::sort_by_key(deduplicated->view(), output_keys);

  CUDF_TEST_EXPECT_TABLES_EQUAL(want, ordered->view());
}

TEST_F(Distinct, WithNull)
{
cudf::test::fixed_width_column_wrapper<int32_t> col{{5, 4, 4, 1, 8, 1}, {1, 0, 1, 1, 1, 1}};
Expand Down Expand Up @@ -160,6 +190,28 @@ TEST_F(Distinct, BasicList)
CUDF_TEST_EXPECT_TABLES_EQUAL(expect, *sorted_result);
}

TEST_F(Distinct, BasicSlicedLists)
{
  // Checks cudf::distinct with a lists column as the key, on a sliced table view.
  using ints_column  = cudf::test::fixed_width_column_wrapper<int32_t>;
  using lists_column = cudf::test::lists_column_wrapper<int32_t>;
  constexpr int32_t filler{0};

  // Rows holding equal lists share the same index value, so the index column can be
  // used to order the output no matter which duplicate row is kept.
  ints_column const row_ids{filler, filler, 1, 2, 1, 3, 4, 5, 5, 6, 4, 4, filler};
  lists_column const list_values{
    {0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {5, 5}};

  // Drop the first two rows and the last row.
  auto const full_table   = cudf::table_view({row_ids, list_values});
  auto const sliced_table = cudf::slice(full_table, {2, 12})[0];

  ints_column const want_ids{1, 2, 3, 4, 5, 6};
  lists_column const want_lists{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
  auto const want = cudf::table_view({want_ids, want_lists});

  // De-duplicate on the lists column (column 1), then order by the index column (column 0).
  auto const deduplicated = cudf::distinct(sliced_table, {1});
  auto const ordered      = cudf::sort_by_key(*deduplicated, deduplicated->select({0}));

  CUDF_TEST_EXPECT_TABLES_EQUAL(want, *ordered);
}

TEST_F(Distinct, NullableList)
{
using LCW = cudf::test::lists_column_wrapper<uint64_t>;
Expand Down Expand Up @@ -245,6 +297,66 @@ TEST_F(Distinct, ListOfStruct)
CUDF_TEST_EXPECT_TABLES_EQUAL(*expect_table, *sorted_result);
}

TEST_F(Distinct, SlicedListsOfStructs)
{
  // Builds a lists column whose elements are two-field structs, then runs cudf::distinct
  // on a slice of it. The rows are listed below; `==` / `!=` tells whether a row equals
  // the row that follows it, and "unused" marks rows that fall outside the slice.
  //  0.  []                    ==   (unused)
  //  1.  []                    !=   (unused)
  //  2.  Null                  ==   (unused)
  //  3.  Null                  !=   (unused)
  //  4.  [Null, Null]          !=   (unused)
  //  5.  [Null]                ==   (unused)
  //  6.  [Null]                ==   (unused)
  //  7.  [Null]                !=   (unused)
  //  8.  [{Null, Null}]        !=
  //  9.  [{1,'a'}, {2,'b'}]    !=
  //  10. [{0,'a'}, {2,'b'}]    !=
  //  11. [{0,'a'}, {2,'c'}]    ==
  //  12. [{0,'a'}, {2,'c'}]    !=
  //  13. [{0,Null}]            ==
  //  14. [{0,Null}]            !=
  //  15. [{Null, 'b'}]         ==   (unused)
  //  16. [{Null, 'b'}]              (unused)

  using ints_column    = cudf::test::fixed_width_column_wrapper<int32_t>;
  using strings_column = cudf::test::strings_column_wrapper;
  using structs_column = cudf::test::structs_column_wrapper;
  using cudf::test::iterators::nulls_at;

  // The 18 struct elements referenced by the list offsets below.
  auto const struct_elements = [] {
    ints_column numbers{{-1, -1, 0, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 0, 0, 1, 2},
                        nulls_at({5, 16, 17})};
    strings_column letters{
      {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"},
      nulls_at({5, 14, 15})};
    // The first five struct elements are null at the struct level.
    return structs_column{{numbers, letters}, nulls_at({0, 1, 2, 3, 4})};
  }();

  // 17 list rows need 18 offsets.
  auto const list_offsets = ints_column{0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 8, 10, 12, 14, 15, 16, 17, 18};

  // Rows 2 and 3 are the only null lists.
  auto const list_validity = std::vector<bool>{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
  auto const validity_mask =
    cudf::test::detail::make_null_mask(list_validity.begin(), list_validity.end());

  // Assemble the lists column view directly from its offsets and struct children.
  auto const list_rows = cudf::column_view(cudf::data_type(cudf::type_id::LIST),
                                           17,
                                           nullptr,
                                           static_cast<cudf::bitmask_type const*>(validity_mask.data()),
                                           cudf::UNKNOWN_NULL_COUNT,
                                           0,
                                           {list_offsets, struct_elements});

  // Equal list rows share the same index value, so sorting by it gives a stable order.
  auto const row_ids      = ints_column{1, 1, 2, 2, 3, 4, 4, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10};
  auto const full_table   = cudf::table_view({row_ids, list_rows});
  auto const sliced_table = cudf::slice(full_table, {8, 15})[0];

  // Expected output: one representative of each distinct row in the slice, taken from the
  // unsliced table.
  auto const gather_map = cudf::test::fixed_width_column_wrapper<cudf::size_type>{8, 9, 10, 11, 13};
  auto const want       = cudf::gather(full_table, gather_map);

  // De-duplicate on the lists column (column 1), then order by the index column (column 0).
  auto const deduplicated = cudf::distinct(sliced_table, {1});
  auto const ordered      = cudf::sort_by_key(*deduplicated, deduplicated->select({0}));

  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*want, *ordered);
}

TEST_F(Distinct, StructOfStruct)
{
using FWCW = cudf::test::fixed_width_column_wrapper<int>;
Expand Down Expand Up @@ -303,6 +415,30 @@ TEST_F(Distinct, StructOfStruct)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(sliced_expect->get_column(1), sorted_sliced_result->get_column(1));
}

TEST_F(Distinct, SlicedStructsOfLists)
{
  // Checks cudf::distinct with a structs-of-lists column as the key, on a sliced table view.
  //
  // cudf::distinct does not promise any particular output row order, so the raw result
  // cannot be compared against a fixed expected table. An integer index column is carried
  // alongside the struct column and used to sort the output before the comparison, the
  // same way the other sliced-input tests in this file do.
  using int32s_col  = cudf::test::fixed_width_column_wrapper<int32_t>;
  using lists_col   = cudf::test::lists_column_wrapper<int32_t>;
  using structs_col = cudf::test::structs_column_wrapper;
  auto constexpr dont_care = int32_t{0};

  // Rows holding equal structs share the same index value, so the sorted output is the
  // same no matter which duplicate row is kept.
  auto const idx = int32s_col{dont_care, dont_care, 1, 2, 1, 3, 4, 5, 5, 6, 4, 4, dont_care};
  auto const structs = [] {
    auto child = lists_col{
      {0, 0}, {0, 0}, {1}, {1, 1}, {1}, {1, 2}, {2, 2}, {2}, {2}, {2, 1}, {2, 2}, {2, 2}, {5, 5}};
    return structs_col{{child}};
  }();

  // Drop the first two rows and the last row.
  auto const input_original = cudf::table_view({idx, structs});
  auto const input          = cudf::slice(input_original, {2, 12})[0];

  auto const exp_idx          = int32s_col{1, 2, 3, 4, 5, 6};
  auto const expected_structs = [] {
    auto child = lists_col{{1}, {1, 1}, {1, 2}, {2, 2}, {2}, {2, 1}};
    return structs_col{{child}};
  }();
  auto const expected = cudf::table_view({exp_idx, expected_structs});

  // De-duplicate on the struct column (column 1), then order by the index column (column 0).
  auto const result        = cudf::distinct(input, {1});
  auto const sorted_result = cudf::sort_by_key(*result, result->select({0}));

  CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *sorted_result);
}

TEST_F(Distinct, StructWithNullElement)
{
using FWCW = cudf::test::fixed_width_column_wrapper<int>;
Expand Down