Skip to content

Commit

Permalink
Remove unsanitized nulls from MD5 tests.
Browse files: browse the repository at this point in the history
  • Loading branch information
bdice committed Jan 19, 2024
1 parent 8d883e4 commit 0188115
Showing 1 changed file with 31 additions and 59 deletions.
90 changes: 31 additions & 59 deletions cpp/tests/hashing/md5_test.cpp
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -55,10 +55,9 @@ TEST_F(MD5HashTest, MultiValue)
cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
{0, 100, -100, limits::min(), limits::max()});

// Different truth values should be equal
cudf::test::fixed_width_column_wrapper<bool> const bools_col1({0, 1, 1, 1, 0});
cudf::test::fixed_width_column_wrapper<bool> const bools_col2({0, 1, 2, 255, 0});
cudf::test::fixed_width_column_wrapper<bool> const bools_col({0, 1, 1, 1, 0});

// Test against known outputs
auto const string_input1 = cudf::table_view({strings_col});
auto const string_input2 = cudf::table_view({strings_col, strings_col});
auto const md5_string_output1 = cudf::hashing::md5(string_input1);
Expand All @@ -68,47 +67,23 @@ TEST_F(MD5HashTest, MultiValue)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(md5_string_output1->view(), md5_string_results1);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(md5_string_output2->view(), md5_string_results2);

auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1});
auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2});
// Test non-string inputs for self-consistency
auto const input1 = cudf::table_view({strings_col, ints_col, bools_col});
auto const input2 = cudf::table_view({strings_col, ints_col, bools_col});
auto const md5_output1 = cudf::hashing::md5(input1);
auto const md5_output2 = cudf::hashing::md5(input2);
EXPECT_EQ(input1.num_rows(), md5_output1->size());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(md5_output1->view(), md5_output2->view());
}

TEST_F(MD5HashTest, MultiValueNulls)
TEST_F(MD5HashTest, EmptyNullEquivalence)
{
// Nulls with different values should be equal
cudf::test::strings_column_wrapper const strings_col1(
{"",
"Different but null!",
"A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the "
"MD5 hash function. This string needed to be longer.",
"All work and no play makes Jack a dull boy",
R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"},
{1, 0, 0, 1, 0});
cudf::test::strings_column_wrapper const strings_col2(
{"",
"A 60 character string to test MD5's message padding algorithm",
"Very different... but null",
"All work and no play makes Jack a dull boy",
""},
{1, 0, 0, 1, 1}); // empty string is equivalent to null

// Nulls with different values should be equal
using limits = std::numeric_limits<int32_t>;
cudf::test::fixed_width_column_wrapper<int32_t> const ints_col1(
{0, 100, -100, limits::min(), limits::max()}, {1, 0, 0, 1, 1});
cudf::test::fixed_width_column_wrapper<int32_t> const ints_col2(
{0, -200, 200, limits::min(), limits::max()}, {1, 0, 0, 1, 1});
// Test that empty strings hash the same as nulls
cudf::test::strings_column_wrapper const strings_col1({"", ""}, {1, 0});
cudf::test::strings_column_wrapper const strings_col2({"", ""}, {0, 1});

// Nulls with different values should be equal
// Different truth values should be equal
cudf::test::fixed_width_column_wrapper<bool> const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1});
cudf::test::fixed_width_column_wrapper<bool> const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1});

auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1});
auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2});
auto const input1 = cudf::table_view({strings_col1});
auto const input2 = cudf::table_view({strings_col2});

auto const output1 = cudf::hashing::md5(input1);
auto const output2 = cudf::hashing::md5(input2);
Expand All @@ -117,10 +92,12 @@ TEST_F(MD5HashTest, MultiValueNulls)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
}

TEST_F(MD5HashTest, StringListsNulls)
TEST_F(MD5HashTest, StringLists)
{
auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; });

// Test of data serialization: a string should hash the same as a list of
// strings that concatenate to the same input.
cudf::test::strings_column_wrapper const strings_col(
{"",
"A 60 character string to test MD5's message padding algorithm",
Expand All @@ -131,7 +108,7 @@ TEST_F(MD5HashTest, StringListsNulls)

cudf::test::lists_column_wrapper<cudf::string_view> strings_list_col(
{{""},
{{"NULL", "A 60 character string to test MD5's message padding algorithm"}, validity},
{{"", "A 60 character string to test MD5's message padding algorithm"}, validity},
{"A very long (greater than 128 bytes/char string) to test a multi hash-step data point in "
"the "
"MD5 hash function. This string needed to be longer.",
Expand All @@ -153,7 +130,7 @@ class MD5HashTestTyped : public cudf::test::BaseFixture {};

TYPED_TEST_SUITE(MD5HashTestTyped, cudf::test::NumericTypes);

TYPED_TEST(MD5HashTestTyped, Equality)
TYPED_TEST(MD5HashTestTyped, NoNulls)
{
cudf::test::fixed_width_column_wrapper<TypeParam> const col({0, 127, 1, 2, 8});
auto const input = cudf::table_view({col});
Expand All @@ -166,31 +143,26 @@ TYPED_TEST(MD5HashTestTyped, Equality)
CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
}

TYPED_TEST(MD5HashTestTyped, EqualityNulls)
TYPED_TEST(MD5HashTestTyped, WithNulls)
{
using T = TypeParam;

// Nulls with different values should be equal
cudf::test::fixed_width_column_wrapper<T> const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
cudf::test::fixed_width_column_wrapper<T> const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1});

auto const input1 = cudf::table_view({col1});
auto const input2 = cudf::table_view({col2});
cudf::test::fixed_width_column_wrapper<TypeParam> const col({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
auto const input = cudf::table_view({col});

auto const output1 = cudf::hashing::md5(input1);
auto const output2 = cudf::hashing::md5(input2);
// Hash of same input should be equal
auto const output1 = cudf::hashing::md5(input);
auto const output2 = cudf::hashing::md5(input);

EXPECT_EQ(input1.num_rows(), output1->size());
EXPECT_EQ(input.num_rows(), output1->size());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(output1->view(), output2->view());
}

TEST_F(MD5HashTest, TestBoolListsWithNulls)
{
cudf::test::fixed_width_column_wrapper<bool> const col1({0, 255, 255, 16, 27, 18, 100, 1, 2},
cudf::test::fixed_width_column_wrapper<bool> const col1({0, 0, 0, 0, 1, 1, 1, 0, 0},
{1, 0, 0, 0, 1, 1, 1, 0, 0});
cudf::test::fixed_width_column_wrapper<bool> const col2({0, 255, 255, 32, 81, 68, 3, 101, 4},
cudf::test::fixed_width_column_wrapper<bool> const col2({0, 0, 0, 1, 0, 1, 0, 1, 0},
{1, 0, 0, 1, 0, 1, 0, 1, 0});
cudf::test::fixed_width_column_wrapper<bool> const col3({0, 255, 255, 64, 49, 42, 5, 6, 102},
cudf::test::fixed_width_column_wrapper<bool> const col3({0, 0, 0, 1, 1, 0, 0, 0, 1},
{1, 0, 0, 1, 1, 0, 0, 0, 1});

auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; });
Expand Down Expand Up @@ -218,16 +190,16 @@ TYPED_TEST(MD5HashListTestTyped, TestListsWithNulls)
{
using T = TypeParam;

cudf::test::fixed_width_column_wrapper<T> const col1({0, 255, 255, 16, 27, 18, 100, 1, 2},
cudf::test::fixed_width_column_wrapper<T> const col1({0, 0, 0, 0, 27, 18, 100, 0, 0},
{1, 0, 0, 0, 1, 1, 1, 0, 0});
cudf::test::fixed_width_column_wrapper<T> const col2({0, 255, 255, 32, 81, 68, 3, 101, 4},
cudf::test::fixed_width_column_wrapper<T> const col2({0, 0, 0, 32, 0, 68, 0, 101, 0},
{1, 0, 0, 1, 0, 1, 0, 1, 0});
cudf::test::fixed_width_column_wrapper<T> const col3({0, 255, 255, 64, 49, 42, 5, 6, 102},
cudf::test::fixed_width_column_wrapper<T> const col3({0, 0, 0, 64, 49, 0, 0, 0, 102},
{1, 0, 0, 1, 1, 0, 0, 0, 1});

auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; });
cudf::test::lists_column_wrapper<T> const list_col(
{{0, 0, 0}, {127}, {}, {{32, 127, 64}, validity}, {27, 49}, {18, 68}, {100}, {101}, {102}},
{{0, 0, 0}, {}, {}, {{32, 0, 64}, validity}, {27, 49}, {18, 68}, {100}, {101}, {102}},
validity);

auto const input1 = cudf::table_view({col1, col2, col3});
Expand Down

0 comments on commit 0188115

Please sign in to comment.