Skip to content

Commit

Permalink
add shallow_equal(column_view) and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
karthikeyann committed Sep 7, 2021
1 parent 2365d07 commit 88726a4
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 0 deletions.
17 changes: 17 additions & 0 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -652,5 +652,22 @@ namespace detail {
* @return The hash value
*/
size_t shallow_hash(column_view const& input);

/**
* @brief Equality function for column views based on the shallow state of the column view.
*
* The shallow states compared are: type, size, data pointer, null_mask pointer, offset and the
* column_view of the children recursively. Note that `null_count` is not used.
*
* Note: This equality function will consider a column not equal to a copy of the same column with
* exactly the same contents. It is guaranteed to return true for the same column_view only, even
* if the underlying data changes.
*
* @param lhs The left `column_view` to compare
* @param rhs The right `column_view` to compare
* @return true if the shallow states of the two column views are equal
*/
bool shallow_equal(column_view const& lhs, column_view const& rhs);
} // namespace detail
} // namespace cudf
14 changes: 14 additions & 0 deletions cpp/src/column/column_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,20 @@ size_t shallow_hash(column_view const& input)
});
return hash;
}

/**
 * @brief Compares two column views by their shallow state.
 *
 * Two views are considered equal when their type, size, head pointer, null mask pointer and
 * offset all match, and when they have the same number of children with every pair of
 * corresponding children being shallow-equal as well (checked recursively). `null_count` is
 * not part of the comparison.
 *
 * This is the definition of the `shallow_equal` function declared in `column_view.hpp`.
 *
 * @param lhs The left `column_view` to compare
 * @param rhs The right `column_view` to compare
 * @return true if the shallow states of the two column views are equal
 */
bool shallow_equal(column_view const& lhs, column_view const& rhs)
{
  return (lhs.type() == rhs.type()) and (lhs.size() == rhs.size()) and
         (lhs.head() == rhs.head()) and (lhs.null_mask() == rhs.null_mask()) and
         (lhs.offset() == rhs.offset()) and
         // The four-iterator overload also yields false when the child counts differ.
         std::equal(lhs.child_begin(),
                    lhs.child_end(),
                    rhs.child_begin(),
                    rhs.child_end(),
                    [](auto const& lhs_child, auto const& rhs_child) {
                      return shallow_equal(lhs_child, rhs_child);
                    });
}

/**
 * @brief Former name of `shallow_equal`, kept so existing references continue to compile.
 *
 * @param lhs The left `column_view` to compare
 * @param rhs The right `column_view` to compare
 * @return The result of `shallow_equal(lhs, rhs)`
 */
bool is_shallow_equal(column_view const& lhs, column_view const& rhs)
{
  return shallow_equal(lhs, rhs);
}
} // namespace detail

// Immutable view constructor
Expand Down
133 changes: 133 additions & 0 deletions cpp/tests/column/column_view_shallow_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,136 @@ TYPED_TEST(ColumnViewShallowTests, shallow_hash)
}
}
}

// Checks shallow_equal on column views built from the same column, from copies, after data /
// null mask / null count updates, after slice / split / bit_cast, and for mutable views.
// The steps mutate `col` in sequence, so their order matters.
TYPED_TEST(ColumnViewShallowTests, shallow_equal)
{
using namespace cudf::detail;
auto col = example_column<TypeParam>();
auto col_view = cudf::column_view{*col};
// same column_view = equal
{
EXPECT_TRUE(shallow_equal(col_view, col_view));
}
// copy column_view = equal
{
auto col_view_copy = col_view;
EXPECT_TRUE(shallow_equal(col_view, col_view_copy));
}
// copy column = not equal
{
auto col_new = std::make_unique<cudf::column>(*col);
auto col_view_copy = col_new->view();
EXPECT_FALSE(shallow_equal(col_view, col_view_copy));
}
// new column_view from column = equal
{
auto col_view_new = cudf::column_view{*col};
EXPECT_TRUE(shallow_equal(col_view, col_view_new));
}
// update data + new column_view = equal.
{
// update data by modifying some bits: fixed_width, string, dict, list, struct
if constexpr (cudf::is_fixed_width<TypeParam>()) {
// Update data
auto data = reinterpret_cast<cudf::bitmask_type*>(col->mutable_view().head());
cudf::set_null_mask(data, 2, 64, true);
} else {
// Update child(0).data
auto data = reinterpret_cast<cudf::bitmask_type*>(col->child(0).mutable_view().head());
cudf::set_null_mask(data, 2, 64, true);
}
auto col_view_new = cudf::column_view{*col};
EXPECT_TRUE(shallow_equal(col_view, col_view_new));
}
// add null_mask + new column_view = not equal.
{
col->set_null_mask(cudf::create_null_mask(col->size(), cudf::mask_state::ALL_VALID));
auto col_view_new = cudf::column_view{*col};
EXPECT_FALSE(shallow_equal(col_view, col_view_new));
// Call null_count() on the new view; the comparison result is expected to stay the same.
col_view_new.null_count();
EXPECT_FALSE(shallow_equal(col_view, col_view_new));
// Two views created after the null_mask was added compare equal to each other.
auto col_view_new2 = cudf::column_view{*col};
EXPECT_TRUE(shallow_equal(col_view_new, col_view_new2));
}
col_view = cudf::column_view{*col}; // updating after adding null_mask
// update nulls + new column_view = equal.
{
cudf::set_null_mask(col->mutable_view().null_mask(), 2, 4, false);
auto col_view_new = cudf::column_view{*col};
EXPECT_TRUE(shallow_equal(col_view, col_view_new));
}
// set_null_count + new column_view = equal. set_null_count(UNKNOWN_NULL_COUNT)
{
col->set_null_count(cudf::UNKNOWN_NULL_COUNT);
auto col_view_new = cudf::column_view{*col};
EXPECT_TRUE(shallow_equal(col_view, col_view_new));
col->set_null_count(col->size());
auto col_view_new2 = cudf::column_view{*col};
EXPECT_TRUE(shallow_equal(col_view, col_view_new2));
}

// column_view, diff column = not equal.
{
auto col_diff = example_column<TypeParam>();
auto col_view_diff = cudf::column_view{*col_diff};
EXPECT_FALSE(shallow_equal(col_view, col_view_diff));
}
// column_view, sliced[0, size] = equal (for split too)
{
auto col_sliced = cudf::slice(col_view, {0, col_view.size()});
EXPECT_TRUE(shallow_equal(col_view, col_sliced[0]));
auto col_split = cudf::split(col_view, {0});
EXPECT_FALSE(shallow_equal(col_view, col_split[0]));
EXPECT_TRUE(shallow_equal(col_view, col_split[1]));
}
// column_view, sliced[n:] = not equal (for split too)
{
auto col_sliced = cudf::slice(col_view, {1, col_view.size()});
EXPECT_FALSE(shallow_equal(col_view, col_sliced[0]));
auto col_split = cudf::split(col_view, {1});
EXPECT_FALSE(shallow_equal(col_view, col_split[0]));
EXPECT_FALSE(shallow_equal(col_view, col_split[1]));
}
// column_view, bit_cast = not equal
{
if constexpr (std::is_integral_v<TypeParam> and not std::is_same_v<TypeParam, bool>) {
// Cast to the integer type of the same width but opposite signedness.
using newType = std::conditional_t<std::is_signed_v<TypeParam>,
std::make_unsigned_t<TypeParam>,
std::make_signed_t<TypeParam>>;
auto new_type = cudf::data_type(cudf::type_to_id<newType>());
auto col_bitcast = cudf::bit_cast(col_view, new_type);
EXPECT_FALSE(shallow_equal(col_view, col_bitcast));
}
}
// mutable_column_view, column_view = equal
{
auto col_mutable = cudf::mutable_column_view{*col};
EXPECT_TRUE(shallow_equal(col_mutable, col_view));
}
// mutable_column_view, modified mutable_column_view = equal
// update the children column data = equal
{
auto col_mutable = cudf::mutable_column_view{*col};
if constexpr (cudf::is_fixed_width<TypeParam>()) {
// Update data
auto data = reinterpret_cast<cudf::bitmask_type*>(col->mutable_view().head());
cudf::set_null_mask(data, 1, 32, false);
} else {
// Update child(0).data
auto data = reinterpret_cast<cudf::bitmask_type*>(col->child(0).mutable_view().head());
cudf::set_null_mask(data, 1, 32, false);
}
EXPECT_TRUE(shallow_equal(col_view, col_mutable));
auto col_mutable_new = cudf::mutable_column_view{*col};
EXPECT_TRUE(shallow_equal(col_mutable, col_mutable_new));
}
// update the children column_views = not equal
{
if constexpr (cudf::is_nested<TypeParam>()) {
col->child(0).set_null_mask(
cudf::create_null_mask(col->child(0).size(), cudf::mask_state::ALL_NULL));
auto col_child_updated = cudf::mutable_column_view{*col};
EXPECT_FALSE(shallow_equal(col_view, col_child_updated));
}
}
}

0 comments on commit 88726a4

Please sign in to comment.