Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Add shallow hash function and shallow equality comparison for column_view (#9185)" #9283

Merged
merged 1 commit into from
Sep 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 0 additions & 41 deletions cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -633,45 +633,4 @@ column_view bit_cast(column_view const& input, data_type type);
*/
mutable_column_view bit_cast(mutable_column_view const& input, data_type type);

namespace detail {
/**
 * @brief Computes a hash value from the shallow state of the specified column
 *
 * For any two columns, if `is_shallow_equivalent(c0, c1)` is true then
 * `shallow_hash(c0) == shallow_hash(c1)`. The converse is not guaranteed: two columns that are
 * not shallow-equivalent may still produce the same hash value.
 *
 * The complexity of computing the hash value of `input` is `O( count_descendants(input) )`, i.e.,
 * it is independent of the number of elements in the column.
 *
 * This function does _not_ inspect the elements of `input`, access any device memory, or launch
 * any kernels.
 *
 * @param input The `column_view` whose shallow state is hashed
 * @return The hash value derived from the shallow state of `input`.
 */
std::size_t shallow_hash(column_view const& input);

/**
 * @brief Uses only shallow state to determine if two `column_view`s view equivalent columns
 *
 * Two columns are equivalent if, for any operation `F`, the following holds:
 * ```
 * is_shallow_equivalent(c0, c1) ==> The results of F(c0) and F(c1) are equivalent
 * ```
 * For any two non-empty columns, `is_shallow_equivalent(c0, c1)` is true only if they view the
 * exact same physical column. In other words, two physically independent columns may have exactly
 * equivalent elements but their shallow state would not be equivalent.
 *
 * The complexity of this function is `O( min(count_descendants(lhs), count_descendants(rhs)) )`,
 * i.e., it is independent of the number of elements in either column.
 *
 * This function does _not_ inspect the elements of `lhs` or `rhs`, access any device memory, or
 * launch any kernels.
 *
 * @param lhs The left `column_view` to compare
 * @param rhs The right `column_view` to compare
 * @return `true` if `lhs` and `rhs` have equivalent shallow state, `false` otherwise
 */
bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs);
} // namespace detail
} // namespace cudf
36 changes: 0 additions & 36 deletions cpp/include/cudf/detail/hashing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,6 @@

#include <rmm/cuda_stream_view.hpp>

#include <cstddef>
#include <functional>

namespace cudf {
namespace detail {

Expand Down Expand Up @@ -56,38 +53,5 @@ std::unique_ptr<column> serial_murmur_hash3_32(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/* Copyright 2005-2014 Daniel James.
*
* Use, modification and distribution is subject to the Boost Software
* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
/**
 * @brief Mixes a second hash value into a first one, yielding a single hash value.
 *
 * Adapted from the Boost hash_combine function, using a 64-bit constant:
 * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
 *
 * The result is not symmetric in its arguments: swapping `lhs` and `rhs` generally
 * produces a different value.
 *
 * @param lhs The hash value that is mixed into (shifted and xor-ed)
 * @param rhs The hash value that is added to the mix
 * @return The combined hash value
 */
constexpr std::size_t hash_combine(std::size_t lhs, std::size_t rhs)
{
  auto const mix = rhs + 0x9e3779b97f4a7c15 + (lhs << 6) + (lhs >> 2);
  return lhs ^ mix;
}
} // namespace detail
} // namespace cudf

// std::hash specialization for cudf::data_type
namespace std {
template <>
struct hash<cudf::data_type> {
  /**
   * @brief Hashes a `cudf::data_type` from its type id and its scale.
   *
   * The id (converted to `int32_t`) and the scale are each hashed with
   * `std::hash<int32_t>` and the two results are merged with
   * `cudf::detail::hash_combine`, id first.
   *
   * @param type The data type to hash
   * @return The combined hash of the type's id and scale
   */
  std::size_t operator()(cudf::data_type const& type) const noexcept
  {
    std::hash<int32_t> const int_hasher{};
    auto const id_hash    = int_hasher(static_cast<int32_t>(type.id()));
    auto const scale_hash = int_hasher(type.scale());
    return cudf::detail::hash_combine(id_hash, scale_hash);
  }
};
} // namespace std
12 changes: 0 additions & 12 deletions cpp/include/cudf/detail/utilities/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -395,12 +395,6 @@ struct MurmurHash3_32 {
return h;
}

/* Copyright 2005-2014 Daniel James.
*
* Use, modification and distribution is subject to the Boost Software
* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
/**
* @brief Combines two hash values into a new single hash value. Called
* repeatedly to create a hash value from several variables.
Expand Down Expand Up @@ -801,12 +795,6 @@ struct IdentityHash {
IdentityHash() = default;
constexpr IdentityHash(uint32_t seed) : m_seed(seed) {}

/* Copyright 2005-2014 Daniel James.
*
* Use, modification and distribution is subject to the Boost Software
* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
/**
* @brief Combines two hash values into a new single hash value. Called
* repeatedly to create a hash value from several variables.
Expand Down
12 changes: 0 additions & 12 deletions cpp/include/cudf_test/type_lists.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,18 +315,6 @@ using FixedWidthTypesWithoutChrono = Concat<NumericTypes, FixedPointTypes>;
*/
using ComparableTypes = Concat<NumericTypes, ChronoTypes, StringTypes>;

/**
 * @brief Provides a list of all compound types for use in GTest typed tests.
 *
 * The list consists of `cudf::string_view`, `cudf::dictionary32`, `cudf::list_view` and
 * `cudf::struct_view`.
 *
 * Example:
 * ```
 * // Invokes all typed fixture tests for all compound types in libcudf
 * TYPED_TEST_CASE(MyTypedFixture, cudf::test::CompoundTypes);
 * ```
 */
using CompoundTypes =
cudf::test::Types<cudf::string_view, cudf::dictionary32, cudf::list_view, cudf::struct_view>;

/**
* @brief Provides a list of all types supported in libcudf for use in a GTest
* typed test.
Expand Down
55 changes: 0 additions & 55 deletions cpp/src/column/column_view.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@
*/

#include <cudf/column/column_view.hpp>
#include <cudf/detail/hashing.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>

#include <thrust/iterator/transform_iterator.h>

#include <algorithm>
#include <exception>
#include <numeric>
#include <vector>
Expand Down Expand Up @@ -78,59 +76,6 @@ size_type column_view_base::null_count(size_type begin, size_type end) const
? 0
: cudf::count_unset_bits(null_mask(), offset() + begin, offset() + end);
}

// Thin wrapper around a hash value whose operator^ applies hash_combine, so that
// a fold expression over ^ chains hash_combine across several hash values.
struct HashValue {
  std::size_t hash;

  explicit HashValue(std::size_t value) : hash{value} {}

  // Combines this hash (left operand) with `rhs` (right operand); order matters.
  HashValue operator^(HashValue const& rhs) const
  {
    auto const combined = hash_combine(hash, rhs.hash);
    return HashValue{combined};
  }
};

/**
 * @brief Hashes every argument with `std::hash` and folds the results, left to right, into a
 * single value using `hash_combine` (via `HashValue::operator^`).
 *
 * Arguments are taken by const reference so that each `Ts` always deduces to a non-reference
 * type. With forwarding references (`Ts&&`) an lvalue argument deduces `Ts` as `T&`, and
 * `std::hash<T&>` is not a usable specialization, so such a call would fail to compile.
 * Calls that pass temporaries deduce exactly the same `Ts` as before.
 *
 * @tparam Ts Types of the values to hash; each must have an enabled `std::hash` specialization
 * @param ts The values to hash; at least one value is required
 * @return The combined hash value
 */
template <typename... Ts>
constexpr auto hash(Ts const&... ts)
{
  return (... ^ HashValue(std::hash<Ts>{}(ts))).hash;
}

/**
 * @brief Recursively hashes the shallow state of a column and all of its children.
 *
 * A column that is empty, or that descends from an empty column, contributes only its type to
 * the hash; otherwise its type, size, head pointer, null mask pointer and offset are hashed.
 * The hashes of the children are then combined into the result in child order.
 *
 * @param c The column whose shallow state is hashed
 * @param is_parent_empty Whether an ancestor of `c` was empty
 * @return The hash of the shallow state of `c` and its descendants
 */
std::size_t shallow_hash_impl(column_view const& c, bool is_parent_empty = false)
{
  bool const treat_as_empty = is_parent_empty or c.is_empty();

  std::size_t result = treat_as_empty
                         ? hash(c.type(), 0)
                         : hash(c.type(), c.size(), c.head(), c.null_mask(), c.offset());

  // Children inherit emptiness from this column or any of its ancestors.
  for (auto child = c.child_begin(); child != c.child_end(); ++child) {
    result = hash_combine(result, shallow_hash_impl(*child, treat_as_empty));
  }
  return result;
}

// Entry point: hashes the shallow state of `input`, starting the recursion with no empty ancestor.
std::size_t shallow_hash(column_view const& input)
{
  return shallow_hash_impl(input, false);
}

/**
 * @brief Recursively compares the shallow state of two columns and their children.
 *
 * The types must always match. Unless both columns are empty (or an ancestor pair was treated
 * as empty), the size, head pointer, null mask pointer and offset must match as well. The
 * children are then compared pairwise; a differing number of children compares unequal.
 *
 * @param lhs The left column to compare
 * @param rhs The right column to compare
 * @param is_parent_empty Whether the parent pair was already treated as empty
 * @return `true` if the shallow state of both columns is equivalent
 */
bool shallow_equivalent_impl(column_view const& lhs,
                             column_view const& rhs,
                             bool is_parent_empty = false)
{
  bool const is_empty = (lhs.is_empty() and rhs.is_empty()) or is_parent_empty;

  if (not(lhs.type() == rhs.type())) { return false; }

  if (not is_empty) {
    bool const same_state = (lhs.size() == rhs.size()) and (lhs.head() == rhs.head()) and
                            (lhs.null_mask() == rhs.null_mask()) and
                            (lhs.offset() == rhs.offset());
    if (not same_state) { return false; }
  }

  // Emptiness propagates to every descendant pair.
  auto const children_match = [is_empty](auto const& left_child, auto const& right_child) {
    return shallow_equivalent_impl(left_child, right_child, is_empty);
  };
  return std::equal(
    lhs.child_begin(), lhs.child_end(), rhs.child_begin(), rhs.child_end(), children_match);
}
// Entry point: compares the shallow state of `lhs` and `rhs`, starting the recursion with no
// empty ancestor.
bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs)
{
  bool const equivalent = shallow_equivalent_impl(lhs, rhs, false);
  return equivalent;
}
} // namespace detail

// Immutable view constructor
Expand Down
1 change: 0 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ endfunction()
# - column tests ----------------------------------------------------------------------------------
ConfigureTest(COLUMN_TEST
column/bit_cast_test.cpp
column/column_view_shallow_test.cpp
column/column_test.cu
column/column_device_view_test.cu
column/compound_test.cu)
Expand Down
Loading