Skip to content

Commit

Permalink
Expose max nesting depth in hash function to plugin
Browse files Browse the repository at this point in the history
Signed-off-by: Yan Feng <[email protected]>
  • Loading branch information
ustcfy committed Dec 11, 2024
1 parent bd8b7e6 commit e989b56
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 2 deletions.
9 changes: 9 additions & 0 deletions src/main/cpp/src/HashJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@

extern "C" {

/**
 * JNI entry point backing com.nvidia.spark.rapids.jni.Hash.getMaxNestedDepth().
 *
 * Hands the C++ constant spark_rapids_jni::MAX_NESTED_DEPTH back to the Java caller.
 *
 * NOTE(review): CATCH_STD is defined outside this view; presumably it turns a C++ exception
 * into a pending Java exception and returns the supplied fallback value (0) -- confirm.
 */
JNIEXPORT jint JNICALL Java_com_nvidia_spark_rapids_jni_Hash_getMaxNestedDepth(JNIEnv* env, jclass)
{
  try {
    cudf::jni::auto_set_device(env);
    jint const max_depth = spark_rapids_jni::MAX_NESTED_DEPTH;
    return max_depth;
  }
  CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_murmurHash32(
JNIEnv* env, jclass, jint seed, jlongArray column_handles)
{
Expand Down
1 change: 1 addition & 0 deletions src/main/cpp/src/hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
namespace spark_rapids_jni {

constexpr int64_t DEFAULT_XXHASH64_SEED = 42;
// Maximum nesting depth used by the hash implementation. Declared in this header so that
// hive_hash.cu and the JNI layer (Hash.getMaxNestedDepth) read the same value.
// NOTE(review): presumably this bounds the depth of nested-type columns -- confirm.
constexpr int MAX_NESTED_DEPTH = 8;

/**
* @brief Computes the murmur32 hash value of each row in the input set of columns.
Expand Down
3 changes: 1 addition & 2 deletions src/main/cpp/src/hive_hash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "hash.cuh"
#include "hash.hpp"

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
Expand All @@ -37,8 +38,6 @@ using hive_hash_value_t = int32_t;
constexpr hive_hash_value_t HIVE_HASH_FACTOR = 31;
constexpr hive_hash_value_t HIVE_INIT_HASH = 0;

constexpr int MAX_NESTED_DEPTH = 8;

// Hash of a 32-bit integer key: the key itself is returned unchanged as the hash value.
hive_hash_value_t __device__ inline compute_int(int32_t key) { return key; }

hive_hash_value_t __device__ inline compute_long(int64_t key)
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/com/nvidia/spark/rapids/jni/Hash.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ public class Hash {
// There doesn't appear to be a useful constant in Spark to reference, so the value is hard-coded here. This could break.
static final long DEFAULT_XXHASH64_SEED = 42;

static {
  // Static initializers run in textual order, so the native library must be loaded
  // before any static field whose initializer calls a native method.
  NativeDepsLoader.loadNativeDeps();
}

/**
 * Maximum nesting depth reported by the native hash implementation.
 * The value is read once from native code when this class is initialized, which is why
 * this field must be declared after the static block that loads the native library.
 */
public static final int MAX_NESTED_DEPTH = getMaxNestedDepth();
Expand Down Expand Up @@ -101,6 +103,8 @@ public static ColumnVector hiveHash(ColumnView columns[]) {
return new ColumnVector(hiveHash(columnViews));
}

/**
 * Native accessor implemented in HashJni.cpp; returns the C++ constant
 * spark_rapids_jni::MAX_NESTED_DEPTH.
 */
private static native int getMaxNestedDepth();

private static native long murmurHash32(int seed, long[] viewHandles) throws CudfException;

private static native long xxhash64(long seed, long[] viewHandles) throws CudfException;
Expand Down

0 comments on commit e989b56

Please sign in to comment.