Skip to content

Commit

Permalink
replace gxhash with foldhash (#624)
Browse files Browse the repository at this point in the history
Co-authored-by: zhangli20 <[email protected]>
  • Loading branch information
richox and zhangli20 authored Oct 24, 2024
1 parent 99b6c9b commit 716c876
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 16 deletions.
17 changes: 7 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion native-engine/datafusion-ext-plans/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ datafusion-ext-commons = { workspace = true }
datafusion-ext-exprs = { workspace = true }
datafusion-ext-functions = { workspace = true }
derivative = "2.2.0"
foldhash = "0.1.3"
futures = "0.3"
gxhash = "3.4.1"
hashbrown = "0.14.5"
itertools = "0.13.0"
jni = "0.20.0"
Expand Down
5 changes: 4 additions & 1 deletion native-engine/datafusion-ext-plans/src/agg/acc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::{
any::Any,
hash::BuildHasher,
io::{Cursor, Read, Write},
mem::{size_of, size_of_val},
};
Expand Down Expand Up @@ -932,7 +933,9 @@ impl InternalSet {
/// Hashes a byte-like value into a 64-bit hash.
///
/// The hasher is a `foldhash::fast::FixedState` built from the fixed seed
/// `ACC_HASH_SEED`, so every call uses the same seed.
///
/// `value` may be anything that can be viewed as a byte slice. The slice is
/// hashed through `BuildHasher::hash_one`, which requires
/// `std::hash::BuildHasher` to be in scope.
#[inline]
pub fn acc_hash(value: impl AsRef<[u8]>) -> u64 {
    const ACC_HASH_SEED: u32 = 0x7BCB48DA;
    // Built in a `const` so the seeded state is constructed at compile time.
    const HASHER: foldhash::fast::FixedState =
        foldhash::fast::FixedState::with_seed(ACC_HASH_SEED as u64);
    HASHER.hash_one(value.as_ref())
}

impl AggDynSet {
Expand Down
9 changes: 7 additions & 2 deletions native-engine/datafusion-ext-plans/src/agg/agg_hash_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::simd::{cmp::SimdPartialEq, Simd};
use std::{
hash::BuildHasher,
simd::{cmp::SimdPartialEq, Simd},
};

use datafusion::common::Result;
use datafusion_ext_commons::{
Expand Down Expand Up @@ -236,5 +239,7 @@ impl AggHashMap {
/// Hashes a byte-like value into a 32-bit hash that is never zero.
///
/// The 64-bit result of a fixed-seed `foldhash::fast::FixedState` hasher is
/// truncated to its low 32 bits, and the most significant bit is then forced
/// on, which guarantees a non-zero return value.
///
/// Requires `std::hash::BuildHasher` to be in scope for `hash_one`.
pub fn agg_hash(value: impl AsRef<[u8]>) -> u32 {
    // 32-bits non-zero hash
    const AGG_HASH_SEED_HASHING: i64 = 0x3F6F1B93;
    const HASHER: foldhash::fast::FixedState =
        foldhash::fast::FixedState::with_seed(AGG_HASH_SEED_HASHING as u64);
    // `as` binds tighter than `|`; the parentheses only make that explicit.
    (HASHER.hash_one(value.as_ref()) as u32) | 0x80000000
}
6 changes: 5 additions & 1 deletion native-engine/datafusion-ext-plans/src/agg/agg_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

use std::{
hash::BuildHasher,
io::{Cursor, Read, Write},
sync::{Arc, Weak},
};
Expand Down Expand Up @@ -819,5 +820,8 @@ impl<'a> KeyForRadixTournamentTree for RecordsSpillCursor<'a> {
/// Maps a byte-like key to a spill bucket index.
///
/// The key is hashed with a fixed-seed `foldhash::fast::FixedState` hasher,
/// the 64-bit hash is truncated to its low 32 bits, and the result is reduced
/// modulo `NUM_SPILL_BUCKETS`.
///
/// NOTE(review): the final cast to `u16` assumes `NUM_SPILL_BUCKETS` is at
/// most `u16::MAX + 1`; confirm against its definition.
///
/// Requires `std::hash::BuildHasher` to be in scope for `hash_one`.
#[inline]
fn bucket_id(key: impl AsRef<[u8]>) -> u16 {
    const AGG_HASH_SEED_HASHING: i64 = 0xC732BD66;
    const HASHER: foldhash::fast::FixedState =
        foldhash::fast::FixedState::with_seed(AGG_HASH_SEED_HASHING as u64);
    // Only the low 32 bits of the 64-bit hash are used.
    let hash = HASHER.hash_one(key.as_ref()) as u32;
    (hash % NUM_SPILL_BUCKETS as u32) as u16
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

use std::{
fmt::{Debug, Formatter},
hash::{BuildHasher, Hasher},
io::Cursor,
mem::MaybeUninit,
simd::{cmp::SimdPartialEq, Simd},
Expand Down Expand Up @@ -434,8 +435,13 @@ pub fn join_hash_map_schema(data_schema: &SchemaRef) -> SchemaRef {
#[inline]
pub fn join_create_hashes(num_rows: usize, key_columns: &[ArrayRef]) -> Vec<u32> {
const JOIN_HASH_RANDOM_SEED: u32 = 0x1E39FA04;
const HASHER: foldhash::fast::FixedState =
foldhash::fast::FixedState::with_seed(JOIN_HASH_RANDOM_SEED as u64);
let mut hashes = create_hashes(num_rows, key_columns, JOIN_HASH_RANDOM_SEED, |v, h| {
gxhash::gxhash32(v, h as i64)
let mut hasher = HASHER.build_hasher();
hasher.write_u32(h);
hasher.write(v);
hasher.finish() as u32
});

// use 31-bit non-zero hash
Expand Down

0 comments on commit 716c876

Please sign in to comment.