Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(group_by): support two-level hashmap #5075

Merged
merged 5 commits into from
May 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions query/src/common/hashtable/hash_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ use std::alloc::Layout;
use std::marker::PhantomData;
use std::mem;

use crate::common::hashtable::hash_table_grower::Grower;
use crate::common::hashtable::hash_table_grower::HashTableGrower;
use crate::common::HashTableEntity;
use crate::common::HashTableIter;
use crate::common::HashTableIteratorKind;
use crate::common::HashTableKeyable;

pub struct HashTable<Key: HashTableKeyable, Entity: HashTableEntity<Key>> {
pub struct HashTable<Key: HashTableKeyable, Entity: HashTableEntity<Key>, Grower: HashTableGrower> {
size: usize,
grower: Grower,
entities: *mut Entity,
Expand All @@ -35,7 +35,9 @@ pub struct HashTable<Key: HashTableKeyable, Entity: HashTableEntity<Key>> {
generics_hold: PhantomData<Key>,
}

impl<Key: HashTableKeyable, Entity: HashTableEntity<Key>> Drop for HashTable<Key, Entity> {
impl<Key: HashTableKeyable, Entity: HashTableEntity<Key>, Grower: HashTableGrower> Drop
for HashTable<Key, Entity, Grower>
{
fn drop(&mut self) {
unsafe {
let size = (self.grower.max_size() as usize) * mem::size_of::<Entity>();
Expand All @@ -53,8 +55,10 @@ impl<Key: HashTableKeyable, Entity: HashTableEntity<Key>> Drop for HashTable<Key
}
}

impl<Key: HashTableKeyable, Entity: HashTableEntity<Key>> HashTable<Key, Entity> {
pub fn create() -> HashTable<Key, Entity> {
impl<Key: HashTableKeyable, Entity: HashTableEntity<Key>, Grower: HashTableGrower>
HashTable<Key, Entity, Grower>
{
pub fn create() -> HashTable<Key, Entity, Grower> {
let size = (1 << 8) * mem::size_of::<Entity>();
unsafe {
let layout = Layout::from_size_align_unchecked(size, mem::align_of::<Entity>());
Expand Down Expand Up @@ -83,8 +87,12 @@ impl<Key: HashTableKeyable, Entity: HashTableEntity<Key>> HashTable<Key, Entity>
}

#[inline(always)]
pub fn iter(&self) -> HashTableIter<Key, Entity> {
HashTableIter::create(self.grower.max_size(), self.entities, self.zero_entity)
pub fn iter(&self) -> HashTableIteratorKind<Key, Entity> {
HashTableIteratorKind::create_hash_table_iter(
self.grower.max_size(),
self.entities,
self.zero_entity,
)
}

#[inline(always)]
Expand All @@ -96,6 +104,14 @@ impl<Key: HashTableKeyable, Entity: HashTableEntity<Key>> HashTable<Key, Entity>
}
}

/// Inserts `key` using a hash value supplied by the caller.
///
/// The zero-key path (`insert_if_zero_key`) is tried first; when it reports
/// that it did not handle the key, the insertion falls through to
/// `insert_non_zero_key`. `inserted` is forwarded unchanged to whichever
/// helper runs (presumably it is set to whether a new entity was created;
/// confirm against the helpers).
///
/// Returns a raw pointer to the entity chosen by the helper.
#[inline(always)]
pub fn insert_hash_key(&mut self, key: &Key, hash: u64, inserted: &mut bool) -> *mut Entity {
    if let Some(zero_entity) = self.insert_if_zero_key(key, hash, inserted) {
        return zero_entity;
    }

    self.insert_non_zero_key(key, hash, inserted)
}

#[inline(always)]
pub fn find_key(&self, key: &Key) -> Option<*mut Entity> {
if !key.is_zero() {
Expand All @@ -119,6 +135,7 @@ impl<Key: HashTableKeyable, Entity: HashTableEntity<Key>> HashTable<Key, Entity>
let grower = &self.grower;

let mut place_value = grower.place(hash_value);

while !self.entities.offset(place_value).is_zero()
&& !self
.entities
Expand Down
1 change: 1 addition & 0 deletions query/src/common/hashtable/hash_table_entity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ where
Key: HashTableKeyable,
Value: Sized + Copy,
{
/// Reports whether this entity holds the zero key by delegating to the
/// key's own `is_zero`.
///
/// # Safety
///
/// `self` is dereferenced, so the caller must pass a pointer to a valid,
/// readable entity.
#[inline(always)]
unsafe fn is_zero(self: *mut Self) -> bool {
(*self).key.is_zero()
}
Expand Down
69 changes: 60 additions & 9 deletions query/src/common/hashtable/hash_table_grower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,45 +12,96 @@
// See the License for the specific language governing permissions and
// limitations under the License.

/// Sizing and probing policy used by a hash table.
///
/// The descriptions below reflect what the `TwoLevelGrower` implementation
/// in this file does; other implementors are expected to follow the same
/// contract (NOTE(review): confirm for implementors outside this file).
pub trait HashTableGrower: Default + Clone {
/// Returns the current number of slots.
fn max_size(&self) -> isize;
/// Returns `true` when `size` elements are too many for the current slot
/// count (the visible implementation uses "more than half of `max_size`").
fn overflow(&self, size: usize) -> bool;
/// Maps a hash value to its initial slot index (the visible implementation
/// masks the hash with `max_size - 1`).
fn place(&self, hash_value: u64) -> isize;
/// Returns the slot to try after `old_place` (the visible implementation
/// steps by one and wraps around at `max_size`).
fn next_place(&self, old_place: isize) -> isize;
/// Grows the slot count to the next size step.
fn increase_size(&mut self);
}

#[derive(Clone)]
pub struct Grower {
pub struct SingleLevelGrower {
size_degree: u8,
max_size: isize,
}

impl Default for Grower {
impl Default for SingleLevelGrower {
fn default() -> Self {
Grower {
SingleLevelGrower {
size_degree: 8,
max_size: 1_isize << 8,
}
}
}

impl Grower {
impl HashTableGrower for SingleLevelGrower {
#[inline(always)]
pub fn max_size(&self) -> isize {
fn max_size(&self) -> isize {
self.max_size
}

#[inline(always)]
pub fn overflow(&self, size: usize) -> bool {
fn overflow(&self, size: usize) -> bool {
size > ((1_usize) << (self.size_degree - 1))
}

#[inline(always)]
pub fn place(&self, hash_value: u64) -> isize {
fn place(&self, hash_value: u64) -> isize {
hash_value as isize & (self.max_size() - 1)
}

#[inline(always)]
pub fn next_place(&self, old_place: isize) -> isize {
fn next_place(&self, old_place: isize) -> isize {
(old_place + 1) & (self.max_size() - 1)
}

#[inline(always)]
pub fn increase_size(&mut self) {
fn increase_size(&mut self) {
self.size_degree += if self.size_degree >= 23 { 1 } else { 2 };
self.max_size = 1_isize << self.size_degree;
}
}

/// Growth policy state for the two-level hash table.
///
/// `max_size` is always kept equal to `1 << size_degree`.
#[derive(Clone)]
pub struct TwoLevelGrower {
    /// Base-2 logarithm of the slot count.
    size_degree: u8,
    /// Slot count, i.e. `1 << size_degree`.
    max_size: isize,
}

impl Default for TwoLevelGrower {
    /// Starts with a size degree of 8, i.e. 256 slots.
    fn default() -> Self {
        let size_degree: u8 = 8;
        Self {
            size_degree,
            max_size: 1_isize << size_degree,
        }
    }
}

impl HashTableGrower for TwoLevelGrower {
    /// Returns the current number of slots.
    #[inline(always)]
    fn max_size(&self) -> isize {
        self.max_size
    }

    /// Returns `true` once `size` exceeds half of the current slot count.
    #[inline(always)]
    fn overflow(&self, size: usize) -> bool {
        let half_capacity = 1_usize << (self.size_degree - 1);
        size > half_capacity
    }

    /// Maps `hash_value` to its initial slot by keeping the low
    /// `size_degree` bits.
    #[inline(always)]
    fn place(&self, hash_value: u64) -> isize {
        let mask = self.max_size() - 1;
        (hash_value as isize) & mask
    }

    /// Returns the slot following `old_place`, wrapping back to 0 at the end.
    #[inline(always)]
    fn next_place(&self, old_place: isize) -> isize {
        let mask = self.max_size() - 1;
        (old_place + 1) & mask
    }

    /// Grows the table: the size degree rises by 2 while it is below 15
    /// (4x slots) and by 1 afterwards (2x slots).
    #[inline(always)]
    fn increase_size(&mut self) {
        let step: u8 = if self.size_degree >= 15 { 1 } else { 2 };
        self.size_degree += step;
        self.max_size = 1_isize << self.size_degree;
    }
}
63 changes: 63 additions & 0 deletions query/src/common/hashtable/hash_table_iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,41 @@ use std::marker::PhantomData;

use crate::common::HashTableEntity;

/// Entity iterator that is backed by one of two concrete iterator types,
/// so callers can hold either behind a single type.
pub enum HashTableIteratorKind<Key, Entity: HashTableEntity<Key>> {
/// Wraps a plain `HashTableIter`.
HashMapIterator(HashTableIter<Key, Entity>),
/// Wraps a `TwoLevelHashTableIter`, which itself walks a list of
/// `HashTableIteratorKind` values.
TwoLevelHashMapIter(TwoLevelHashTableIter<Key, Entity>),
}

impl<Key, Entity: HashTableEntity<Key>> HashTableIteratorKind<Key, Entity> {
pub fn create_hash_table_iter(
capacity: isize,
entities: *mut Entity,
zero_entity: Option<*mut Entity>,
) -> Self {
Self::HashMapIterator(HashTableIter::<Key, Entity>::create(
capacity,
entities,
zero_entity,
))
}

pub fn create_two_level_hash_table_iter(
iters: Vec<HashTableIteratorKind<Key, Entity>>,
) -> Self {
Self::TwoLevelHashMapIter(TwoLevelHashTableIter::<Key, Entity>::create(iters))
}
}

impl<Key, Entity: HashTableEntity<Key>> Iterator for HashTableIteratorKind<Key, Entity> {
    type Item = *mut Entity;

    /// Forwards to whichever concrete iterator this value wraps.
    fn next(&mut self) -> Option<*mut Entity> {
        match self {
            Self::HashMapIterator(single_level) => single_level.next(),
            Self::TwoLevelHashMapIter(two_level) => two_level.next(),
        }
    }
}

pub struct HashTableIter<Key, Entity: HashTableEntity<Key>> {
idx: isize,
capacity: isize,
Expand Down Expand Up @@ -65,3 +100,31 @@ impl<Key, Entity: HashTableEntity<Key>> Iterator for HashTableIter<Key, Entity>
}
}
}

/// Iterator that drains a list of sub-iterators one after another.
pub struct TwoLevelHashTableIter<Key, Entity: HashTableEntity<Key>> {
    /// Sub-iterators, visited in order.
    iters: Vec<HashTableIteratorKind<Key, Entity>>,
    /// Position in `iters` of the sub-iterator currently being drained.
    index: usize,
}

impl<Key, Entity: HashTableEntity<Key>> TwoLevelHashTableIter<Key, Entity> {
    /// Creates an iterator positioned at the first element of `iters`.
    pub fn create(iters: Vec<HashTableIteratorKind<Key, Entity>>) -> Self {
        let index = 0;
        TwoLevelHashTableIter { iters, index }
    }
}

impl<Key, Entity: HashTableEntity<Key>> Iterator for TwoLevelHashTableIter<Key, Entity> {
    type Item = *mut Entity;

    /// Yields the next entity, moving on to the following sub-iterator
    /// whenever the current one is exhausted.
    ///
    /// Returns `None` once every sub-iterator has been drained, including
    /// the case where the list of sub-iterators is empty. After that point
    /// the sub-iterators are not polled again.
    fn next(&mut self) -> Option<Self::Item> {
        // `get_mut` keeps the lookup in bounds, so an empty `iters` (or an
        // index that has moved past the end) ends iteration instead of
        // panicking.
        while let Some(current) = self.iters.get_mut(self.index) {
            if let Some(entity) = current.next() {
                return Some(entity);
            }

            // Current sub-iterator is exhausted: advance with a loop rather
            // than recursion so long runs of empty sub-iterators do not grow
            // the call stack.
            self.index += 1;
        }

        None
    }
}
17 changes: 14 additions & 3 deletions query/src/common/hashtable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,27 @@
pub use hash_table::HashTable;
pub use hash_table_entity::HashTableEntity;
pub use hash_table_entity::KeyValueEntity;
pub use hash_table_grower::Grower;
pub use hash_table_grower::HashTableGrower;
pub use hash_table_grower::SingleLevelGrower;
pub use hash_table_grower::TwoLevelGrower;
pub use hash_table_iter::HashTableIter;
pub use hash_table_iter::HashTableIteratorKind;
pub use hash_table_iter::TwoLevelHashTableIter;
pub use hash_table_key::HashTableKeyable;
pub use two_level_hash_table::HashTableKind;
pub use two_level_hash_table::TwoLevelHashTable;

mod hash_table;
#[allow(clippy::missing_safety_doc, clippy::not_unsafe_ptr_arg_deref)]
mod hash_table_entity;
mod hash_table_grower;
mod hash_table_iter;
mod hash_table_key;
mod two_level_hash_table;

pub type HashMap<Key, Value> = HashTable<Key, KeyValueEntity<Key, Value>>;
pub type HashMapIterator<Key, Value> = HashTableIter<Key, KeyValueEntity<Key, Value>>;
/// `HashTable` storing `KeyValueEntity` entries and sized by `SingleLevelGrower`.
pub type HashMap<Key, Value> = HashTable<Key, KeyValueEntity<Key, Value>, SingleLevelGrower>;
/// `TwoLevelHashTable` storing `KeyValueEntity` entries and sized by `TwoLevelGrower`.
pub type TwoLevelHashMap<Key, Value> =
TwoLevelHashTable<Key, KeyValueEntity<Key, Value>, TwoLevelGrower>;
/// `HashTableIteratorKind` yielding pointers to `KeyValueEntity` entries.
pub type HashMapIteratorKind<Key, Value> = HashTableIteratorKind<Key, KeyValueEntity<Key, Value>>;
/// `HashTableKind` over `KeyValueEntity` entries, parameterised with both
/// the single-level and the two-level grower.
pub type HashMapKind<Key, Value> =
HashTableKind<Key, KeyValueEntity<Key, Value>, SingleLevelGrower, TwoLevelGrower>;
Loading