Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make hash table memory layout SIMD-independent #16

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ jobs:
run: cargo test --verbose --target ${{ matrix.arch }}-${{ fromJSON(env.target_map)[matrix.os] }} --features=no_simd
- name: Build docs
run: cargo doc --verbose
- name: Run SIMD/no-SIMD tests
run: |
# See crosstest/main.rs for more info on the test
cd crosstest
# Create a no-simd test file in /tmp
cargo run --verbose --features no_simd -- write
cargo clean
# Create a simd-enabled test file and test the no-simd file
cargo run --verbose -- write read
cargo clean
# Test the simd-enabled file we generated in the last step
cargo run --verbose --features no_simd -- read

benchmarks:
strategy:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/target
/crosstest/target
Cargo.lock
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "odht"
version = "0.2.1"
version = "0.3.0"
edition = "2018"
license = "MIT OR Apache-2.0"
exclude = ["/.github/*"]
Expand Down
12 changes: 12 additions & 0 deletions crosstest/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "crosstest"
version = "0.1.0"
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
odht = { package = "odht", path = ".." }

[features]
no_simd = ["odht/no_simd"]
97 changes: 97 additions & 0 deletions crosstest/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// This test makes sure that a hash table generated with SIMD support
// can be loaded on a platform without SIMD support and vice versa.
//
// It works this way:
//
// The executable has two options: "write" and "read". When "write" is
// enabled, it will write a hashtable to the system temp directory (/tmp
// on most Unix systems). When "read" is enabled, it will try to read a
// hashtable from that directory and verify that it contains the
// expected data.
//
// We compile the executable two times, once with and once without SIMD
// support. With both versions we generate a hashtable in the temp
// directory and then try to load and verify that hashtable with the
// respective other version of the executable, that is, the SIMD version
// will try to load the table written by the non-SIMD version and vice
// versa.
//
// See .github/workflows/ci.yml for how this is used.

/// Hash table configuration used by this cross-test: `u64` keys and `u32`
/// values, both stored as fixed-size little-endian byte arrays and hashed
/// with `odht::FxHashFn`.
struct FxConfig;

impl odht::Config for FxConfig {
    type Key = u64;
    type EncodedKey = [u8; 8];

    type Value = u32;
    type EncodedValue = [u8; 4];

    type H = odht::FxHashFn;

    /// Turns a key into its 8 little-endian bytes.
    #[inline]
    fn encode_key(k: &Self::Key) -> Self::EncodedKey {
        u64::to_le_bytes(*k)
    }

    /// Rebuilds a key from its 8 little-endian bytes.
    #[inline]
    fn decode_key(k: &Self::EncodedKey) -> Self::Key {
        let bytes: [u8; 8] = *k;
        u64::from_le_bytes(bytes)
    }

    /// Turns a value into its 4 little-endian bytes.
    #[inline]
    fn encode_value(v: &Self::Value) -> Self::EncodedValue {
        u32::to_le_bytes(*v)
    }

    /// Rebuilds a value from its 4 little-endian bytes.
    #[inline]
    fn decode_value(v: &Self::EncodedValue) -> Self::Value {
        let bytes: [u8; 4] = *v;
        u32::from_le_bytes(bytes)
    }
}

/// Name (inside the temp directory) of the table file written by a build
/// that has the `no_simd` feature enabled.
const FILE_NAME_NO_SIMD: &str = "odht_hash_table_no_simd";
/// Name (inside the temp directory) of the table file written by a build
/// that does not have the `no_simd` feature enabled.
const FILE_NAME_WITH_SIMD: &str = "odht_hash_table_with_simd";

/// File this build writes: the one matching its own SIMD configuration.
#[cfg(feature = "no_simd")]
const WRITE_FILE_NAME: &str = FILE_NAME_NO_SIMD;
/// File this build writes: the one matching its own SIMD configuration.
#[cfg(not(feature = "no_simd"))]
const WRITE_FILE_NAME: &str = FILE_NAME_WITH_SIMD;

/// File this build reads: the one written by the *other* SIMD
/// configuration, so that every read crosses the SIMD/no-SIMD boundary.
#[cfg(feature = "no_simd")]
const READ_FILE_NAME: &str = FILE_NAME_WITH_SIMD;
/// File this build reads: the one written by the *other* SIMD
/// configuration, so that every read crosses the SIMD/no-SIMD boundary.
#[cfg(not(feature = "no_simd"))]
const READ_FILE_NAME: &str = FILE_NAME_NO_SIMD;

/// Entry point of the cross-test executable.
///
/// Two command line words are recognized and may be combined:
///
/// * `write` builds a hash table from a fixed set of entries and writes its
///   raw bytes to `WRITE_FILE_NAME` inside `std::env::temp_dir()`.
/// * `read` loads `READ_FILE_NAME` from the same directory and asserts that
///   it holds exactly the same fixed set of entries.
///
/// When both are given, the write happens before the read. When neither is
/// given, a notice is printed to stderr and the program exits successfully
/// without doing anything.
///
/// # Errors
///
/// Returns the underlying error if the file cannot be written or read, or if
/// `from_raw_bytes` rejects the loaded data.
///
/// # Panics
///
/// Panics if the loaded table's length or any looked-up value differs from
/// the expected entries.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Writer and reader must agree on the data set, so both derive it from
    // this one closure: the keys are the squares of 0..70_000 and each value
    // is the corresponding root.
    let make_entries = || (0..70_000_u64).map(|x| (x * x, x as u32)).collect::<Vec<_>>();

    // `any` states the intent directly (instead of `find(..).is_some()`).
    let mode_requested = |mode: &str| std::env::args_os().any(|arg| arg == mode);
    let write_requested = mode_requested("write");
    let read_requested = mode_requested("read");

    if !write_requested && !read_requested {
        // Without this notice a typo in the mode name would look like a
        // passing test run.
        eprintln!("Nothing to do: pass `write` and/or `read` as an argument.");
    }

    if write_requested {
        // NOTE(review): 85 is presumably the maximum load factor in percent;
        // confirm against the `odht::HashTableOwned::from_iterator` docs.
        let hash_table = odht::HashTableOwned::<FxConfig>::from_iterator(make_entries(), 85);
        let mut path = std::env::temp_dir();
        path.push(WRITE_FILE_NAME);
        std::fs::write(&path, hash_table.raw_bytes())?;
        eprintln!("Wrote hash table with {} bytes to {}", hash_table.raw_bytes().len(), path.display());
    }

    if read_requested {
        let mut path = std::env::temp_dir();
        path.push(READ_FILE_NAME);
        eprintln!("Trying to load hash table from {}", path.display());
        let data = std::fs::read(&path)?;
        let hash_table = odht::HashTable::<FxConfig, _>::from_raw_bytes(data)?;
        eprintln!("Loaded hash table with {} bytes from {}", hash_table.raw_bytes().len(), path.display());
        let expected_entries = make_entries();

        eprintln!("Comparing hash table to expected values.");
        // Check that we can read the data
        assert_eq!(hash_table.len(), expected_entries.len());
        for (key, value) in expected_entries {
            assert_eq!(hash_table.get(&key), Some(value));
        }

        eprintln!("Success");
    }

    Ok(())
}
9 changes: 6 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ mod unhash;

use error::Error;
use std::borrow::{Borrow, BorrowMut};
use swisstable_group_query::REFERENCE_GROUP_SIZE;

pub use crate::fxhash::FxHashFn;
pub use crate::unhash::UnHashFn;

use crate::raw_table::{ByteArray, RawIter, RawTable, RawTableMut};
use crate::swisstable_group_query::GROUP_SIZE;

/// This trait provides a complete "configuration" for a hash table, i.e. it
/// defines the key and value types, how these are encoded and what hash
Expand Down Expand Up @@ -527,7 +527,7 @@ fn slots_needed(item_count: usize, max_load_factor: Factor) -> usize {
let slots_needed = max_load_factor.apply_inverse(item_count);
std::cmp::max(
slots_needed.checked_next_power_of_two().unwrap(),
GROUP_SIZE,
REFERENCE_GROUP_SIZE,
)
}

Expand Down Expand Up @@ -708,7 +708,10 @@ mod tests {

#[test]
fn load_factor_and_item_count() {
assert_eq!(slots_needed(0, Factor::from_percent(100)), GROUP_SIZE);
assert_eq!(
slots_needed(0, Factor::from_percent(100)),
REFERENCE_GROUP_SIZE
);
assert_eq!(slots_needed(6, Factor::from_percent(60)), 16);
assert_eq!(slots_needed(5, Factor::from_percent(50)), 16);
assert_eq!(slots_needed(5, Factor::from_percent(49)), 16);
Expand Down
20 changes: 12 additions & 8 deletions src/memory_layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@ use std::{
mem::{align_of, size_of},
};

use crate::Config;
use crate::{
error::Error,
raw_table::{Entry, EntryMetadata, RawTable},
swisstable_group_query::GROUP_SIZE,
Factor,
};
use crate::{swisstable_group_query::REFERENCE_GROUP_SIZE, Config};

const CURRENT_FILE_FORMAT_VERSION: [u8; 4] = [0, 0, 0, 1];
const CURRENT_FILE_FORMAT_VERSION: [u8; 4] = [0, 0, 0, 2];

#[repr(C)]
#[derive(Clone)]
Expand Down Expand Up @@ -63,8 +62,11 @@ impl Header {

if raw_bytes.len() != bytes_needed::<C>(self.slot_count()) {
return Err(Error(format!(
"Provided allocation has wrong size for slot count {}",
self.slot_count()
"Provided allocation has wrong size for slot count {}. \
The allocation's size is {} but the expected size is {}.",
self.slot_count(),
raw_bytes.len(),
bytes_needed::<C>(self.slot_count()),
)));
}

Expand Down Expand Up @@ -223,7 +225,7 @@ where
let entry_metadata = unsafe {
std::slice::from_raw_parts(
raw_bytes.as_ptr().offset(metadata_offset) as *const EntryMetadata,
slot_count + GROUP_SIZE,
slot_count + REFERENCE_GROUP_SIZE,
)
};

Expand Down Expand Up @@ -291,7 +293,7 @@ where
let entry_metadata = unsafe {
std::slice::from_raw_parts_mut(
raw_bytes.as_mut_ptr().offset(metadata_offset) as *mut EntryMetadata,
slot_count + GROUP_SIZE,
slot_count + REFERENCE_GROUP_SIZE,
)
};

Expand Down Expand Up @@ -327,7 +329,9 @@ pub(crate) fn bytes_needed<C: Config>(slot_count: usize) -> usize {
let size_of_entry = size_of::<Entry<C::EncodedKey, C::EncodedValue>>();
let size_of_metadata = size_of::<EntryMetadata>();

HEADER_SIZE + slot_count * size_of_entry + (slot_count + GROUP_SIZE) * size_of_metadata
HEADER_SIZE
+ slot_count * size_of_entry
+ (slot_count + REFERENCE_GROUP_SIZE) * size_of_metadata
}

pub(crate) fn allocate<C: Config>(
Expand Down
16 changes: 8 additions & 8 deletions src/raw_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ where
assert!(std::mem::align_of::<Entry<K, V>>() == 1);

debug_assert!(data.len().is_power_of_two());
debug_assert!(metadata.len() == data.len() + GROUP_SIZE);
debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

Self {
metadata,
Expand All @@ -198,7 +198,7 @@ where
#[inline]
pub(crate) fn find(&self, key: &K) -> Option<&V> {
debug_assert!(self.data.len().is_power_of_two());
debug_assert!(self.metadata.len() == self.data.len() + GROUP_SIZE);
debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

let mask = self.data.len() - 1;
let hash = H::hash(key.as_slice());
Expand Down Expand Up @@ -308,7 +308,7 @@ where
assert!(std::mem::align_of::<Entry<K, V>>() == 1);

debug_assert!(data.len().is_power_of_two());
debug_assert_eq!(metadata.len(), data.len() + GROUP_SIZE);
debug_assert_eq!(metadata.len(), data.len() + REFERENCE_GROUP_SIZE);

Self {
metadata,
Expand All @@ -324,7 +324,7 @@ where
#[inline]
pub(crate) fn insert(&mut self, key: K, value: V) -> Option<V> {
debug_assert!(self.data.len().is_power_of_two());
debug_assert!(self.metadata.len() == self.data.len() + GROUP_SIZE);
debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

let mask = self.data.len() - 1;
let hash = H::hash(key.as_slice());
Expand Down Expand Up @@ -352,11 +352,11 @@ where
*entry_at_mut(self.data, index) = Entry::new(key, value);
*metadata_at_mut(self.metadata, index) = h2;

if index < GROUP_SIZE {
if index < REFERENCE_GROUP_SIZE {
let first_mirror = self.data.len();
*metadata_at_mut(self.metadata, first_mirror + index) = h2;
debug_assert_eq!(
self.metadata[..GROUP_SIZE],
self.metadata[..REFERENCE_GROUP_SIZE],
self.metadata[self.data.len()..]
);
}
Expand Down Expand Up @@ -408,7 +408,7 @@ where
{
pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> RawIter<'a, K, V> {
debug_assert!(data.len().is_power_of_two());
debug_assert!(metadata.len() == data.len() + GROUP_SIZE);
debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

RawIter {
metadata,
Expand Down Expand Up @@ -529,7 +529,7 @@ mod tests {
) -> (Vec<EntryMetadata>, Vec<Entry<K, V>>) {
let size = xs.size_hint().0.next_power_of_two();
let mut data = vec![Entry::default(); size];
let mut metadata = vec![255; size + GROUP_SIZE];
let mut metadata = vec![255; size + REFERENCE_GROUP_SIZE];

assert!(metadata.iter().all(|b| is_empty_or_deleted(*b)));

Expand Down