Skip to content

Commit

Permalink
improve spark hash and hash join performance
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangli20 committed Sep 6, 2024
1 parent bff25b7 commit 6c52d1b
Show file tree
Hide file tree
Showing 33 changed files with 1,309 additions and 855 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions native-engine/datafusion-ext-commons/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,30 @@ use crate::cast::cast;
mod batch_serde;
mod scalar_serde;

/// Serializes a slice of plain-copy values by dumping its backing memory
/// straight into `output` as raw bytes (native endianness, no length
/// prefix or framing — the reader must know the element count).
///
/// Returns any I/O error produced by `output`.
///
/// NOTE(review): if `T` contains padding, the padding bytes are written
/// as-is; presumably call sites only pass padding-free primitives — confirm.
pub fn write_raw_slice<T: Sized + Copy>(
    values: &[T],
    mut output: impl Write,
) -> std::io::Result<()> {
    let num_bytes = values.len() * size_of::<T>();
    // safety: a slice of `Copy` values may be viewed as its underlying
    // byte buffer; the length exactly covers the slice's memory
    let bytes = unsafe { std::slice::from_raw_parts(values.as_ptr() as *const u8, num_bytes) };
    output.write_all(bytes)
}

/// Deserializes plain-copy values from `input` by filling the slice's
/// backing memory with raw bytes (native endianness, no framing) — the
/// inverse of `write_raw_slice`.
///
/// Fails with the underlying I/O error (e.g. `UnexpectedEof`) when `input`
/// yields fewer than `size_of::<T>() * values.len()` bytes; `values` may be
/// partially overwritten in that case.
///
/// NOTE(review): soundness requires that every bit pattern is a valid `T`
/// (true for primitive numeric types); reading into a type such as `bool`
/// or a fieldless enum would be UB — confirm call sites only use this for
/// bit-pattern-complete types.
pub fn read_raw_slice<T: Sized + Copy>(
    values: &mut [T],
    mut input: impl Read,
) -> std::io::Result<()> {
    let raw_item_size = size_of::<T>();
    let raw_slice = unsafe {
        // safety: view the copyable slice as a *mutable* byte buffer whose
        // length exactly covers the slice's memory; `values` is borrowed
        // mutably, so no aliasing view exists while we fill it
        std::slice::from_raw_parts_mut(values.as_mut_ptr() as *mut u8, raw_item_size * values.len())
    };
    input.read_exact(raw_slice)
}

pub fn write_one_batch(num_rows: usize, cols: &[ArrayRef], mut output: impl Write) -> Result<()> {
assert!(cols.iter().all(|col| col.len() == num_rows));

Expand Down
Loading

0 comments on commit 6c52d1b

Please sign in to comment.