fix(storage): use u8 for NippiJar's DataReader::offset_size (#8360)
fgimenez authored May 23, 2024
1 parent 8158c28 commit c5bc960
Showing 3 changed files with 17 additions and 16 deletions.
2 changes: 1 addition & 1 deletion crates/storage/nippy-jar/src/error.rs
@@ -40,7 +40,7 @@ pub enum NippyJarError {
     #[error("the size of an offset must be at most 8 bytes, got {offset_size}")]
     OffsetSizeTooBig {
         /// The read offset size in number of bytes.
-        offset_size: u64,
+        offset_size: u8,
     },
     #[error("attempted to read an out of bounds offset: {index}")]
     OffsetOutOfBounds {
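The invariant behind this change: the offset width is read from a single byte on disk, so it can never exceed 255, and anything above 8 is rejected before use. A minimal standalone sketch of that bound check (hypothetical helper, not the crate's API):

```rust
/// Validates the offset width read from the first byte of an offsets file,
/// mirroring the bound that `NippyJarError::OffsetSizeTooBig` reports.
fn validate_offset_size(first_byte: u8) -> Result<u8, String> {
    if first_byte > 8 {
        // In the crate this surfaces as `NippyJarError::OffsetSizeTooBig { offset_size }`.
        return Err(format!("the size of an offset must be at most 8 bytes, got {first_byte}"));
    }
    Ok(first_byte)
}

fn main() {
    assert_eq!(validate_offset_size(8), Ok(8));
    assert!(validate_offset_size(9).is_err());
}
```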
9 changes: 5 additions & 4 deletions crates/storage/nippy-jar/src/lib.rs
@@ -476,7 +476,7 @@ pub struct DataReader {
     /// Mmap handle for offsets.
     offset_mmap: Mmap,
     /// Number of bytes that represent one offset.
-    offset_size: u64,
+    offset_size: u8,
 }
 
 impl DataReader {
@@ -491,7 +491,7 @@ impl DataReader {
         let offset_mmap = unsafe { Mmap::map(&offset_file)? };
 
         // First byte is the size of one offset in bytes
-        let offset_size = offset_mmap[0] as u64;
+        let offset_size = offset_mmap[0];
 
         // Ensure that the size of an offset is at most 8 bytes.
         if offset_size > 8 {
@@ -525,7 +525,8 @@ impl DataReader {
     /// Returns total number of offsets in the file.
     /// The size of one offset is determined by the file itself.
     pub fn offsets_count(&self) -> Result<usize, NippyJarError> {
-        Ok((self.offset_file.metadata()?.len().saturating_sub(1) / self.offset_size) as usize)
+        Ok((self.offset_file.metadata()?.len().saturating_sub(1) / self.offset_size as u64)
+            as usize)
     }
 
     /// Reads one offset-sized (determined by the offset file) u64 at the provided index.
@@ -542,7 +543,7 @@ impl DataReader {
     }
 
     /// Returns number of bytes that represent one offset.
-    pub fn offset_size(&self) -> u64 {
+    pub fn offset_size(&self) -> u8 {
         self.offset_size
     }
 
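For reference, the offsets-file layout that `DataReader` relies on is: one header byte holding the width of a single offset (at most 8), followed by the packed offsets themselves. A minimal standalone sketch of the resulting arithmetic, assuming little-endian encoding and using hypothetical helper names rather than the crate's API:

```rust
/// Sketch of the offsets-file layout assumed above:
/// byte 0    -> width of one offset in bytes (1..=8)
/// bytes 1.. -> packed offsets, each `offset_size` bytes wide
fn offsets_count(file_len: u64, offset_size: u8) -> usize {
    // Skip the single header byte, then divide by the per-offset width.
    (file_len.saturating_sub(1) / offset_size as u64) as usize
}

/// Decodes the `index`-th offset from the raw file bytes (hypothetical helper,
/// assuming little-endian encoding).
fn read_offset_at(bytes: &[u8], offset_size: u8, index: usize) -> u64 {
    let width = offset_size as usize;
    let start = 1 + index * width; // +1 skips the header byte
    let mut buf = [0u8; 8];
    buf[..width].copy_from_slice(&bytes[start..start + width]);
    u64::from_le_bytes(buf)
}

fn main() {
    // Offsets file with offset_size = 2 and offsets [5, 9].
    let bytes = [2u8, 5, 0, 9, 0];
    assert_eq!(offsets_count(bytes.len() as u64, bytes[0]), 2);
    assert_eq!(read_offset_at(&bytes, bytes[0], 1), 9);
}
```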
22 changes: 11 additions & 11 deletions crates/storage/nippy-jar/src/writer.rs
@@ -7,7 +7,7 @@ use std::{
 };
 
 /// Size of one offset in bytes.
-const OFFSET_SIZE_BYTES: u64 = 8;
+const OFFSET_SIZE_BYTES: u8 = 8;
 
 /// Writer of [`NippyJar`]. Handles table data and offsets only.
 ///
@@ -112,7 +112,7 @@ impl<H: NippyJarHeader> NippyJarWriter<H> {
         let mut offsets_file = OpenOptions::new().read(true).write(true).open(offsets)?;
 
         // First byte of the offset file is the size of one offset in bytes
-        offsets_file.write_all(&[OFFSET_SIZE_BYTES as u8])?;
+        offsets_file.write_all(&[OFFSET_SIZE_BYTES])?;
         offsets_file.seek(SeekFrom::End(0))?;
 
         Ok((data_file, offsets_file, is_created))
@@ -133,9 +133,9 @@ impl<H: NippyJarHeader> NippyJarWriter<H> {
             return Err(NippyJarError::FrozenJar)
         }
 
-        let expected_offsets_file_size = 1 + // first byte is the size of one offset
-            OFFSET_SIZE_BYTES * self.jar.rows as u64 * self.jar.columns as u64 + // `offset size * num rows * num columns`
-            OFFSET_SIZE_BYTES; // expected size of the data file
+        let expected_offsets_file_size: u64 = (1 + // first byte is the size of one offset
+            OFFSET_SIZE_BYTES as usize * self.jar.rows * self.jar.columns + // `offset size * num rows * num columns`
+            OFFSET_SIZE_BYTES as usize) as u64; // expected size of the data file
         let actual_offsets_file_size = self.offsets_file.get_ref().metadata()?.len();
 
         // Offsets configuration wasn't properly committed
@@ -151,9 +151,9 @@ impl<H: NippyJarHeader> NippyJarWriter<H> {
             // `num rows = (file size - 1 - size of one offset) / num columns`
             self.jar.rows = ((actual_offsets_file_size.
                 saturating_sub(1). // first byte is the size of one offset
-                saturating_sub(OFFSET_SIZE_BYTES) / // expected size of the data file
+                saturating_sub(OFFSET_SIZE_BYTES as u64) / // expected size of the data file
                 (self.jar.columns as u64)) /
-                OFFSET_SIZE_BYTES) as usize;
+                OFFSET_SIZE_BYTES as u64) as usize;
 
             // Freeze row count changed
             self.jar.freeze_config()?;
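As a concrete check of the consistency math above (a hedged sketch, not taken from the crate): with the 8-byte offset width, 3 rows, and 2 columns, the offsets file should hold 1 header byte, 8 * 3 * 2 bytes of per-cell offsets, and one trailing 8-byte offset for the data-file length, i.e. 57 bytes; the recovery path inverts that to re-derive the row count.

```rust
const OFFSET_SIZE_BYTES: u8 = 8;

/// Expected on-disk size of the offsets file for a fully committed jar.
fn expected_offsets_file_size(rows: usize, columns: usize) -> u64 {
    (1 // header byte: size of one offset
        + OFFSET_SIZE_BYTES as usize * rows * columns // one offset per cell
        + OFFSET_SIZE_BYTES as usize) as u64 // final offset = data-file length
}

/// Inverse direction used on recovery: derive the row count from the actual file size.
fn rows_from_file_size(file_size: u64, columns: usize) -> usize {
    ((file_size
        .saturating_sub(1) // header byte
        .saturating_sub(OFFSET_SIZE_BYTES as u64) // trailing data-file offset
        / columns as u64)
        / OFFSET_SIZE_BYTES as u64) as usize
}

fn main() {
    assert_eq!(expected_offsets_file_size(3, 2), 1 + 8 * 3 * 2 + 8); // 57 bytes
    assert_eq!(rows_from_file_size(57, 2), 3);
}
```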
@@ -183,7 +183,7 @@ impl<H: NippyJarHeader> NippyJarWriter<H> {
                 .get_ref()
                 .metadata()?
                 .len()
-                .saturating_sub(OFFSET_SIZE_BYTES * (index as u64 + 1));
+                .saturating_sub(OFFSET_SIZE_BYTES as u64 * (index as u64 + 1));
             self.offsets_file.get_mut().set_len(new_len)?;
 
             drop(reader);
@@ -318,7 +318,7 @@ impl<H: NippyJarHeader> NippyJarWriter<H> {
         // Handle non-empty offset file
         if length > 1 {
             // first byte is reserved for `bytes_per_offset`, which is 8 initially.
-            let num_offsets = (length - 1) / OFFSET_SIZE_BYTES;
+            let num_offsets = (length - 1) / OFFSET_SIZE_BYTES as u64;
 
             if remaining_to_prune as u64 > num_offsets {
                 return Err(NippyJarError::InvalidPruning(
@@ -336,10 +336,10 @@ impl<H: NippyJarHeader> NippyJarWriter<H> {
                 self.data_file.get_mut().set_len(0)?;
             } else {
                 // Calculate the new length for the on-disk offset list
-                let new_len = 1 + new_num_offsets * OFFSET_SIZE_BYTES;
+                let new_len = 1 + new_num_offsets * OFFSET_SIZE_BYTES as u64;
                 // Seek to the position of the last offset
                 self.offsets_file
-                    .seek(SeekFrom::Start(new_len.saturating_sub(OFFSET_SIZE_BYTES)))?;
+                    .seek(SeekFrom::Start(new_len.saturating_sub(OFFSET_SIZE_BYTES as u64)))?;
                 // Read the last offset value
                 let mut last_offset = [0u8; OFFSET_SIZE_BYTES as usize];
                 self.offsets_file.get_ref().read_exact(&mut last_offset)?;
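The pruning path above works on the same layout: skip the header byte, count whole offsets, truncate the file to `1 + remaining * 8` bytes, and read the last surviving offset to learn the new data-file length. A small hedged sketch of that arithmetic (hypothetical helper, not the crate's API):

```rust
const OFFSET_SIZE_BYTES: u64 = 8;

/// Given the current offsets-file length and how many offsets to drop, return
/// the new file length and the start position of the last surviving offset.
/// (The crate additionally resets both files when nothing would remain.)
fn prune_offsets(file_len: u64, to_prune: u64) -> Option<(u64, u64)> {
    let num_offsets = file_len.saturating_sub(1) / OFFSET_SIZE_BYTES; // skip the header byte
    if to_prune > num_offsets {
        return None; // would prune more offsets than exist
    }
    let new_num_offsets = num_offsets - to_prune;
    let new_len = 1 + new_num_offsets * OFFSET_SIZE_BYTES;
    // Position of the last remaining offset (it stores the new data-file length).
    let last_offset_pos = new_len.saturating_sub(OFFSET_SIZE_BYTES);
    Some((new_len, last_offset_pos))
}

fn main() {
    // 1 header byte + 5 offsets of 8 bytes = 41 bytes; prune 2 offsets.
    assert_eq!(prune_offsets(41, 2), Some((25, 17)));
}
```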
