Skip to content

Commit

Permalink
refactor: Make read_num_bytes a function instead of a macro (#2364)
Browse files Browse the repository at this point in the history
* refactor: Make read_num_bytes a function instead of a macro

Reduces duplication of this code (now it is just once per type)

* chore: cargo clippy
  • Loading branch information
Markus Westerlind authored Aug 9, 2022
1 parent 80a6ef7 commit 56f7904
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 39 deletions.
4 changes: 2 additions & 2 deletions parquet/src/column/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::column::reader::decoder::{
use crate::data_type::*;
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::bit_util::{ceil, num_required_bits};
use crate::util::bit_util::{ceil, num_required_bits, read_num_bytes};
use crate::util::memory::ByteBufferPtr;

pub(crate) mod decoder;
Expand Down Expand Up @@ -520,7 +520,7 @@ fn parse_v1_level(
match encoding {
Encoding::RLE => {
let i32_size = std::mem::size_of::<i32>();
let data_size = read_num_bytes!(i32, i32_size, buf.as_ref()) as usize;
let data_size = read_num_bytes::<i32>(i32_size, buf.as_ref()) as usize;
Ok((i32_size + data_size, buf.range(i32_size, data_size)))
}
Encoding::BIT_PACKED => {
Expand Down
25 changes: 5 additions & 20 deletions parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ impl AsBytes for str {

pub(crate) mod private {
use crate::encodings::decoding::PlainDecoderDetails;
use crate::util::bit_util::{BitReader, BitWriter};
use crate::util::bit_util::{read_num_bytes, BitReader, BitWriter};
use crate::util::memory::ByteBufferPtr;

use crate::basic::Type;
Expand Down Expand Up @@ -892,21 +892,6 @@ pub(crate) mod private {
}
}

// TODO - Why does macro importing fail?
/// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
/// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
/// compile.
/// This is copied and modified from byteorder crate.
macro_rules! read_num_bytes {
($ty:ty, $size:expr, $src:expr) => {{
assert!($size <= $src.len());
let mut buffer =
<$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
<$ty>::from_ne_bytes(buffer)
}};
}

impl ParquetValueType for super::ByteArray {
const PHYSICAL_TYPE: Type = Type::BYTE_ARRAY;

Expand Down Expand Up @@ -946,17 +931,17 @@ pub(crate) mod private {
.as_mut()
.expect("set_data should have been called");
let num_values = std::cmp::min(buffer.len(), decoder.num_values);
for i in 0..num_values {
for val_array in buffer.iter_mut().take(num_values) {
let len: usize =
read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
read_num_bytes::<u32>(4, data.start_from(decoder.start).as_ref())
as usize;
decoder.start += std::mem::size_of::<u32>();

if data.len() < decoder.start + len {
return Err(eof_err!("Not enough bytes to decode"));
}

let val: &mut Self = buffer[i].as_mut_any().downcast_mut().unwrap();
let val: &mut Self = val_array.as_mut_any().downcast_mut().unwrap();

val.set_data(data.range(decoder.start, len));
decoder.start += len;
Expand All @@ -975,7 +960,7 @@ pub(crate) mod private {

for _ in 0..num_values {
let len: usize =
read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
read_num_bytes::<u32>(4, data.start_from(decoder.start).as_ref())
as usize;
decoder.start += std::mem::size_of::<u32>() + len;
}
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ impl<T: DataType> Decoder<T> for RleValueDecoder<T> {

// We still need to remove prefix of i32 from the stream.
const I32_SIZE: usize = mem::size_of::<i32>();
let data_size = read_num_bytes!(i32, I32_SIZE, data.as_ref()) as usize;
let data_size = bit_util::read_num_bytes::<i32>(I32_SIZE, data.as_ref()) as usize;
self.decoder = RleDecoder::new(1);
self.decoder.set_data(data.range(I32_SIZE, data_size));
self.values_left = num_values;
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/encodings/levels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::basic::Encoding;
use crate::data_type::AsBytes;
use crate::errors::Result;
use crate::util::{
bit_util::{ceil, num_required_bits, BitReader, BitWriter},
bit_util::{ceil, num_required_bits, read_num_bytes, BitReader, BitWriter},
memory::ByteBufferPtr,
};

Expand Down Expand Up @@ -192,7 +192,7 @@ impl LevelDecoder {
LevelDecoder::Rle(ref mut num_values, ref mut decoder) => {
*num_values = Some(num_buffered_values);
let i32_size = mem::size_of::<i32>();
let data_size = read_num_bytes!(i32, i32_size, data.as_ref()) as usize;
let data_size = read_num_bytes::<i32>(i32_size, data.as_ref()) as usize;
decoder.set_data(data.range(i32_size, data_size));
i32_size + data_size
}
Expand Down
28 changes: 14 additions & 14 deletions parquet/src/util/bit_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,17 +88,17 @@ impl FromBytes for bool {

from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64, f32, f64 }

/// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
/// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
/// compile.
/// Reads `size` of bytes from `src`, and reinterprets them as type `ty`, in
/// little-endian order.
/// This is copied and modified from byteorder crate.
macro_rules! read_num_bytes {
($ty:ty, $size:expr, $src:expr) => {{
assert!($size <= $src.len());
let mut buffer = <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
<$ty>::from_ne_bytes(buffer)
}};
pub(crate) fn read_num_bytes<T>(size: usize, src: &[u8]) -> T
where
T: FromBytes,
{
assert!(size <= src.len());
let mut buffer = <T as FromBytes>::Buffer::default();
buffer.as_mut()[..size].copy_from_slice(&src[..size]);
<T>::from_ne_bytes(buffer)
}

/// Returns the ceil of value/divisor.
Expand Down Expand Up @@ -341,7 +341,7 @@ impl BitReader {
pub fn new(buffer: ByteBufferPtr) -> Self {
let total_bytes = buffer.len();
let num_bytes = cmp::min(8, total_bytes);
let buffered_values = read_num_bytes!(u64, num_bytes, buffer.as_ref());
let buffered_values = read_num_bytes::<u64>(num_bytes, buffer.as_ref());
BitReader {
buffer,
buffered_values,
Expand All @@ -355,7 +355,7 @@ impl BitReader {
self.buffer = buffer;
self.total_bytes = self.buffer.len();
let num_bytes = cmp::min(8, self.total_bytes);
self.buffered_values = read_num_bytes!(u64, num_bytes, self.buffer.as_ref());
self.buffered_values = read_num_bytes::<u64>(num_bytes, self.buffer.as_ref());
self.byte_offset = 0;
self.bit_offset = 0;
}
Expand Down Expand Up @@ -624,7 +624,7 @@ impl BitReader {

// Advance byte_offset to next unread byte and read num_bytes
self.byte_offset += bytes_read;
let v = read_num_bytes!(T, num_bytes, self.buffer.data()[self.byte_offset..]);
let v = read_num_bytes::<T>(num_bytes, &self.buffer.data()[self.byte_offset..]);
self.byte_offset += num_bytes;

// Reset buffered_values
Expand Down Expand Up @@ -675,7 +675,7 @@ impl BitReader {
fn reload_buffer_values(&mut self) {
let bytes_to_read = cmp::min(self.total_bytes - self.byte_offset, 8);
self.buffered_values =
read_num_bytes!(u64, bytes_to_read, self.buffer.data()[self.byte_offset..]);
read_num_bytes::<u64>(bytes_to_read, &self.buffer.data()[self.byte_offset..]);
}
}

Expand Down

0 comments on commit 56f7904

Please sign in to comment.