From 56c9497ee28265ee08b5a4db1fea8a2870680b74 Mon Sep 17 00:00:00 2001 From: Val Lorentz Date: Tue, 6 Aug 2024 18:33:46 +0200 Subject: [PATCH] Relax check on `patch_bits` overflows in delta decoding (#118) For some reason, some files written with pyorc have a `patch_bit_width` larger than needed, causing the previous check to fail, even when decoding to `u64`. This changes the check to only fail when the patch bits actually overflow, instead of checking whether they may overflow. Closes https://github.com/datafusion-contrib/datafusion-orc/issues/97 --- src/reader/decode/rle_v2/patched_base.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/reader/decode/rle_v2/patched_base.rs b/src/reader/decode/rle_v2/patched_base.rs index c33815b2..a524fc91 100644 --- a/src/reader/decode/rle_v2/patched_base.rs +++ b/src/reader/decode/rle_v2/patched_base.rs @@ -30,6 +30,12 @@ impl RleReaderV2 { pub fn read_patched_base(&mut self, header: u8) -> Result<()> { let encoded_bit_width = (header >> 1) & 0x1F; let value_bit_width = rle_v2_decode_bit_width(encoded_bit_width); + let value_bit_width_u32 = u32::try_from(value_bit_width).or_else(|_| { + OutOfSpecSnafu { + msg: "value_bit_width overflows u32", + } + .fail() + })?; let second_byte = read_u8(&mut self.reader)?; let length = extract_run_length_from_header(header, second_byte); @@ -52,12 +58,6 @@ impl RleReaderV2 { } .fail(); } - if (patch_bit_width + value_bit_width) > (N::BYTE_SIZE * 8) { - return OutOfSpecSnafu { - msg: "combined patch width and value width cannot exceed the size of the integer type being decoded", - } - .fail(); - } let patch_list_length = (fourth_byte & 0x1f) as usize; @@ -105,7 +105,12 @@ impl RleReaderV2 { for (idx, value) in self.decoded_ints.iter_mut().enumerate() { if idx == actual_gap as usize { - let patch_bits = current_patch << value_bit_width; + let patch_bits = + current_patch + .checked_shl(value_bit_width_u32) + .context(OutOfSpecSnafu { + msg: "Overflow 
while shifting patch bits by value_bit_width", + })?; // Safe conversion without loss as we check the bit width prior let patch_bits = N::from_u64(patch_bits); let patched_value = *value | patch_bits;