diff --git a/fastfield_codecs/Cargo.toml b/fastfield_codecs/Cargo.toml index 3b0b584108..9be6d45518 100644 --- a/fastfield_codecs/Cargo.toml +++ b/fastfield_codecs/Cargo.toml @@ -11,6 +11,7 @@ description = "Fast field codecs used by tantivy" [dependencies] common = { version = "0.3", path = "../common/", package = "tantivy-common" } tantivy-bitpacker = { version="0.2", path = "../bitpacker/" } +ownedbytes = { version = "0.3.0", path = "../ownedbytes" } prettytable-rs = {version="0.9.0", optional= true} rand = {version="0.8.3", optional= true} diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs index 088c39efe9..a074169986 100644 --- a/fastfield_codecs/src/bitpacked.rs +++ b/fastfield_codecs/src/bitpacked.rs @@ -1,6 +1,7 @@ use std::io::{self, Write}; use common::BinarySerializable; +use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats}; @@ -9,6 +10,7 @@ use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, /// fast field is required. #[derive(Clone)] pub struct BitpackedFastFieldReader { + data: OwnedBytes, bit_unpacker: BitUnpacker, pub min_value_u64: u64, pub max_value_u64: u64, @@ -16,22 +18,24 @@ pub struct BitpackedFastFieldReader { impl FastFieldCodecReader for BitpackedFastFieldReader { /// Opens a fast field given a file. - fn open_from_bytes(bytes: &[u8]) -> io::Result { - let (_data, mut footer) = bytes.split_at(bytes.len() - 16); + fn open_from_bytes(bytes: OwnedBytes) -> io::Result { + let footer_offset = bytes.len() - 16; + let (data, mut footer) = bytes.split(footer_offset); let min_value = u64::deserialize(&mut footer)?; let amplitude = u64::deserialize(&mut footer)?; let max_value = min_value + amplitude; let num_bits = compute_num_bits(amplitude); let bit_unpacker = BitUnpacker::new(num_bits); Ok(BitpackedFastFieldReader { + data, min_value_u64: min_value, max_value_u64: max_value, bit_unpacker, }) } #[inline] - fn get_u64(&self, doc: u64, data: &[u8]) -> u64 { - self.min_value_u64 + self.bit_unpacker.get(doc, data) + fn get_u64(&self, doc: u64) -> u64 { + self.min_value_u64 + self.bit_unpacker.get(doc, &self.data) } #[inline] fn min_value(&self) -> u64 { diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index b75b76b306..be17a19718 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -5,16 +5,16 @@ extern crate more_asserts; use std::io; use std::io::Write; +use ownedbytes::OwnedBytes; + pub mod bitpacked; pub mod linearinterpol; pub mod multilinearinterpol; pub trait FastFieldCodecReader: Sized { /// reads the metadata and returns the CodecReader - fn open_from_bytes(bytes: &[u8]) -> std::io::Result; - - fn get_u64(&self, doc: u64, data: &[u8]) -> u64; - + fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result; + fn get_u64(&self, doc: u64) -> u64; fn min_value(&self) -> u64; fn max_value(&self) -> u64; } @@ -98,7 +98,7 @@ mod tests { return (f32::MAX, 0.0); } let estimation = S::estimate(&data, crate::tests::stats_from_vec(data)); - let mut out = vec![]; + let mut out: Vec = Vec::new(); S::serialize( &mut out, &data, @@ -108,9 +108,11 @@ mod tests { ) .unwrap(); - let reader = R::open_from_bytes(&out).unwrap(); + let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0); + + let reader = R::open_from_bytes(OwnedBytes::new(out)).unwrap(); for (doc, orig_val) in data.iter().enumerate() { - let val = reader.get_u64(doc as u64, &out); + let val = reader.get_u64(doc as u64); if val != *orig_val { panic!( "val {:?} does not match orig_val {:?}, in data set {}, data {:?}", @@ -118,7 +120,6 @@ mod tests { ); } } - let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0); (estimation, actual_compression) } pub fn get_codec_test_data_sets() -> Vec<(Vec, &'static str)> { diff --git a/fastfield_codecs/src/linearinterpol.rs b/fastfield_codecs/src/linearinterpol.rs index 49da299037..a8ea95672f 100644 --- a/fastfield_codecs/src/linearinterpol.rs +++ b/fastfield_codecs/src/linearinterpol.rs @@ -2,6 +2,7 @@ use std::io::{self, Read, Write}; use std::ops::Sub; use common::{BinarySerializable, FixedSize}; +use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats}; @@ -10,6 +11,7 @@ use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, /// fast field is required. #[derive(Clone)] pub struct LinearInterpolFastFieldReader { + data: OwnedBytes, bit_unpacker: BitUnpacker, pub footer: LinearInterpolFooter, pub slope: f32, @@ -57,23 +59,24 @@ impl FixedSize for LinearInterpolFooter { impl FastFieldCodecReader for LinearInterpolFastFieldReader { /// Opens a fast field given a file. - fn open_from_bytes(bytes: &[u8]) -> io::Result { - let (_data, mut footer) = bytes.split_at(bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES); + fn open_from_bytes(bytes: OwnedBytes) -> io::Result { + let footer_offset = bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES; + let (data, mut footer) = bytes.split(footer_offset); let footer = LinearInterpolFooter::deserialize(&mut footer)?; let slope = get_slope(footer.first_val, footer.last_val, footer.num_vals); - let num_bits = compute_num_bits(footer.relative_max_value); let bit_unpacker = BitUnpacker::new(num_bits); Ok(LinearInterpolFastFieldReader { + data, bit_unpacker, footer, slope, }) } #[inline] - fn get_u64(&self, doc: u64, data: &[u8]) -> u64 { + fn get_u64(&self, doc: u64) -> u64 { let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope); - (calculated_value + self.bit_unpacker.get(doc, data)) - self.footer.offset + (calculated_value + self.bit_unpacker.get(doc, &self.data)) - self.footer.offset } #[inline] diff --git a/fastfield_codecs/src/multilinearinterpol.rs b/fastfield_codecs/src/multilinearinterpol.rs index c60d5b0361..26b7c9e882 100644 --- a/fastfield_codecs/src/multilinearinterpol.rs +++ b/fastfield_codecs/src/multilinearinterpol.rs @@ -14,6 +14,7 @@ use std::io::{self, Read, Write}; use std::ops::Sub; use common::{BinarySerializable, CountingWriter, DeserializeFrom}; +use ownedbytes::OwnedBytes; use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker}; use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats}; @@ -24,6 +25,7 @@ const CHUNK_SIZE: u64 = 512; /// fast field is required. #[derive(Clone)] pub struct MultiLinearInterpolFastFieldReader { + data: OwnedBytes, pub footer: MultiLinearInterpolFooter, } @@ -145,24 +147,23 @@ fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Functio impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader { /// Opens a fast field given a file. - fn open_from_bytes(bytes: &[u8]) -> io::Result { + fn open_from_bytes(bytes: OwnedBytes) -> io::Result { let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?; - - let (_data, mut footer) = bytes.split_at(bytes.len() - (4 + footer_len) as usize); + let footer_offset = bytes.len() - 4 - footer_len as usize; + let (data, mut footer) = bytes.split(footer_offset); let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?; - - Ok(MultiLinearInterpolFastFieldReader { footer }) + Ok(MultiLinearInterpolFastFieldReader { data, footer }) } #[inline] - fn get_u64(&self, doc: u64, data: &[u8]) -> u64 { + fn get_u64(&self, doc: u64) -> u64 { let interpolation = get_interpolation_function(doc, &self.footer.interpolations); let doc = doc - interpolation.start_pos; let calculated_value = get_calculated_value(interpolation.value_start_pos, doc, interpolation.slope); let diff = interpolation .bit_unpacker - .get(doc, &data[interpolation.data_start_offset as usize..]); + .get(doc, &self.data[interpolation.data_start_offset as usize..]); (calculated_value + diff) - interpolation.positive_val_offset } diff --git a/src/fastfield/gcd.rs b/src/fastfield/gcd.rs index ac4fb9ec47..72c5794af0 100644 --- a/src/fastfield/gcd.rs +++ b/src/fastfield/gcd.rs @@ -4,6 +4,7 @@ use common::BinarySerializable; use fastdivide::DividerU64; use fastfield_codecs::FastFieldCodecReader; use gcd::Gcd; +use ownedbytes::OwnedBytes; pub const GCD_DEFAULT: u64 = 1; pub const GCD_CODEC_ID: u8 = 4; @@ -19,12 +20,12 @@ pub struct GCDFastFieldCodec { } impl FastFieldCodecReader for GCDFastFieldCodec { /// Opens a fast field given the bytes. - fn open_from_bytes(bytes: &[u8]) -> std::io::Result { - let (header, mut footer) = bytes.split_at(bytes.len() - 16); + fn open_from_bytes(bytes: OwnedBytes) -> std::io::Result { + let footer_offset = bytes.len() - 16; + let (body, mut footer) = bytes.split(footer_offset); let gcd = u64::deserialize(&mut footer)?; let min_value = u64::deserialize(&mut footer)?; - let reader = C::open_from_bytes(header)?; - + let reader = C::open_from_bytes(body)?; Ok(GCDFastFieldCodec { gcd, min_value, @@ -33,8 +34,8 @@ impl FastFieldCodecReader for GCDFastFieldCodec } #[inline] - fn get_u64(&self, doc: u64, data: &[u8]) -> u64 { - let mut data = self.reader.get_u64(doc, data); + fn get_u64(&self, doc: u64) -> u64 { + let mut data = self.reader.get_u64(doc); data *= self.gcd; data += self.min_value; data diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index 6129ad59d1..8722e9715b 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -216,7 +216,6 @@ impl FastFieldReader for DynamicFastFieldReader { #[derive(Clone)] pub struct FastFieldReaderCodecWrapper { reader: CodecReader, - bytes: OwnedBytes, _phantom: PhantomData, } @@ -235,16 +234,16 @@ impl FastFieldReaderCodecWrapper crate::Result { - let reader = C::open_from_bytes(bytes.as_slice())?; + let reader = C::open_from_bytes(bytes)?; Ok(FastFieldReaderCodecWrapper { reader, - bytes, _phantom: PhantomData, }) } + #[inline] pub(crate) fn get_u64(&self, doc: u64) -> Item { - let data = self.reader.get_u64(doc, self.bytes.as_slice()); + let data = self.reader.get_u64(doc); Item::from_u64(data) }