From 0c88c3813e95b838ffdec691ce3cec6c011506e1 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Thu, 13 Jul 2023 11:58:30 +0200 Subject: [PATCH] Remove support for Brotli and Snappy compression LZ4 provides fast and simple compression whereas Zstd is exceptionally flexible so that the additional support for Brotli and Snappy does not really add any distinct functionality on top of those two algorithms. Removing them reduces our maintenance burden and reduces the number of choices users have to make when setting up their project based on Tantivy. --- .github/workflows/test.yml | 2 +- Cargo.toml | 4 ---- README.md | 2 +- src/core/index_meta.rs | 9 ++----- src/store/compression_brotli.rs | 19 --------------- src/store/compression_snap.rs | 17 ------------- src/store/compressors.rs | 42 --------------------------------- src/store/decompressors.rs | 26 -------------------- src/store/mod.rs | 26 ++++---------------- 9 files changed, 9 insertions(+), 138 deletions(-) delete mode 100644 src/store/compression_brotli.rs delete mode 100644 src/store/compression_snap.rs diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e7ce3f39a6..c2755f9058 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,7 +53,7 @@ jobs: strategy: matrix: features: [ - { label: "all", flags: "mmap,stopwords,brotli-compression,lz4-compression,snappy-compression,zstd-compression,failpoints" }, + { label: "all", flags: "mmap,stopwords,lz4-compression,zstd-compression,failpoints" }, { label: "quickwit", flags: "mmap,quickwit,failpoints" } ] diff --git a/Cargo.toml b/Cargo.toml index 3bcc9a94d4..fc759c3384 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,9 +25,7 @@ aho-corasick = "1.0" tantivy-fst = "0.4.0" memmap2 = { version = "0.7.1", optional = true } lz4_flex = { version = "0.11", default-features = false, optional = true } -brotli = { version = "3.3.4", optional = true } zstd = { version = "0.12", optional = true, default-features = false } -snap = { 
version = "1.0.5", optional = true } tempfile = { version = "3.3.0", optional = true } log = "0.4.16" serde = { version = "1.0.136", features = ["derive"] } @@ -107,9 +105,7 @@ default = ["mmap", "stopwords", "lz4-compression"] mmap = ["fs4", "tempfile", "memmap2"] stopwords = [] -brotli-compression = ["brotli"] lz4-compression = ["lz4_flex"] -snappy-compression = ["snap"] zstd-compression = ["zstd"] failpoints = ["fail", "fail/failpoints"] diff --git a/README.md b/README.md index e43c4e5d9c..2cd8e8d76e 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Details about the benchmark can be found at this [repository](https://github.com - Single valued and multivalued u64, i64, and f64 fast fields (equivalent of doc values in Lucene) - `&[u8]` fast fields - Text, i64, u64, f64, dates, ip, bool, and hierarchical facet fields -- Compressed document store (LZ4, Zstd, None, Brotli, Snap) +- Compressed document store (LZ4, Zstd, None) - Range queries - Faceted search - Configurable indexing (optional term frequency and position indexing) diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs index fb99101541..0ed61e2a6e 100644 --- a/src/core/index_meta.rs +++ b/src/core/index_meta.rs @@ -485,19 +485,14 @@ mod tests { } #[test] - #[cfg(all( - feature = "lz4-compression", - feature = "brotli-compression", - feature = "snappy-compression", - feature = "zstd-compression" - ))] + #[cfg(all(feature = "lz4-compression", feature = "zstd-compression"))] fn test_serialize_metas_invalid_comp() { let json = r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zsstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#; let err = serde_json::from_str::(json).unwrap_err(); assert_eq!( err.to_string(), - "unknown variant `zsstd`, expected one of `none`, `lz4`, `brotli`, `snappy`, 
`zstd`, \ + "unknown variant `zsstd`, expected one of `none`, `lz4`, `zstd`, \ `zstd(compression_level=5)` at line 1 column 96" .to_string() ); diff --git a/src/store/compression_brotli.rs b/src/store/compression_brotli.rs deleted file mode 100644 index 33d97815dc..0000000000 --- a/src/store/compression_brotli.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::io; - -#[inline] -pub fn compress(mut uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> { - let params = brotli::enc::BrotliEncoderParams { - quality: 5, - ..Default::default() - }; - compressed.clear(); - brotli::BrotliCompress(&mut uncompressed, compressed, &params)?; - Ok(()) -} - -#[inline] -pub fn decompress(mut compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> { - decompressed.clear(); - brotli::BrotliDecompress(&mut compressed, decompressed)?; - Ok(()) -} diff --git a/src/store/compression_snap.rs b/src/store/compression_snap.rs deleted file mode 100644 index 4524d1e24a..0000000000 --- a/src/store/compression_snap.rs +++ /dev/null @@ -1,17 +0,0 @@ -use std::io::{self, Read, Write}; - -#[inline] -pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> { - compressed.clear(); - let mut encoder = snap::write::FrameEncoder::new(compressed); - encoder.write_all(uncompressed)?; - encoder.flush()?; - Ok(()) -} - -#[inline] -pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> { - decompressed.clear(); - snap::read::FrameDecoder::new(compressed).read_to_end(decompressed)?; - Ok(()) -} diff --git a/src/store/compressors.rs b/src/store/compressors.rs index 14088c9cf9..a15c83af79 100644 --- a/src/store/compressors.rs +++ b/src/store/compressors.rs @@ -19,12 +19,6 @@ pub enum Compressor { /// Use the lz4 compressor (block format) #[cfg(feature = "lz4-compression")] Lz4, - /// Use the brotli compressor - #[cfg(feature = "brotli-compression")] - Brotli, - /// Use the snap compressor - #[cfg(feature = "snappy-compression")] - Snappy, /// Use the zstd 
compressor #[cfg(feature = "zstd-compression")] Zstd(ZstdCompressor), @@ -37,10 +31,6 @@ impl Serialize for Compressor { Compressor::None => serializer.serialize_str("none"), #[cfg(feature = "lz4-compression")] Compressor::Lz4 => serializer.serialize_str("lz4"), - #[cfg(feature = "brotli-compression")] - Compressor::Brotli => serializer.serialize_str("brotli"), - #[cfg(feature = "snappy-compression")] - Compressor::Snappy => serializer.serialize_str("snappy"), #[cfg(feature = "zstd-compression")] Compressor::Zstd(zstd) => serializer.serialize_str(&zstd.ser_to_string()), } @@ -61,24 +51,6 @@ impl<'de> Deserialize<'de> for Compressor { "unsupported variant `lz4`, please enable Tantivy's `lz4-compression` feature", )) } - #[cfg(feature = "brotli-compression")] - "brotli" => Compressor::Brotli, - #[cfg(not(feature = "brotli-compression"))] - "brotli" => { - return Err(serde::de::Error::custom( - "unsupported variant `brotli`, please enable Tantivy's `brotli-compression` \ - feature", - )) - } - #[cfg(feature = "snappy-compression")] - "snappy" => Compressor::Snappy, - #[cfg(not(feature = "snappy-compression"))] - "snappy" => { - return Err(serde::de::Error::custom( - "unsupported variant `snappy`, please enable Tantivy's `snappy-compression` \ - feature", - )) - } #[cfg(feature = "zstd-compression")] _ if buf.starts_with("zstd") => Compressor::Zstd( ZstdCompressor::deser_from_str(&buf).map_err(serde::de::Error::custom)?, @@ -97,10 +69,6 @@ impl<'de> Deserialize<'de> for Compressor { "none", #[cfg(feature = "lz4-compression")] "lz4", - #[cfg(feature = "brotli-compression")] - "brotli", - #[cfg(feature = "snappy-compression")] - "snappy", #[cfg(feature = "zstd-compression")] "zstd", #[cfg(feature = "zstd-compression")] @@ -173,12 +141,6 @@ impl Default for Compressor { #[cfg(feature = "lz4-compression")] return Compressor::Lz4; - #[cfg(feature = "brotli-compression")] - return Compressor::Brotli; - - #[cfg(feature = "snappy-compression")] - return Compressor::Snappy; - 
#[cfg(feature = "zstd-compression")] return Compressor::Zstd(ZstdCompressor::default()); @@ -201,10 +163,6 @@ impl Compressor { } #[cfg(feature = "lz4-compression")] Self::Lz4 => super::compression_lz4_block::compress(uncompressed, compressed), - #[cfg(feature = "brotli-compression")] - Self::Brotli => super::compression_brotli::compress(uncompressed, compressed), - #[cfg(feature = "snappy-compression")] - Self::Snappy => super::compression_snap::compress(uncompressed, compressed), #[cfg(feature = "zstd-compression")] Self::Zstd(_zstd_compressor) => super::compression_zstd_block::compress( uncompressed, diff --git a/src/store/decompressors.rs b/src/store/decompressors.rs index 474f1d21e6..2c3173ae29 100644 --- a/src/store/decompressors.rs +++ b/src/store/decompressors.rs @@ -18,12 +18,6 @@ pub enum Decompressor { /// Use the lz4 decompressor (block format) #[cfg(feature = "lz4-compression")] Lz4, - /// Use the brotli decompressor - #[cfg(feature = "brotli-compression")] - Brotli, - /// Use the snap decompressor - #[cfg(feature = "snappy-compression")] - Snappy, /// Use the zstd decompressor #[cfg(feature = "zstd-compression")] Zstd, @@ -35,10 +29,6 @@ impl From for Decompressor { Compressor::None => Decompressor::None, #[cfg(feature = "lz4-compression")] Compressor::Lz4 => Decompressor::Lz4, - #[cfg(feature = "brotli-compression")] - Compressor::Brotli => Decompressor::Brotli, - #[cfg(feature = "snappy-compression")] - Compressor::Snappy => Decompressor::Snappy, #[cfg(feature = "zstd-compression")] Compressor::Zstd(_) => Decompressor::Zstd, } @@ -51,10 +41,6 @@ impl Decompressor { 0 => Decompressor::None, #[cfg(feature = "lz4-compression")] 1 => Decompressor::Lz4, - #[cfg(feature = "brotli-compression")] - 2 => Decompressor::Brotli, - #[cfg(feature = "snappy-compression")] - 3 => Decompressor::Snappy, #[cfg(feature = "zstd-compression")] 4 => Decompressor::Zstd, _ => panic!("unknown compressor id {id:?}"), @@ -66,10 +52,6 @@ impl Decompressor { Self::None => 0, 
#[cfg(feature = "lz4-compression")] Self::Lz4 => 1, - #[cfg(feature = "brotli-compression")] - Self::Brotli => 2, - #[cfg(feature = "snappy-compression")] - Self::Snappy => 3, #[cfg(feature = "zstd-compression")] Self::Zstd => 4, } @@ -95,10 +77,6 @@ impl Decompressor { } #[cfg(feature = "lz4-compression")] Self::Lz4 => super::compression_lz4_block::decompress(compressed, decompressed), - #[cfg(feature = "brotli-compression")] - Self::Brotli => super::compression_brotli::decompress(compressed, decompressed), - #[cfg(feature = "snappy-compression")] - Self::Snappy => super::compression_snap::decompress(compressed, decompressed), #[cfg(feature = "zstd-compression")] Self::Zstd => super::compression_zstd_block::decompress(compressed, decompressed), } @@ -115,10 +93,6 @@ mod tests { assert_eq!(Decompressor::from(Compressor::None), Decompressor::None); #[cfg(feature = "lz4-compression")] assert_eq!(Decompressor::from(Compressor::Lz4), Decompressor::Lz4); - #[cfg(feature = "brotli-compression")] - assert_eq!(Decompressor::from(Compressor::Brotli), Decompressor::Brotli); - #[cfg(feature = "snappy-compression")] - assert_eq!(Decompressor::from(Compressor::Snappy), Decompressor::Snappy); #[cfg(feature = "zstd-compression")] assert_eq!( Decompressor::from(Compressor::Zstd(Default::default())), diff --git a/src/store/mod.rs b/src/store/mod.rs index d64776f13c..5caae38200 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -4,8 +4,8 @@ //! order to be handled in the `Store`. //! //! Internally, documents (or rather their stored fields) are serialized to a buffer. -//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed using `brotli`, -//! `LZ4` or `snappy` and the resulting block is written to disk. +//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed +//! using LZ4 or Zstd and the resulting block is written to disk. //! //! One can then request for a specific `DocId`. //! 
A skip list helps navigating to the right block, @@ -48,12 +48,6 @@ pub(crate) const DOC_STORE_VERSION: u32 = 1; #[cfg(feature = "lz4-compression")] mod compression_lz4_block; -#[cfg(feature = "brotli-compression")] -mod compression_brotli; - -#[cfg(feature = "snappy-compression")] -mod compression_snap; - #[cfg(feature = "zstd-compression")] mod compression_zstd_block; @@ -200,16 +194,6 @@ pub mod tests { fn test_store_lz4_block() -> crate::Result<()> { test_store(Compressor::Lz4, BLOCK_SIZE, true) } - #[cfg(feature = "snappy-compression")] - #[test] - fn test_store_snap() -> crate::Result<()> { - test_store(Compressor::Snappy, BLOCK_SIZE, true) - } - #[cfg(feature = "brotli-compression")] - #[test] - fn test_store_brotli() -> crate::Result<()> { - test_store(Compressor::Brotli, BLOCK_SIZE, true) - } #[cfg(feature = "zstd-compression")] #[test] @@ -261,8 +245,8 @@ pub mod tests { Ok(()) } - #[cfg(feature = "snappy-compression")] #[cfg(feature = "lz4-compression")] + #[cfg(feature = "zstd-compression")] #[test] fn test_merge_with_changed_compressor() -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); @@ -294,7 +278,7 @@ pub mod tests { ); // Change compressor, this disables stacking on merging let index_settings = index.settings_mut(); - index_settings.docstore_compression = Compressor::Snappy; + index_settings.docstore_compression = Compressor::Zstd(Default::default()); // Merging the segments { let segment_ids = index @@ -316,7 +300,7 @@ pub mod tests { LOREM.to_string() ); } - assert_eq!(store.decompressor(), Decompressor::Snappy); + assert_eq!(store.decompressor(), Decompressor::Zstd); Ok(()) }