Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: Remove support for Brotli and Snappy compression #2123

Merged
merged 1 commit into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
strategy:
matrix:
features: [
{ label: "all", flags: "mmap,stopwords,brotli-compression,lz4-compression,snappy-compression,zstd-compression,failpoints" },
{ label: "all", flags: "mmap,stopwords,lz4-compression,zstd-compression,failpoints" },
{ label: "quickwit", flags: "mmap,quickwit,failpoints" }
]

Expand Down
4 changes: 0 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ aho-corasick = "1.0"
tantivy-fst = "0.4.0"
memmap2 = { version = "0.7.1", optional = true }
lz4_flex = { version = "0.11", default-features = false, optional = true }
brotli = { version = "3.3.4", optional = true }
zstd = { version = "0.12", optional = true, default-features = false }
snap = { version = "1.0.5", optional = true }
tempfile = { version = "3.3.0", optional = true }
log = "0.4.16"
serde = { version = "1.0.136", features = ["derive"] }
Expand Down Expand Up @@ -107,9 +105,7 @@ default = ["mmap", "stopwords", "lz4-compression"]
mmap = ["fs4", "tempfile", "memmap2"]
stopwords = []

brotli-compression = ["brotli"]
lz4-compression = ["lz4_flex"]
snappy-compression = ["snap"]
zstd-compression = ["zstd"]

failpoints = ["fail", "fail/failpoints"]
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Details about the benchmark can be found at this [repository](https://github.com
- Single valued and multivalued u64, i64, and f64 fast fields (equivalent of doc values in Lucene)
- `&[u8]` fast fields
- Text, i64, u64, f64, dates, ip, bool, and hierarchical facet fields
- Compressed document store (LZ4, Zstd, None, Brotli, Snap)
- Compressed document store (LZ4, Zstd, None)
- Range queries
- Faceted search
- Configurable indexing (optional term frequency and position indexing)
Expand Down
9 changes: 2 additions & 7 deletions src/core/index_meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,19 +485,14 @@ mod tests {
}

#[test]
#[cfg(all(
feature = "lz4-compression",
feature = "brotli-compression",
feature = "snappy-compression",
feature = "zstd-compression"
))]
#[cfg(all(feature = "lz4-compression", feature = "zstd-compression"))]
fn test_serialize_metas_invalid_comp() {
let json = r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zsstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;

let err = serde_json::from_str::<UntrackedIndexMeta>(json).unwrap_err();
assert_eq!(
err.to_string(),
"unknown variant `zsstd`, expected one of `none`, `lz4`, `brotli`, `snappy`, `zstd`, \
"unknown variant `zsstd`, expected one of `none`, `lz4`, `zstd`, \
`zstd(compression_level=5)` at line 1 column 96"
.to_string()
);
Expand Down
19 changes: 0 additions & 19 deletions src/store/compression_brotli.rs

This file was deleted.

17 changes: 0 additions & 17 deletions src/store/compression_snap.rs

This file was deleted.

42 changes: 0 additions & 42 deletions src/store/compressors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@ pub enum Compressor {
/// Use the lz4 compressor (block format)
#[cfg(feature = "lz4-compression")]
Lz4,
/// Use the brotli compressor
#[cfg(feature = "brotli-compression")]
Brotli,
/// Use the snap compressor
#[cfg(feature = "snappy-compression")]
Snappy,
/// Use the zstd compressor
#[cfg(feature = "zstd-compression")]
Zstd(ZstdCompressor),
Expand All @@ -37,10 +31,6 @@ impl Serialize for Compressor {
Compressor::None => serializer.serialize_str("none"),
#[cfg(feature = "lz4-compression")]
Compressor::Lz4 => serializer.serialize_str("lz4"),
#[cfg(feature = "brotli-compression")]
Compressor::Brotli => serializer.serialize_str("brotli"),
#[cfg(feature = "snappy-compression")]
Compressor::Snappy => serializer.serialize_str("snappy"),
#[cfg(feature = "zstd-compression")]
Compressor::Zstd(zstd) => serializer.serialize_str(&zstd.ser_to_string()),
}
Expand All @@ -61,24 +51,6 @@ impl<'de> Deserialize<'de> for Compressor {
"unsupported variant `lz4`, please enable Tantivy's `lz4-compression` feature",
))
}
#[cfg(feature = "brotli-compression")]
"brotli" => Compressor::Brotli,
#[cfg(not(feature = "brotli-compression"))]
"brotli" => {
return Err(serde::de::Error::custom(
"unsupported variant `brotli`, please enable Tantivy's `brotli-compression` \
feature",
))
}
#[cfg(feature = "snappy-compression")]
"snappy" => Compressor::Snappy,
#[cfg(not(feature = "snappy-compression"))]
"snappy" => {
return Err(serde::de::Error::custom(
"unsupported variant `snappy`, please enable Tantivy's `snappy-compression` \
feature",
))
}
#[cfg(feature = "zstd-compression")]
_ if buf.starts_with("zstd") => Compressor::Zstd(
ZstdCompressor::deser_from_str(&buf).map_err(serde::de::Error::custom)?,
Expand All @@ -97,10 +69,6 @@ impl<'de> Deserialize<'de> for Compressor {
"none",
#[cfg(feature = "lz4-compression")]
"lz4",
#[cfg(feature = "brotli-compression")]
"brotli",
#[cfg(feature = "snappy-compression")]
"snappy",
#[cfg(feature = "zstd-compression")]
"zstd",
#[cfg(feature = "zstd-compression")]
Expand Down Expand Up @@ -173,12 +141,6 @@ impl Default for Compressor {
#[cfg(feature = "lz4-compression")]
return Compressor::Lz4;

#[cfg(feature = "brotli-compression")]
return Compressor::Brotli;

#[cfg(feature = "snappy-compression")]
return Compressor::Snappy;

#[cfg(feature = "zstd-compression")]
return Compressor::Zstd(ZstdCompressor::default());

Expand All @@ -201,10 +163,6 @@ impl Compressor {
}
#[cfg(feature = "lz4-compression")]
Self::Lz4 => super::compression_lz4_block::compress(uncompressed, compressed),
#[cfg(feature = "brotli-compression")]
Self::Brotli => super::compression_brotli::compress(uncompressed, compressed),
#[cfg(feature = "snappy-compression")]
Self::Snappy => super::compression_snap::compress(uncompressed, compressed),
#[cfg(feature = "zstd-compression")]
Self::Zstd(_zstd_compressor) => super::compression_zstd_block::compress(
uncompressed,
Expand Down
26 changes: 0 additions & 26 deletions src/store/decompressors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,6 @@ pub enum Decompressor {
/// Use the lz4 decompressor (block format)
#[cfg(feature = "lz4-compression")]
Lz4,
/// Use the brotli decompressor
#[cfg(feature = "brotli-compression")]
Brotli,
/// Use the snap decompressor
#[cfg(feature = "snappy-compression")]
Snappy,
/// Use the zstd decompressor
#[cfg(feature = "zstd-compression")]
Zstd,
Expand All @@ -35,10 +29,6 @@ impl From<Compressor> for Decompressor {
Compressor::None => Decompressor::None,
#[cfg(feature = "lz4-compression")]
Compressor::Lz4 => Decompressor::Lz4,
#[cfg(feature = "brotli-compression")]
Compressor::Brotli => Decompressor::Brotli,
#[cfg(feature = "snappy-compression")]
Compressor::Snappy => Decompressor::Snappy,
#[cfg(feature = "zstd-compression")]
Compressor::Zstd(_) => Decompressor::Zstd,
}
Expand All @@ -51,10 +41,6 @@ impl Decompressor {
0 => Decompressor::None,
#[cfg(feature = "lz4-compression")]
1 => Decompressor::Lz4,
#[cfg(feature = "brotli-compression")]
2 => Decompressor::Brotli,
#[cfg(feature = "snappy-compression")]
3 => Decompressor::Snappy,
#[cfg(feature = "zstd-compression")]
4 => Decompressor::Zstd,
_ => panic!("unknown compressor id {id:?}"),
Expand All @@ -66,10 +52,6 @@ impl Decompressor {
Self::None => 0,
#[cfg(feature = "lz4-compression")]
Self::Lz4 => 1,
#[cfg(feature = "brotli-compression")]
Self::Brotli => 2,
#[cfg(feature = "snappy-compression")]
Self::Snappy => 3,
#[cfg(feature = "zstd-compression")]
Self::Zstd => 4,
}
Expand All @@ -95,10 +77,6 @@ impl Decompressor {
}
#[cfg(feature = "lz4-compression")]
Self::Lz4 => super::compression_lz4_block::decompress(compressed, decompressed),
#[cfg(feature = "brotli-compression")]
Self::Brotli => super::compression_brotli::decompress(compressed, decompressed),
#[cfg(feature = "snappy-compression")]
Self::Snappy => super::compression_snap::decompress(compressed, decompressed),
#[cfg(feature = "zstd-compression")]
Self::Zstd => super::compression_zstd_block::decompress(compressed, decompressed),
}
Expand All @@ -115,10 +93,6 @@ mod tests {
assert_eq!(Decompressor::from(Compressor::None), Decompressor::None);
#[cfg(feature = "lz4-compression")]
assert_eq!(Decompressor::from(Compressor::Lz4), Decompressor::Lz4);
#[cfg(feature = "brotli-compression")]
assert_eq!(Decompressor::from(Compressor::Brotli), Decompressor::Brotli);
#[cfg(feature = "snappy-compression")]
assert_eq!(Decompressor::from(Compressor::Snappy), Decompressor::Snappy);
#[cfg(feature = "zstd-compression")]
assert_eq!(
Decompressor::from(Compressor::Zstd(Default::default())),
Expand Down
26 changes: 5 additions & 21 deletions src/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
//! order to be handled in the `Store`.
//!
//! Internally, documents (or rather their stored fields) are serialized to a buffer.
//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed using `brotli`,
//! `LZ4` or `snappy` and the resulting block is written to disk.
//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed
//! using LZ4 or Zstd and the resulting block is written to disk.
//!
//! One can then request a specific `DocId`.
//! A skip list helps navigate to the right block,
Expand Down Expand Up @@ -48,12 +48,6 @@ pub(crate) const DOC_STORE_VERSION: u32 = 1;
#[cfg(feature = "lz4-compression")]
mod compression_lz4_block;

#[cfg(feature = "brotli-compression")]
mod compression_brotli;

#[cfg(feature = "snappy-compression")]
mod compression_snap;

#[cfg(feature = "zstd-compression")]
mod compression_zstd_block;

Expand Down Expand Up @@ -200,16 +194,6 @@ pub mod tests {
fn test_store_lz4_block() -> crate::Result<()> {
test_store(Compressor::Lz4, BLOCK_SIZE, true)
}
#[cfg(feature = "snappy-compression")]
#[test]
fn test_store_snap() -> crate::Result<()> {
test_store(Compressor::Snappy, BLOCK_SIZE, true)
}
#[cfg(feature = "brotli-compression")]
#[test]
fn test_store_brotli() -> crate::Result<()> {
test_store(Compressor::Brotli, BLOCK_SIZE, true)
}

#[cfg(feature = "zstd-compression")]
#[test]
Expand Down Expand Up @@ -261,8 +245,8 @@ pub mod tests {
Ok(())
}

#[cfg(feature = "snappy-compression")]
#[cfg(feature = "lz4-compression")]
#[cfg(feature = "zstd-compression")]
#[test]
fn test_merge_with_changed_compressor() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
Expand Down Expand Up @@ -294,7 +278,7 @@ pub mod tests {
);
// Change compressor, this disables stacking on merging
let index_settings = index.settings_mut();
index_settings.docstore_compression = Compressor::Snappy;
index_settings.docstore_compression = Compressor::Zstd(Default::default());
// Merging the segments
{
let segment_ids = index
Expand All @@ -316,7 +300,7 @@ pub mod tests {
LOREM.to_string()
);
}
assert_eq!(store.decompressor(), Decompressor::Snappy);
assert_eq!(store.decompressor(), Decompressor::Zstd);

Ok(())
}
Expand Down