diff --git a/Cargo.toml b/Cargo.toml index 3bcc9a94d4..fc759c3384 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,9 +25,7 @@ aho-corasick = "1.0" tantivy-fst = "0.4.0" memmap2 = { version = "0.7.1", optional = true } lz4_flex = { version = "0.11", default-features = false, optional = true } -brotli = { version = "3.3.4", optional = true } zstd = { version = "0.12", optional = true, default-features = false } -snap = { version = "1.0.5", optional = true } tempfile = { version = "3.3.0", optional = true } log = "0.4.16" serde = { version = "1.0.136", features = ["derive"] } @@ -107,9 +105,7 @@ default = ["mmap", "stopwords", "lz4-compression"] mmap = ["fs4", "tempfile", "memmap2"] stopwords = [] -brotli-compression = ["brotli"] lz4-compression = ["lz4_flex"] -snappy-compression = ["snap"] zstd-compression = ["zstd"] failpoints = ["fail", "fail/failpoints"] diff --git a/README.md b/README.md index e43c4e5d9c..2cd8e8d76e 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Details about the benchmark can be found at this [repository](https://github.com - Single valued and multivalued u64, i64, and f64 fast fields (equivalent of doc values in Lucene) - `&[u8]` fast fields - Text, i64, u64, f64, dates, ip, bool, and hierarchical facet fields -- Compressed document store (LZ4, Zstd, None, Brotli, Snap) +- Compressed document store (LZ4, Zstd, None) - Range queries - Faceted search - Configurable indexing (optional term frequency and position indexing) diff --git a/src/core/index_meta.rs b/src/core/index_meta.rs index fb99101541..0ed61e2a6e 100644 --- a/src/core/index_meta.rs +++ b/src/core/index_meta.rs @@ -485,19 +485,14 @@ mod tests { } #[test] - #[cfg(all( - feature = "lz4-compression", - feature = "brotli-compression", - feature = "snappy-compression", - feature = "zstd-compression" - ))] + #[cfg(all(feature = "lz4-compression", feature = "zstd-compression"))] fn test_serialize_metas_invalid_comp() { let json = 
r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zsstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#; let err = serde_json::from_str::(json).unwrap_err(); assert_eq!( err.to_string(), - "unknown variant `zsstd`, expected one of `none`, `lz4`, `brotli`, `snappy`, `zstd`, \ + "unknown variant `zsstd`, expected one of `none`, `lz4`, `zstd`, \ `zstd(compression_level=5)` at line 1 column 96" .to_string() ); diff --git a/src/store/compression_brotli.rs b/src/store/compression_brotli.rs deleted file mode 100644 index 33d97815dc..0000000000 --- a/src/store/compression_brotli.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::io; - -#[inline] -pub fn compress(mut uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> { - let params = brotli::enc::BrotliEncoderParams { - quality: 5, - ..Default::default() - }; - compressed.clear(); - brotli::BrotliCompress(&mut uncompressed, compressed, &params)?; - Ok(()) -} - -#[inline] -pub fn decompress(mut compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> { - decompressed.clear(); - brotli::BrotliDecompress(&mut compressed, decompressed)?; - Ok(()) -} diff --git a/src/store/compression_snap.rs b/src/store/compression_snap.rs deleted file mode 100644 index 4524d1e24a..0000000000 --- a/src/store/compression_snap.rs +++ /dev/null @@ -1,17 +0,0 @@ -use std::io::{self, Read, Write}; - -#[inline] -pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> { - compressed.clear(); - let mut encoder = snap::write::FrameEncoder::new(compressed); - encoder.write_all(uncompressed)?; - encoder.flush()?; - Ok(()) -} - -#[inline] -pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> { - decompressed.clear(); - snap::read::FrameDecoder::new(compressed).read_to_end(decompressed)?; - Ok(()) -} 
diff --git a/src/store/compressors.rs b/src/store/compressors.rs index 14088c9cf9..a15c83af79 100644 --- a/src/store/compressors.rs +++ b/src/store/compressors.rs @@ -19,12 +19,6 @@ pub enum Compressor { /// Use the lz4 compressor (block format) #[cfg(feature = "lz4-compression")] Lz4, - /// Use the brotli compressor - #[cfg(feature = "brotli-compression")] - Brotli, - /// Use the snap compressor - #[cfg(feature = "snappy-compression")] - Snappy, /// Use the zstd compressor #[cfg(feature = "zstd-compression")] Zstd(ZstdCompressor), @@ -37,10 +31,6 @@ impl Serialize for Compressor { Compressor::None => serializer.serialize_str("none"), #[cfg(feature = "lz4-compression")] Compressor::Lz4 => serializer.serialize_str("lz4"), - #[cfg(feature = "brotli-compression")] - Compressor::Brotli => serializer.serialize_str("brotli"), - #[cfg(feature = "snappy-compression")] - Compressor::Snappy => serializer.serialize_str("snappy"), #[cfg(feature = "zstd-compression")] Compressor::Zstd(zstd) => serializer.serialize_str(&zstd.ser_to_string()), } @@ -61,24 +51,6 @@ impl<'de> Deserialize<'de> for Compressor { "unsupported variant `lz4`, please enable Tantivy's `lz4-compression` feature", )) } - #[cfg(feature = "brotli-compression")] - "brotli" => Compressor::Brotli, - #[cfg(not(feature = "brotli-compression"))] - "brotli" => { - return Err(serde::de::Error::custom( - "unsupported variant `brotli`, please enable Tantivy's `brotli-compression` \ - feature", - )) - } - #[cfg(feature = "snappy-compression")] - "snappy" => Compressor::Snappy, - #[cfg(not(feature = "snappy-compression"))] - "snappy" => { - return Err(serde::de::Error::custom( - "unsupported variant `snappy`, please enable Tantivy's `snappy-compression` \ - feature", - )) - } #[cfg(feature = "zstd-compression")] _ if buf.starts_with("zstd") => Compressor::Zstd( ZstdCompressor::deser_from_str(&buf).map_err(serde::de::Error::custom)?, @@ -97,10 +69,6 @@ impl<'de> Deserialize<'de> for Compressor { "none", #[cfg(feature = 
"lz4-compression")] "lz4", - #[cfg(feature = "brotli-compression")] - "brotli", - #[cfg(feature = "snappy-compression")] - "snappy", #[cfg(feature = "zstd-compression")] "zstd", #[cfg(feature = "zstd-compression")] @@ -173,12 +141,6 @@ impl Default for Compressor { #[cfg(feature = "lz4-compression")] return Compressor::Lz4; - #[cfg(feature = "brotli-compression")] - return Compressor::Brotli; - - #[cfg(feature = "snappy-compression")] - return Compressor::Snappy; - #[cfg(feature = "zstd-compression")] return Compressor::Zstd(ZstdCompressor::default()); @@ -201,10 +163,6 @@ impl Compressor { } #[cfg(feature = "lz4-compression")] Self::Lz4 => super::compression_lz4_block::compress(uncompressed, compressed), - #[cfg(feature = "brotli-compression")] - Self::Brotli => super::compression_brotli::compress(uncompressed, compressed), - #[cfg(feature = "snappy-compression")] - Self::Snappy => super::compression_snap::compress(uncompressed, compressed), #[cfg(feature = "zstd-compression")] Self::Zstd(_zstd_compressor) => super::compression_zstd_block::compress( uncompressed, diff --git a/src/store/decompressors.rs b/src/store/decompressors.rs index 474f1d21e6..2c3173ae29 100644 --- a/src/store/decompressors.rs +++ b/src/store/decompressors.rs @@ -18,12 +18,6 @@ pub enum Decompressor { /// Use the lz4 decompressor (block format) #[cfg(feature = "lz4-compression")] Lz4, - /// Use the brotli decompressor - #[cfg(feature = "brotli-compression")] - Brotli, - /// Use the snap decompressor - #[cfg(feature = "snappy-compression")] - Snappy, /// Use the zstd decompressor #[cfg(feature = "zstd-compression")] Zstd, @@ -35,10 +29,6 @@ impl From for Decompressor { Compressor::None => Decompressor::None, #[cfg(feature = "lz4-compression")] Compressor::Lz4 => Decompressor::Lz4, - #[cfg(feature = "brotli-compression")] - Compressor::Brotli => Decompressor::Brotli, - #[cfg(feature = "snappy-compression")] - Compressor::Snappy => Decompressor::Snappy, #[cfg(feature = "zstd-compression")] 
Compressor::Zstd(_) => Decompressor::Zstd, } @@ -51,10 +41,6 @@ impl Decompressor { 0 => Decompressor::None, #[cfg(feature = "lz4-compression")] 1 => Decompressor::Lz4, - #[cfg(feature = "brotli-compression")] - 2 => Decompressor::Brotli, - #[cfg(feature = "snappy-compression")] - 3 => Decompressor::Snappy, #[cfg(feature = "zstd-compression")] 4 => Decompressor::Zstd, _ => panic!("unknown compressor id {id:?}"), @@ -66,10 +52,6 @@ impl Decompressor { Self::None => 0, #[cfg(feature = "lz4-compression")] Self::Lz4 => 1, - #[cfg(feature = "brotli-compression")] - Self::Brotli => 2, - #[cfg(feature = "snappy-compression")] - Self::Snappy => 3, #[cfg(feature = "zstd-compression")] Self::Zstd => 4, } @@ -95,10 +77,6 @@ impl Decompressor { } #[cfg(feature = "lz4-compression")] Self::Lz4 => super::compression_lz4_block::decompress(compressed, decompressed), - #[cfg(feature = "brotli-compression")] - Self::Brotli => super::compression_brotli::decompress(compressed, decompressed), - #[cfg(feature = "snappy-compression")] - Self::Snappy => super::compression_snap::decompress(compressed, decompressed), #[cfg(feature = "zstd-compression")] Self::Zstd => super::compression_zstd_block::decompress(compressed, decompressed), } @@ -115,10 +93,6 @@ mod tests { assert_eq!(Decompressor::from(Compressor::None), Decompressor::None); #[cfg(feature = "lz4-compression")] assert_eq!(Decompressor::from(Compressor::Lz4), Decompressor::Lz4); - #[cfg(feature = "brotli-compression")] - assert_eq!(Decompressor::from(Compressor::Brotli), Decompressor::Brotli); - #[cfg(feature = "snappy-compression")] - assert_eq!(Decompressor::from(Compressor::Snappy), Decompressor::Snappy); #[cfg(feature = "zstd-compression")] assert_eq!( Decompressor::from(Compressor::Zstd(Default::default())), diff --git a/src/store/mod.rs b/src/store/mod.rs index d64776f13c..5caae38200 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -4,8 +4,8 @@ //! order to be handled in the `Store`. //! //! 
Internally, documents (or rather their stored fields) are serialized to a buffer. -//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed using `brotli`, -//! `LZ4` or `snappy` and the resulting block is written to disk. +//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed +//! using LZ4 or Zstd and the resulting block is written to disk. //! //! One can then request for a specific `DocId`. //! A skip list helps navigating to the right block, @@ -48,12 +48,6 @@ pub(crate) const DOC_STORE_VERSION: u32 = 1; #[cfg(feature = "lz4-compression")] mod compression_lz4_block; -#[cfg(feature = "brotli-compression")] -mod compression_brotli; - -#[cfg(feature = "snappy-compression")] -mod compression_snap; - #[cfg(feature = "zstd-compression")] mod compression_zstd_block; @@ -200,16 +194,6 @@ pub mod tests { fn test_store_lz4_block() -> crate::Result<()> { test_store(Compressor::Lz4, BLOCK_SIZE, true) } - #[cfg(feature = "snappy-compression")] - #[test] - fn test_store_snap() -> crate::Result<()> { - test_store(Compressor::Snappy, BLOCK_SIZE, true) - } - #[cfg(feature = "brotli-compression")] - #[test] - fn test_store_brotli() -> crate::Result<()> { - test_store(Compressor::Brotli, BLOCK_SIZE, true) - } #[cfg(feature = "zstd-compression")] #[test] @@ -261,8 +245,8 @@ pub mod tests { Ok(()) } - #[cfg(feature = "snappy-compression")] #[cfg(feature = "lz4-compression")] + #[cfg(feature = "zstd-compression")] #[test] fn test_merge_with_changed_compressor() -> crate::Result<()> { let mut schema_builder = schema::Schema::builder(); @@ -294,7 +278,7 @@ pub mod tests { ); // Change compressor, this disables stacking on merging let index_settings = index.settings_mut(); - index_settings.docstore_compression = Compressor::Snappy; + index_settings.docstore_compression = Compressor::Zstd(Default::default()); // Merging the segments { let segment_ids = index @@ -316,7 +300,7 @@ pub mod tests { LOREM.to_string() ); } - 
assert_eq!(store.decompressor(), Decompressor::Snappy); + assert_eq!(store.decompressor(), Decompressor::Zstd); Ok(()) }