Skip to content

Commit

Permalink
Remove support for Brotli and Snappy compression
Browse files Browse the repository at this point in the history
LZ4 provides fast and simple compression, whereas Zstd is exceptionally flexible,
so the additional support for Brotli and Snappy does not really add
any distinct functionality on top of those two algorithms.

Removing them reduces our maintenance burden and reduces the number of choices
users have to make when setting up their project based on Tantivy.
  • Loading branch information
adamreichold committed Jul 13, 2023
1 parent f7288b0 commit 864c826
Show file tree
Hide file tree
Showing 8 changed files with 8 additions and 137 deletions.
4 changes: 0 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ aho-corasick = "1.0"
tantivy-fst = "0.4.0"
memmap2 = { version = "0.7.1", optional = true }
lz4_flex = { version = "0.11", default-features = false, optional = true }
brotli = { version = "3.3.4", optional = true }
zstd = { version = "0.12", optional = true, default-features = false }
snap = { version = "1.0.5", optional = true }
tempfile = { version = "3.3.0", optional = true }
log = "0.4.16"
serde = { version = "1.0.136", features = ["derive"] }
Expand Down Expand Up @@ -107,9 +105,7 @@ default = ["mmap", "stopwords", "lz4-compression"]
mmap = ["fs4", "tempfile", "memmap2"]
stopwords = []

brotli-compression = ["brotli"]
lz4-compression = ["lz4_flex"]
snappy-compression = ["snap"]
zstd-compression = ["zstd"]

failpoints = ["fail", "fail/failpoints"]
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Details about the benchmark can be found at this [repository](https://github.com
- Single valued and multivalued u64, i64, and f64 fast fields (equivalent of doc values in Lucene)
- `&[u8]` fast fields
- Text, i64, u64, f64, dates, ip, bool, and hierarchical facet fields
- Compressed document store (LZ4, Zstd, None, Brotli, Snap)
- Compressed document store (LZ4, Zstd, None)
- Range queries
- Faceted search
- Configurable indexing (optional term frequency and position indexing)
Expand Down
9 changes: 2 additions & 7 deletions src/core/index_meta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,19 +485,14 @@ mod tests {
}

#[test]
#[cfg(all(
feature = "lz4-compression",
feature = "brotli-compression",
feature = "snappy-compression",
feature = "zstd-compression"
))]
#[cfg(all(feature = "lz4-compression", feature = "zstd-compression"))]
fn test_serialize_metas_invalid_comp() {
let json = r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zsstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;

let err = serde_json::from_str::<UntrackedIndexMeta>(json).unwrap_err();
assert_eq!(
err.to_string(),
"unknown variant `zsstd`, expected one of `none`, `lz4`, `brotli`, `snappy`, `zstd`, \
"unknown variant `zsstd`, expected one of `none`, `lz4`, `zstd`, \
`zstd(compression_level=5)` at line 1 column 96"
.to_string()
);
Expand Down
19 changes: 0 additions & 19 deletions src/store/compression_brotli.rs

This file was deleted.

17 changes: 0 additions & 17 deletions src/store/compression_snap.rs

This file was deleted.

42 changes: 0 additions & 42 deletions src/store/compressors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@ pub enum Compressor {
/// Use the lz4 compressor (block format)
#[cfg(feature = "lz4-compression")]
Lz4,
/// Use the brotli compressor
#[cfg(feature = "brotli-compression")]
Brotli,
/// Use the snap compressor
#[cfg(feature = "snappy-compression")]
Snappy,
/// Use the zstd compressor
#[cfg(feature = "zstd-compression")]
Zstd(ZstdCompressor),
Expand All @@ -37,10 +31,6 @@ impl Serialize for Compressor {
Compressor::None => serializer.serialize_str("none"),
#[cfg(feature = "lz4-compression")]
Compressor::Lz4 => serializer.serialize_str("lz4"),
#[cfg(feature = "brotli-compression")]
Compressor::Brotli => serializer.serialize_str("brotli"),
#[cfg(feature = "snappy-compression")]
Compressor::Snappy => serializer.serialize_str("snappy"),
#[cfg(feature = "zstd-compression")]
Compressor::Zstd(zstd) => serializer.serialize_str(&zstd.ser_to_string()),
}
Expand All @@ -61,24 +51,6 @@ impl<'de> Deserialize<'de> for Compressor {
"unsupported variant `lz4`, please enable Tantivy's `lz4-compression` feature",
))
}
#[cfg(feature = "brotli-compression")]
"brotli" => Compressor::Brotli,
#[cfg(not(feature = "brotli-compression"))]
"brotli" => {
return Err(serde::de::Error::custom(
"unsupported variant `brotli`, please enable Tantivy's `brotli-compression` \
feature",
))
}
#[cfg(feature = "snappy-compression")]
"snappy" => Compressor::Snappy,
#[cfg(not(feature = "snappy-compression"))]
"snappy" => {
return Err(serde::de::Error::custom(
"unsupported variant `snappy`, please enable Tantivy's `snappy-compression` \
feature",
))
}
#[cfg(feature = "zstd-compression")]
_ if buf.starts_with("zstd") => Compressor::Zstd(
ZstdCompressor::deser_from_str(&buf).map_err(serde::de::Error::custom)?,
Expand All @@ -97,10 +69,6 @@ impl<'de> Deserialize<'de> for Compressor {
"none",
#[cfg(feature = "lz4-compression")]
"lz4",
#[cfg(feature = "brotli-compression")]
"brotli",
#[cfg(feature = "snappy-compression")]
"snappy",
#[cfg(feature = "zstd-compression")]
"zstd",
#[cfg(feature = "zstd-compression")]
Expand Down Expand Up @@ -173,12 +141,6 @@ impl Default for Compressor {
#[cfg(feature = "lz4-compression")]
return Compressor::Lz4;

#[cfg(feature = "brotli-compression")]
return Compressor::Brotli;

#[cfg(feature = "snappy-compression")]
return Compressor::Snappy;

#[cfg(feature = "zstd-compression")]
return Compressor::Zstd(ZstdCompressor::default());

Expand All @@ -201,10 +163,6 @@ impl Compressor {
}
#[cfg(feature = "lz4-compression")]
Self::Lz4 => super::compression_lz4_block::compress(uncompressed, compressed),
#[cfg(feature = "brotli-compression")]
Self::Brotli => super::compression_brotli::compress(uncompressed, compressed),
#[cfg(feature = "snappy-compression")]
Self::Snappy => super::compression_snap::compress(uncompressed, compressed),
#[cfg(feature = "zstd-compression")]
Self::Zstd(_zstd_compressor) => super::compression_zstd_block::compress(
uncompressed,
Expand Down
26 changes: 0 additions & 26 deletions src/store/decompressors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,6 @@ pub enum Decompressor {
/// Use the lz4 decompressor (block format)
#[cfg(feature = "lz4-compression")]
Lz4,
/// Use the brotli decompressor
#[cfg(feature = "brotli-compression")]
Brotli,
/// Use the snap decompressor
#[cfg(feature = "snappy-compression")]
Snappy,
/// Use the zstd decompressor
#[cfg(feature = "zstd-compression")]
Zstd,
Expand All @@ -35,10 +29,6 @@ impl From<Compressor> for Decompressor {
Compressor::None => Decompressor::None,
#[cfg(feature = "lz4-compression")]
Compressor::Lz4 => Decompressor::Lz4,
#[cfg(feature = "brotli-compression")]
Compressor::Brotli => Decompressor::Brotli,
#[cfg(feature = "snappy-compression")]
Compressor::Snappy => Decompressor::Snappy,
#[cfg(feature = "zstd-compression")]
Compressor::Zstd(_) => Decompressor::Zstd,
}
Expand All @@ -51,10 +41,6 @@ impl Decompressor {
0 => Decompressor::None,
#[cfg(feature = "lz4-compression")]
1 => Decompressor::Lz4,
#[cfg(feature = "brotli-compression")]
2 => Decompressor::Brotli,
#[cfg(feature = "snappy-compression")]
3 => Decompressor::Snappy,
#[cfg(feature = "zstd-compression")]
4 => Decompressor::Zstd,
_ => panic!("unknown compressor id {id:?}"),
Expand All @@ -66,10 +52,6 @@ impl Decompressor {
Self::None => 0,
#[cfg(feature = "lz4-compression")]
Self::Lz4 => 1,
#[cfg(feature = "brotli-compression")]
Self::Brotli => 2,
#[cfg(feature = "snappy-compression")]
Self::Snappy => 3,
#[cfg(feature = "zstd-compression")]
Self::Zstd => 4,
}
Expand All @@ -95,10 +77,6 @@ impl Decompressor {
}
#[cfg(feature = "lz4-compression")]
Self::Lz4 => super::compression_lz4_block::decompress(compressed, decompressed),
#[cfg(feature = "brotli-compression")]
Self::Brotli => super::compression_brotli::decompress(compressed, decompressed),
#[cfg(feature = "snappy-compression")]
Self::Snappy => super::compression_snap::decompress(compressed, decompressed),
#[cfg(feature = "zstd-compression")]
Self::Zstd => super::compression_zstd_block::decompress(compressed, decompressed),
}
Expand All @@ -115,10 +93,6 @@ mod tests {
assert_eq!(Decompressor::from(Compressor::None), Decompressor::None);
#[cfg(feature = "lz4-compression")]
assert_eq!(Decompressor::from(Compressor::Lz4), Decompressor::Lz4);
#[cfg(feature = "brotli-compression")]
assert_eq!(Decompressor::from(Compressor::Brotli), Decompressor::Brotli);
#[cfg(feature = "snappy-compression")]
assert_eq!(Decompressor::from(Compressor::Snappy), Decompressor::Snappy);
#[cfg(feature = "zstd-compression")]
assert_eq!(
Decompressor::from(Compressor::Zstd(Default::default())),
Expand Down
26 changes: 5 additions & 21 deletions src/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
//! order to be handled in the `Store`.
//!
//! Internally, documents (or rather their stored fields) are serialized to a buffer.
//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed using `brotli`,
//! `LZ4` or `snappy` and the resulting block is written to disk.
//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed
//! using LZ4 or Zstd and the resulting block is written to disk.
//!
//! One can then request a specific `DocId`.
//! A skip list helps navigate to the right block,
Expand Down Expand Up @@ -48,12 +48,6 @@ pub(crate) const DOC_STORE_VERSION: u32 = 1;
#[cfg(feature = "lz4-compression")]
mod compression_lz4_block;

#[cfg(feature = "brotli-compression")]
mod compression_brotli;

#[cfg(feature = "snappy-compression")]
mod compression_snap;

#[cfg(feature = "zstd-compression")]
mod compression_zstd_block;

Expand Down Expand Up @@ -200,16 +194,6 @@ pub mod tests {
fn test_store_lz4_block() -> crate::Result<()> {
test_store(Compressor::Lz4, BLOCK_SIZE, true)
}
#[cfg(feature = "snappy-compression")]
#[test]
fn test_store_snap() -> crate::Result<()> {
test_store(Compressor::Snappy, BLOCK_SIZE, true)
}
#[cfg(feature = "brotli-compression")]
#[test]
fn test_store_brotli() -> crate::Result<()> {
test_store(Compressor::Brotli, BLOCK_SIZE, true)
}

#[cfg(feature = "zstd-compression")]
#[test]
Expand Down Expand Up @@ -261,8 +245,8 @@ pub mod tests {
Ok(())
}

#[cfg(feature = "snappy-compression")]
#[cfg(feature = "lz4-compression")]
#[cfg(feature = "zstd-compression")]
#[test]
fn test_merge_with_changed_compressor() -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
Expand Down Expand Up @@ -294,7 +278,7 @@ pub mod tests {
);
// Change compressor, this disables stacking on merging
let index_settings = index.settings_mut();
index_settings.docstore_compression = Compressor::Snappy;
index_settings.docstore_compression = Compressor::Zstd(Default::default());
// Merging the segments
{
let segment_ids = index
Expand All @@ -316,7 +300,7 @@ pub mod tests {
LOREM.to_string()
);
}
assert_eq!(store.decompressor(), Decompressor::Snappy);
assert_eq!(store.decompressor(), Decompressor::Zstd);

Ok(())
}
Expand Down

0 comments on commit 864c826

Please sign in to comment.