From 144aad0706d8f09f38200e484596e482469f0121 Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" Date: Tue, 2 Jul 2024 08:15:29 +0000 Subject: [PATCH] refactor: change InvertedIndexWriter method signature to offsets to facilitate caching --- src/index/src/inverted_index/format/reader.rs | 15 ++--- .../src/inverted_index/format/reader/blob.rs | 49 +++++++++------ .../src/inverted_index/format/writer/blob.rs | 46 ++++++++++++--- .../search/fst_values_mapper.rs | 4 +- .../search/index_apply/predicates_apply.rs | 59 +++++++++---------- 5 files changed, 103 insertions(+), 70 deletions(-) diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index de78800ca623..683a56561663 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -17,7 +17,7 @@ mod footer; use async_trait::async_trait; use common_base::BitVec; -use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas}; +use greptime_proto::v1::index::InvertedIndexMetas; use crate::inverted_index::error::Result; pub use crate::inverted_index::format::reader::blob::InvertedIndexBlobReader; @@ -30,14 +30,9 @@ pub trait InvertedIndexReader: Send { /// Retrieve metadata of all inverted indices stored within the blob. async fn metadata(&mut self) -> Result; - /// Retrieve the finite state transducer (FST) map for a given inverted index metadata entry. - async fn fst(&mut self, meta: &InvertedIndexMeta) -> Result; + /// Retrieve the finite state transducer (FST) map from the given offset and size. + async fn fst(&mut self, offset: u64, size: u32) -> Result; - /// Retrieve the bitmap for a given inverted index metadata entry at the specified offset and size. - async fn bitmap( - &mut self, - meta: &InvertedIndexMeta, - relative_offset: u32, - size: u32, - ) -> Result; + /// Retrieve the bitmap from the given offset and size. 
+ async fn bitmap(&mut self, offset: u64, size: u32) -> Result; } diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index b0a5e77db5fb..99f2f93239a3 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -17,7 +17,7 @@ use std::io::SeekFrom; use async_trait::async_trait; use common_base::BitVec; use futures::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; -use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas}; +use greptime_proto::v1::index::InvertedIndexMetas; use snafu::{ensure, ResultExt}; use crate::inverted_index::error::{ @@ -61,23 +61,22 @@ impl InvertedIndexReader for InvertedIn footer_reader.metadata().await } - async fn fst(&mut self, meta: &InvertedIndexMeta) -> Result { - let offset = SeekFrom::Start(meta.base_offset + meta.relative_fst_offset as u64); - self.source.seek(offset).await.context(SeekSnafu)?; - let mut buf = vec![0u8; meta.fst_size as usize]; + async fn fst(&mut self, offset: u64, size: u32) -> Result { + self.source + .seek(SeekFrom::Start(offset)) + .await + .context(SeekSnafu)?; + let mut buf = vec![0u8; size as usize]; self.source.read_exact(&mut buf).await.context(ReadSnafu)?; FstMap::new(buf).context(DecodeFstSnafu) } - async fn bitmap( - &mut self, - meta: &InvertedIndexMeta, - relative_offset: u32, - size: u32, - ) -> Result { - let offset = SeekFrom::Start(meta.base_offset + relative_offset as u64); - self.source.seek(offset).await.context(SeekSnafu)?; + async fn bitmap(&mut self, offset: u64, size: u32) -> Result { + self.source + .seek(SeekFrom::Start(offset)) + .await + .context(SeekSnafu)?; let mut buf = vec![0u8; size as usize]; self.source.read_exact(&mut buf).await.context(ReadSnafu)?; @@ -202,13 +201,25 @@ mod tests { let metas = blob_reader.metadata().await.unwrap(); let meta = metas.metas.get("tag0").unwrap(); - let fst_map = blob_reader.fst(meta).await.unwrap(); + 
let fst_map = blob_reader + .fst( + meta.base_offset + meta.relative_fst_offset as u64, + meta.fst_size, + ) + .await + .unwrap(); assert_eq!(fst_map.len(), 2); assert_eq!(fst_map.get("key1".as_bytes()), Some(1)); assert_eq!(fst_map.get("key2".as_bytes()), Some(2)); let meta = metas.metas.get("tag1").unwrap(); - let fst_map = blob_reader.fst(meta).await.unwrap(); + let fst_map = blob_reader + .fst( + meta.base_offset + meta.relative_fst_offset as u64, + meta.fst_size, + ) + .await + .unwrap(); assert_eq!(fst_map.len(), 2); assert_eq!(fst_map.get("key1".as_bytes()), Some(1)); assert_eq!(fst_map.get("key2".as_bytes()), Some(2)); @@ -222,17 +233,17 @@ mod tests { let metas = blob_reader.metadata().await.unwrap(); let meta = metas.metas.get("tag0").unwrap(); - let bitmap = blob_reader.bitmap(meta, 0, 2).await.unwrap(); + let bitmap = blob_reader.bitmap(meta.base_offset, 2).await.unwrap(); assert_eq!(bitmap.into_vec(), create_fake_bitmap()); - let bitmap = blob_reader.bitmap(meta, 2, 2).await.unwrap(); + let bitmap = blob_reader.bitmap(meta.base_offset + 2, 2).await.unwrap(); assert_eq!(bitmap.into_vec(), create_fake_bitmap()); let metas = blob_reader.metadata().await.unwrap(); let meta = metas.metas.get("tag1").unwrap(); - let bitmap = blob_reader.bitmap(meta, 0, 2).await.unwrap(); + let bitmap = blob_reader.bitmap(meta.base_offset, 2).await.unwrap(); assert_eq!(bitmap.into_vec(), create_fake_bitmap()); - let bitmap = blob_reader.bitmap(meta, 2, 2).await.unwrap(); + let bitmap = blob_reader.bitmap(meta.base_offset + 2, 2).await.unwrap(); assert_eq!(bitmap.into_vec(), create_fake_bitmap()); } } diff --git a/src/index/src/inverted_index/format/writer/blob.rs b/src/index/src/inverted_index/format/writer/blob.rs index 07f39af46ecc..767a7a3412a0 100644 --- a/src/index/src/inverted_index/format/writer/blob.rs +++ b/src/index/src/inverted_index/format/writer/blob.rs @@ -174,16 +174,31 @@ mod tests { assert_eq!(stats0.null_count, 1); assert_eq!(stats0.min_value, 
Bytes::from("a")); assert_eq!(stats0.max_value, Bytes::from("c")); - let fst0 = reader.fst(tag0).await.unwrap(); + let fst0 = reader + .fst( + tag0.base_offset + tag0.relative_fst_offset as u64, + tag0.fst_size, + ) + .await + .unwrap(); assert_eq!(fst0.len(), 3); let [offset, size] = unpack(fst0.get(b"a").unwrap()); - let bitmap = reader.bitmap(tag0, offset, size).await.unwrap(); + let bitmap = reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); let [offset, size] = unpack(fst0.get(b"b").unwrap()); - let bitmap = reader.bitmap(tag0, offset, size).await.unwrap(); + let bitmap = reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000])); let [offset, size] = unpack(fst0.get(b"c").unwrap()); - let bitmap = reader.bitmap(tag0, offset, size).await.unwrap(); + let bitmap = reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); // tag1 @@ -193,16 +208,31 @@ mod tests { assert_eq!(stats1.null_count, 1); assert_eq!(stats1.min_value, Bytes::from("x")); assert_eq!(stats1.max_value, Bytes::from("z")); - let fst1 = reader.fst(tag1).await.unwrap(); + let fst1 = reader + .fst( + tag1.base_offset + tag1.relative_fst_offset as u64, + tag1.fst_size, + ) + .await + .unwrap(); assert_eq!(fst1.len(), 3); let [offset, size] = unpack(fst1.get(b"x").unwrap()); - let bitmap = reader.bitmap(tag1, offset, size).await.unwrap(); + let bitmap = reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); let [offset, size] = unpack(fst1.get(b"y").unwrap()); - let bitmap = reader.bitmap(tag1, offset, size).await.unwrap(); + let bitmap = reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000])); let [offset, size] 
= unpack(fst1.get(b"z").unwrap()); - let bitmap = reader.bitmap(tag1, offset, size).await.unwrap(); + let bitmap = reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); } } diff --git a/src/index/src/inverted_index/search/fst_values_mapper.rs b/src/index/src/inverted_index/search/fst_values_mapper.rs index d4675e652d18..f4d159ed0c15 100644 --- a/src/index/src/inverted_index/search/fst_values_mapper.rs +++ b/src/index/src/inverted_index/search/fst_values_mapper.rs @@ -48,7 +48,7 @@ impl<'a> FstValuesMapper<'a> { let bm = self .reader - .bitmap(self.metadata, relative_offset, size) + .bitmap(self.metadata.base_offset + relative_offset as u64, size) .await?; // Ensure the longest BitVec is the left operand to prevent truncation during OR. @@ -79,7 +79,7 @@ mod tests { let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_bitmap() - .returning(|_, offset, size| match (offset, size) { + .returning(|offset, size| match (offset, size) { (1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1]), (2, 1) => Ok(bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1]), _ => unreachable!(), diff --git a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs index 93cdb201c8d5..85928d9183c2 100644 --- a/src/index/src/inverted_index/search/index_apply/predicates_apply.rs +++ b/src/index/src/inverted_index/search/index_apply/predicates_apply.rs @@ -76,7 +76,9 @@ impl IndexApplier for PredicatesIndexApplier { } }; - let fst = reader.fst(meta).await?; + let fst_offset = meta.base_offset + meta.relative_fst_offset as u64; + let fst_size = meta.fst_size; + let fst = reader.fst(fst_offset, fst_size).await?; let values = fst_applier.apply(&fst); let mut mapper = FstValuesMapper::new(&mut *reader, meta); @@ -159,15 +161,16 @@ mod tests { s.to_owned() } - fn mock_metas(tags: impl IntoIterator) -> InvertedIndexMetas { + fn 
mock_metas(tags: impl IntoIterator) -> InvertedIndexMetas { let mut metas = InvertedIndexMetas { total_row_count: 8, segment_row_count: 1, ..Default::default() }; - for tag in tags.into_iter() { + for (tag, idx) in tags.into_iter() { let meta = InvertedIndexMeta { name: s(tag), + relative_fst_offset: idx, ..Default::default() }; metas.metas.insert(s(tag), meta); @@ -198,19 +201,16 @@ mod tests { let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() - .returning(|| Ok(mock_metas(["tag-0"]))); + .returning(|| Ok(mock_metas([("tag-0", 0)]))); + mock_reader.expect_fst().returning(|_offset, _size| { + Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(2, 1))]).unwrap()) + }); mock_reader - .expect_fst() - .returning(|meta| match meta.name.as_str() { - "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(2, 1))]).unwrap()), + .expect_bitmap() + .returning(|offset, size| match (offset, size) { + (2, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), _ => unreachable!(), }); - mock_reader.expect_bitmap().returning(|meta, offset, size| { - match (meta.name.as_str(), offset, size) { - ("tag-0", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), - _ => unreachable!(), - } - }); let output = applier .apply(SearchContext::default(), &mut mock_reader) .await @@ -224,13 +224,10 @@ mod tests { let mut mock_reader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() - .returning(|| Ok(mock_metas(["tag-0"]))); - mock_reader - .expect_fst() - .returning(|meta| match meta.name.as_str() { - "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap()), - _ => unreachable!(), - }); + .returning(|| Ok(mock_metas([("tag-0", 0)]))); + mock_reader.expect_fst().returning(|_offset, _size| { + Ok(FstMap::from_iter([(b"tag-0_value-1", fst_value(2, 1))]).unwrap()) + }); let output = applier .apply(SearchContext::default(), &mut mock_reader) .await @@ -252,21 +249,21 @@ mod tests { let mut mock_reader = 
MockInvertedIndexReader::new(); mock_reader .expect_metadata() - .returning(|| Ok(mock_metas(["tag-0", "tag-1"]))); + .returning(|| Ok(mock_metas([("tag-0", 0), ("tag-1", 1)]))); mock_reader .expect_fst() - .returning(|meta| match meta.name.as_str() { - "tag-0" => Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(1, 1))]).unwrap()), - "tag-1" => Ok(FstMap::from_iter([(b"tag-1_value-a", fst_value(2, 1))]).unwrap()), + .returning(|offset, _size| match offset { + 0 => Ok(FstMap::from_iter([(b"tag-0_value-0", fst_value(1, 1))]).unwrap()), + 1 => Ok(FstMap::from_iter([(b"tag-1_value-a", fst_value(2, 1))]).unwrap()), _ => unreachable!(), }); - mock_reader.expect_bitmap().returning(|meta, offset, size| { - match (meta.name.as_str(), offset, size) { - ("tag-0", 1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), - ("tag-1", 2, 1) => Ok(bitvec![u8, Lsb0; 1, 1, 0, 1, 1, 0, 1, 1]), + mock_reader + .expect_bitmap() + .returning(|offset, size| match (offset, size) { + (1, 1) => Ok(bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1, 0]), + (2, 1) => Ok(bitvec![u8, Lsb0; 1, 1, 0, 1, 1, 0, 1, 1]), _ => unreachable!(), - } - }); + }); let output = applier .apply(SearchContext::default(), &mut mock_reader) @@ -287,7 +284,7 @@ mod tests { let mut mock_reader: MockInvertedIndexReader = MockInvertedIndexReader::new(); mock_reader .expect_metadata() - .returning(|| Ok(mock_metas(["tag-0"]))); + .returning(|| Ok(mock_metas([("tag-0", 0)]))); let output = applier .apply(SearchContext::default(), &mut mock_reader)