Skip to content

Commit

Permalink
fix: don't always include first doc
Browse files Browse the repository at this point in the history
  • Loading branch information
wjones127 committed Sep 30, 2024
1 parent 396b4d1 commit cd16d12
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 2 deletions.
25 changes: 25 additions & 0 deletions python/python/tests/test_scalar_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,31 @@ def test_filter_with_fts_index(dataset):
assert query == row.as_py()


def test_indexed_filter_with_fts_index(tmp_path):
    """Check that a prefiltered full-text query honours the scalar filter.

    Row 0 contains "puppy" but is labelled "neutral", so it matches the
    text query while failing the filter. Only rows 2 and 3 satisfy both
    conditions, and those are the only row ids expected back.
    """
    texts = [
        "Frodo was a puppy",
        "There were several kittens playing",
        "Frodo was a happy puppy",
        "Frodo was a very happy puppy",
    ]
    sentiments = ["neutral", "neutral", "positive", "positive"]
    table = pa.table({"text": texts, "sentiment": sentiments})

    dataset = lance.write_dataset(table, tmp_path, mode="overwrite")
    # Inverted index on the text column, bitmap index on the label column.
    for column, index_type in (("text", "INVERTED"), ("sentiment", "BITMAP")):
        dataset.create_scalar_index(column, index_type)

    matched = dataset.to_table(
        full_text_query="puppy",
        filter="sentiment='positive'",
        prefilter=True,
        with_row_id=True,
    )
    row_ids = matched["_rowid"].to_pylist()
    assert row_ids == [2, 3]


def test_fts_with_postfilter(tmp_path):
tab = pa.table({"text": ["Frodo the puppy"] * 100, "id": range(100)})
dataset = lance.write_dataset(tab, tmp_path)
Expand Down
2 changes: 1 addition & 1 deletion rust/lance-index/src/scalar/inverted/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ impl InvertedIndex {
position as i32,
posting,
self.docs.len(),
mask.clone(),
mask,
))
})
// Use compute count since data hopefully cached
Expand Down
10 changes: 9 additions & 1 deletion rust/lance-index/src/scalar/inverted/wand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,19 @@ impl PostingIterator {
Some(max_score) => max_score,
None => idf(list.len(), num_doc) * (K1 + 1.0),
};

// Move the iterator to the first selected document. This is important
// because the caller might call `doc()` directly without first calling `next()`.
let mut index = 0;
while index < list.len() && !mask.selected(list.row_id(index)) {
index += 1;
}

Self {
token_id,
position,
list,
index: 0,
index,
mask,
approximate_upper_bound,
}
Expand Down

0 comments on commit cd16d12

Please sign in to comment.