Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collapse overlapped highlighted ranges #1473

Merged
merged 7 commits into from
Aug 26, 2022
Merged
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 82 additions & 3 deletions src/snippet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl Snippet {
let mut html = String::new();
let mut start_from: usize = 0;

for item in self.highlighted.iter() {
for item in collapse_overlapped_ranges(&self.highlighted) {
html.push_str(&encode_minimal(&self.fragment[start_from..item.start]));
html.push_str(HIGHLIGHTEN_PREFIX);
html.push_str(&encode_minimal(&self.fragment[item.clone()]));
Expand Down Expand Up @@ -186,6 +186,53 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str)
}
}

/// Returns ranges that are collapsed into non-overlapped ranges.
Copy link
Collaborator

@fulmicoton fulmicoton Aug 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add your example in the comment.

///
/// ## Examples
/// - [0..1, 2..3] -> [0..1, 2..3] # no overlap
/// - [0..1, 1..2] -> [0..1, 1..2] # no overlap
/// - [0..2, 1..2] -> [0..2] # collapsed
/// - [0..2, 1..3] -> [0..3] # collapsed
/// - [0..3, 1..2] -> [0..3] # second range's end is also inside of the first range
///
/// Note: This function assumes `ranges` is sorted by `Range.start` in ascending order.
fn collapse_overlapped_ranges(ranges: &[Range<usize>]) -> Vec<Range<usize>> {
debug_assert!(is_sorted(ranges.iter().map(|range| range.start)));

let mut result = Vec::new();
fulmicoton marked this conversation as resolved.
Show resolved Hide resolved
let mut ranges_it = ranges.iter();

let mut current = match ranges_it.next() {
Some(range) => range.clone(),
None => return result,
};

for range in ranges {
if current.end > range.start {
current = current.start..std::cmp::max(current.end, range.end);
} else {
result.push(current);
current = range.clone();
}
}

result.push(current);
result
}

/// Returns `true` when the items yielded by `it` are in non-decreasing order.
///
/// An empty iterator and a single-item iterator are both considered sorted.
/// Equal neighbours are allowed; only a strictly smaller successor makes the
/// sequence unsorted.
fn is_sorted(mut it: impl Iterator<Item=usize>) -> bool {
    let mut previous = match it.next() {
        Some(head) => head,
        // Nothing to compare: trivially sorted.
        None => return true,
    };
    // `all` stops at the first out-of-order pair.
    it.all(|value| {
        let in_order = previous <= value;
        previous = value;
        in_order
    })
}

/// `SnippetGenerator`
///
/// # Example
Expand Down Expand Up @@ -320,10 +367,10 @@ mod tests {

use maplit::btreemap;

use super::{search_fragments, select_best_fragment_combination};
use super::{search_fragments, select_best_fragment_combination, collapse_overlapped_ranges};
use crate::query::QueryParser;
use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions, TEXT};
use crate::tokenizer::SimpleTokenizer;
use crate::tokenizer::{SimpleTokenizer, NgramTokenizer};
use crate::{Index, SnippetGenerator};

const TEST_TEXT: &str = r#"Rust is a systems programming language sponsored by
Expand Down Expand Up @@ -588,4 +635,36 @@ Survey in 2016, 2017, and 2018."#;
}
Ok(())
}

#[test]
fn test_collapse_overlapped_ranges() {
    // Disjoint ranges are returned unchanged.
    assert_eq!(collapse_overlapped_ranges(&[0..1, 2..3]), vec![0..1, 2..3]);
    // Ranges that only touch are not merged.
    assert_eq!(collapse_overlapped_ranges(&[0..1, 1..2]), vec![0..1, 1..2]);
    // Second range shares its end with the first one.
    assert_eq!(collapse_overlapped_ranges(&[0..2, 1..2]), vec![0..2]);
    // Second range extends past the first one.
    assert_eq!(collapse_overlapped_ranges(&[0..2, 1..3]), vec![0..3]);
    // Second range lies entirely inside the first one.
    assert_eq!(collapse_overlapped_ranges(&[0..3, 1..2]), vec![0..3]);
}

#[test]
fn test_snippet_with_overlapped_highlighted_ranges() {
    // Both query terms match inside "abc"; presumably the 2-gram tokenizer yields
    // "ab" and "bc", whose byte ranges overlap on "b" (confirm against NgramTokenizer).
    let text = "abc";

    let mut terms = BTreeMap::new();
    terms.insert("ab".to_string(), 0.9);
    terms.insert("bc".to_string(), 1.0);

    let bigram_tokenizer = NgramTokenizer::all_ngrams(2, 2);
    let fragments = search_fragments(&From::from(bigram_tokenizer), text, &terms, 3);

    // A single fragment covering the whole text, scored with both terms.
    assert_eq!(fragments.len(), 1);
    let only_fragment = &fragments[0];
    assert_eq!(only_fragment.score, 1.9);
    assert_eq!(only_fragment.start_offset, 0);
    assert_eq!(only_fragment.stop_offset, 3);

    // The rendered HTML must contain one highlight, not two nested/overlapping ones.
    let snippet = select_best_fragment_combination(&fragments[..], text);
    assert_eq!(snippet.fragment, "abc");
    assert_eq!(snippet.to_html(), "<b>abc</b>");
}
}