Skip to content

Commit

Permalink
Add subscorer for Rust extension
Browse files Browse the repository at this point in the history
Follow up of #159
  • Loading branch information
liuchengxu committed Dec 21, 2019
1 parent 73bdb3c commit cbfbb23
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 2 deletions.
64 changes: 62 additions & 2 deletions pythonx/clap/fuzzymatch-rs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,58 @@
#![feature(pattern)]

use pyo3::prelude::*;
use pyo3::wrap_pyfunction;
use rff::match_and_score_with_positions;

use std::str::pattern::Pattern;

/// Searches `slice` for `pat`, beginning at byte offset `at`.
///
/// The returned offset is relative to the start of `slice`, not to `at`.
/// Returns `None` when `pat` does not occur at or after `at`, or when `at`
/// lies past the end of `slice` or does not fall on a char boundary.
#[inline]
fn find_start_at<'a, P: Pattern<'a>>(slice: &'a str, at: usize, pat: P) -> Option<usize> {
    // `get` rather than `slice[at..]`: an unusable offset means "no match"
    // instead of a panic.
    slice.get(at..)?.find(pat).map(|i| at + i)
}

/// Scores `haystack` against the whitespace-separated terms of `niddle` using
/// case-insensitive substring matching.
///
/// Both inputs are lowercased first. Every term must then occur in the
/// haystack in order: the search for a term starts right after the end of the
/// previous term's match. `None` is returned as soon as a term is not found.
///
/// On success the result is `(score, positions)`:
/// * `positions` lists every matched byte offset, in ascending order.
/// * `score = 2 / (first + 1) + 1 / (last + 1) - span`, where `first` and
///   `last` are the first and last matched offsets and `span` is
///   `last + 1 - first`.
///
/// A `niddle` with no terms (empty or whitespace only) yields
/// `Some((0.0, vec![]))`.
///
/// NOTE(review): offsets are byte offsets into the *lowercased* haystack; for
/// non-ASCII input they may not line up with the original string. Confirm what
/// the consumer of `positions` expects.
fn substr_scorer(niddle: &str, haystack: &str) -> Option<(f64, Vec<usize>)> {
    let niddle = niddle.to_lowercase();
    let haystack = haystack.to_lowercase();

    let mut offset = 0;
    let mut positions = Vec::new();
    for term in niddle.split_whitespace() {
        // `offset` is 0 or the end of the previous match, so it always sits
        // on a char boundary and the slice below cannot panic.
        let start = offset + haystack[offset..].find(term)?;
        offset = start + term.len();
        // Extend straight from the range; no lookup table of indices needed.
        positions.extend(start..offset);
    }

    if positions.is_empty() {
        return Some((0f64, positions));
    }

    let first = positions[0];
    let last = positions[positions.len() - 1];
    let span = (last + 1 - first) as f64;

    // Earlier and tighter matches score higher; the span dominates.
    let score = 2f64 / (first + 1) as f64 + 1f64 / (last + 1) as f64 - span;
    Some((score, positions))
}

#[pyfunction]
/// Filter the candidates given query: a query containing a space is scored by substring matching, any other query by the fzy algorithm
fn fuzzy_match(query: &str, candidates: Vec<String>) -> PyResult<(Vec<Vec<usize>>, Vec<String>)> {
let scorer = |line: &str| {
match_and_score_with_positions(query, line).map(|(_, score, indices)| (score, indices))
let scorer: Box<dyn Fn(&str) -> Option<(f64, Vec<usize>)>> = if query.contains(" ") {
Box::new(|line: &str| substr_scorer(query, line))
} else {
Box::new(|line: &str| {
match_and_score_with_positions(query, line).map(|(_, score, indices)| (score, indices))
})
};

let mut ranked = candidates
Expand All @@ -33,3 +79,17 @@ fn fuzzymatch_rs(_py: Python, m: &PyModule) -> PyResult<()> {

Ok(())
}

/// Checks the positions and score `substr_scorer` reports for two
/// multi-term queries, plus the no-match case.
#[test]
fn substr_scorer_should_work() {
    let haystack = "substr_scorer_should_work";
    // Scores are floats, so compare with a small tolerance.
    let close = |actual: f64, expected: f64| (actual - expected).abs() < 1e-9;

    // "su" matches at 0..2 and "ork" at 22..25, so the span is 25.
    let (score, positions) =
        substr_scorer("su ork", haystack).expect("both terms occur in order");
    assert_eq!(positions, vec![0, 1, 22, 23, 24]);
    assert!(close(score, 2.0 + 1.0 / 25.0 - 25.0));

    // "su" matches at 0..2 and "ou" at 16..18, so the span is 18.
    let (score, positions) =
        substr_scorer("su ou", haystack).expect("both terms occur in order");
    assert_eq!(positions, vec![0, 1, 16, 17]);
    assert!(close(score, 2.0 + 1.0 / 18.0 - 18.0));

    // A term that never occurs rejects the candidate.
    assert!(substr_scorer("su zz", haystack).is_none());

    // Smoke check only: the query contains a space, so this goes through the
    // substring path of `fuzzy_match`; the test fails here only on a panic.
    println!("{:?}", fuzzy_match("su ou", vec![haystack.into()]));
}
9 changes: 9 additions & 0 deletions pythonx/clap/scorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,12 @@ def substr_scorer(niddle, haystack):
match_len = positions[-1] + 1 - positions[0]
return -match_len + 2 / (positions[0] + 1) + 1 / (
positions[-1] + 1), positions


if __name__ == '__main__':
    # Ad-hoc smoke run: print the (score, positions) result for two sample
    # queries against the same haystack.
    haystack = "substr_scorer_should_work"
    for niddle in ("su ork", "su ou"):
        print(substr_scorer(niddle, haystack))

0 comments on commit cbfbb23

Please sign in to comment.