Skip to content

Commit

Permalink
fix(Perf)
Browse files Browse the repository at this point in the history
Signed-off-by: Simon Paitrault <contact@freyskeyd.fr>
  • Loading branch information
Simon Paitrault committed Dec 2, 2015
1 parent 250c189 commit fac78e2
Show file tree
Hide file tree
Showing 6 changed files with 10,071 additions and 0 deletions.
37 changes: 37 additions & 0 deletions benches/distance.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#![feature(test)]

extern crate nlp;
use nlp::distance::*;

use std::io::prelude::*;
use std::fs::File;
use std::path::Path;
extern crate test;

use test::Bencher;

/// Benchmarks a single `levenshtein` call on two short words that differ by
/// one character.
#[bench]
fn bench_levenshtein(b: &mut Bencher) {
    // Route the literal inputs through `black_box` so the optimizer cannot
    // treat them as compile-time constants and specialise the measured call.
    // The computed distance is returned from the closure to `Bencher::iter`.
    b.iter(|| levenshtein(test::black_box("rubert"), test::black_box("rupert")));
}

// #[bench]
// fn bench_levenshtein_fast(b: &mut Bencher) {
// b.iter(|| levenshtein_fast("rubert", "rupert"));
// }

/// Benchmarks `levenshtein` between `"rubert"` and every line of the word
/// list stored in `words2.txt`.
///
/// The path is relative, so the file is resolved against the current working
/// directory of the benchmark process.
///
/// NOTE(review): the name says "fast", but the body calls plain
/// `levenshtein`; the `levenshtein_fast` benchmark is commented out in this
/// file. The name is kept so existing benchmark output stays comparable.
/// The "10K" presumably refers to the size of the word list -- confirm.
///
/// # Panics
///
/// Panics if `words2.txt` cannot be opened or is not valid UTF-8.
#[bench]
fn bench_levenshtein_fast_10K(b: &mut Bencher) {
    let path = Path::new("words2.txt");
    let mut contents = String::new();
    let mut file = File::open(path).expect("cannot open benchmark word list `words2.txt`");
    file.read_to_string(&mut contents)
        .expect("cannot read benchmark word list `words2.txt` as UTF-8");

    // Split into lines once, outside the measured closure, borrowing from
    // `contents` instead of allocating a fresh `String` per word.
    let words: Vec<&str> = contents.lines().collect();

    b.iter(|| {
        for &word in &words {
            // Feed every distance to `black_box` so the otherwise unused
            // result cannot be optimised away.
            test::black_box(levenshtein("rubert", word));
        }
    })
}
4 changes: 4 additions & 0 deletions src/distance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,19 +127,23 @@ pub fn levenshtein(a: &str, b: &str) -> usize {
curr_distances.push(0);
}

println!("================================== {} {}", a, b);
for (i, a_char) in a.chars().enumerate() {
curr_distances[0] = i + 1;

for (j, b_char) in b.chars().enumerate() {

let cost = if a_char == b_char {
0
} else {
1
};
curr_distances[j + 1] = min(curr_distances[j] + 1,
min(prev_distances[j + 1] + 1, prev_distances[j] + cost));
println!("{:?}", curr_distances);
}

println!("prev become: {:?}", prev_distances);
prev_distances.clone_from(&curr_distances);
}

Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,7 @@
/// Distance module (Levenshtein, Jaro, Jaro-winkler)
pub mod distance;

// pub mod tokenize;

/// Phonetics module (Soundex)
pub mod phonetics;
21 changes: 21 additions & 0 deletions src/tokenize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// use core::str::CharEq;

// fn tokenize(text: &str) -> Vec<&str> {
// text.split(Splitter).filter(|s| s.len() > 0).collect()
// }

// struct Splitter;

// impl CharEq for Splitter {
// fn matches(&mut self, c: char) -> bool {
// match c {
// ' ' | ',' | '.' | '!' | '?' | ';' | '\'' | '"'
// | ':' | '\t' | '\n' | '(' | ')' | '-' => true,
// _ => false
// }
// }

// fn only_ascii(&self) -> bool {
// true
// }
// }
7 changes: 7 additions & 0 deletions tests/tokenize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
extern crate nlp;
use nlp::tokenize::tokenize;

/// Checks that `tokenize` applied to `"hello, world!"` yields exactly the two
/// words, with the comma, space and exclamation mark removed.
#[test]
fn test_tokenize() {
    let tokens = tokenize("hello, world!");
    let expected = vec!["hello", "world"];
    assert_eq!(tokens, expected);
}
Loading

0 comments on commit fac78e2

Please sign in to comment.