-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(DoubleMetaphone): add double metaphone
Signed-off-by: Simon Paitrault <contact@freyskeyd.fr>
- Loading branch information
Simon Paitrault
committed
Dec 7, 2015
1 parent
8f29f73
commit e5bd269
Showing
11 changed files
with
2,101 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// Conditions | ||
use phonetics::utils::metaphone_utils::{is_vowel, contains, contains_at_least_one, char_at, char_at_match_after}; | ||
|
||
/// One-character criteria that `condition_ch1` compares against the single
/// character found two positions after the current index.
///
/// The last entry is a literal space: the window being compared is exactly one
/// character wide, so the spelled-out word "SPACE" could never match it.
static L_R_N_M_B_H_F_V_W_SPACE: &'static [&'static str] = &["L", "R", "N", "M", "B", "H", "F", "V", "W", " "];
|
||
pub fn condition_c0(value: &String, index: usize) -> bool { | ||
if contains(&value, index, 4, "CHIA") { | ||
true | ||
} else if index <= 1 { | ||
false | ||
} else if is_vowel(&value, index - 2) || | ||
!contains(&value, index -1, 3, "ACH") { | ||
false | ||
} else { | ||
(!char_at_match_after(&value, index, 2, 'I') && !char_at_match_after(&value, index, 2, 'E')) || contains_at_least_one(&value, index - 2, 6, &["BACHER", "MACHER"]) | ||
} | ||
} | ||
|
||
pub fn condition_ch0(value: &String, index: usize) -> bool { | ||
if index != 0 { | ||
false | ||
} else if | ||
!contains_at_least_one(&value, index + 1, 5, &["HARAC", "HARIS"]) && | ||
!contains_at_least_one(&value, index + 1, 3, &["HOR", "HYM", "HIA", "HEM"]) { | ||
false | ||
} else if contains(&value, 0, 5, "CHORE") { | ||
false | ||
} else { | ||
true | ||
} | ||
} | ||
|
||
pub fn condition_ch1(value: &String, index: usize) -> bool { | ||
let a = contains_at_least_one(&value, 0, 4, &["VAN", "VON"]); | ||
let b = index > 1 && contains_at_least_one(&value, index - 2, 6, &["ORCHES", "ARCHIT", "ORCHID"]); | ||
let c = contains_at_least_one(&value, index + 2, 1, &["T", "S"]); | ||
let d = index > 0 && contains_at_least_one(&value, index - 1, 1, &["A", "O", "U", "E"]) || index == 0; | ||
let e = contains_at_least_one(&value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.len() - 1; | ||
|
||
a || b || c || (d && e) | ||
} | ||
|
||
pub fn condition_m0(value: &String, index: usize) -> bool { | ||
let current_char = match char_at(&value, index + 1) { | ||
Some(v) => v, | ||
_ => '_' | ||
}; | ||
|
||
if current_char == 'M' { | ||
true | ||
} else { | ||
index > 0 && contains(&value, index - 1, 3, "UMB") && (index + 1 == value.len() -1 || contains(&value, index + 2, 2, "ER")) | ||
} | ||
} | ||
|
||
pub fn condition_l0(value: &String, index: usize) -> bool { | ||
if index == value.len() - 3 && contains_at_least_one(&value, index - 1, 4, &["ILLO", "ILLA", "ALLE"]) { | ||
true | ||
} else if (contains_at_least_one(&value, value.len() - 2, 2, &["AS", "OS"]) || contains_at_least_one(&value, value.len() - 1, 1, &["A", "O"])) && contains(&value, index - 1, 4, "ALLE") { | ||
true | ||
} else { | ||
false | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
use phonetics::utils::metaphone_utils::char_at; | ||
use phonetics::metaphone::handler::*; | ||
use phonetics::metaphone::double_metaphone_result::DoubleMetaphoneResult; | ||
|
||
/// Two-letter word starts checked by `is_silent_start`; when the input begins
/// with one of them, `double_metaphone` starts encoding at index 1 instead of 0.
static SILENT_START: &'static [&'static str] = &[
    "GN",
    "KN",
    "PN",
    "WR",
    "PS",
];
// static ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER: &'static Vec<&str> = ["ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER"];
// static L_T_K_S_N_M_B_Z: &'static Vec<&str> = ["L", "T", "K", "S", "N", "M", "B", "Z"];

/// Length handed to `DoubleMetaphoneResult::new` by `double_metaphone`.
const MAX_LEN: i32 = 4;
|
||
/// Double metaphone with alternate | ||
pub fn double_metaphone(value: &str) -> Option<DoubleMetaphoneResult> { | ||
let value:String = match clean_input(&value) { | ||
Some(v) => v, | ||
None => return None | ||
}; | ||
|
||
let slavo_germanic: bool = is_slavo_germanic(&value); | ||
let mut index: usize = if is_silent_start(&value) { | ||
1 | ||
} else { | ||
0 | ||
}; | ||
|
||
let mut result = DoubleMetaphoneResult::new(MAX_LEN); | ||
|
||
while !result.is_complete() && index <= value.len() - 1 { | ||
|
||
let current_char = match char_at(&value, index) { | ||
Some(v) => v, | ||
None => {break;} | ||
}; | ||
|
||
index = match current_char { | ||
'A'| 'E'| 'I'| 'O'| 'U'| 'Y' => handle_aeiouy(&mut result, index), | ||
'B' => handle_b(&mut result, &value, index), | ||
'Ç' => handle_c_cedilla(&mut result, index), | ||
'C' => handle_c(&mut result, &value, index), | ||
'D' => handle_d(&mut result, &value, index), | ||
'F' => handle_f(&mut result, &value, index), | ||
'G' => handle_g(&mut result, &value, index, slavo_germanic), | ||
'H' => handle_h(&mut result, &value, index), | ||
'J' => handle_j(&mut result, &value, index, slavo_germanic), | ||
'K' => handle_k(&mut result, &value, index), | ||
'L' => handle_l(&mut result, &value, index), | ||
'M' => handle_m(&mut result, &value, index), | ||
'N' => handle_n(&mut result, &value, index), | ||
'P' => handle_p(&mut result, &value, index), | ||
'Q' => handle_q(&mut result, &value, index), | ||
'R' => handle_r(&mut result, &value, index, slavo_germanic), | ||
'S' => handle_s(&mut result, &value, index, slavo_germanic), | ||
'T' => handle_t(&mut result, &value, index), | ||
'V' => handle_v(&mut result, &value, index), | ||
'W' => handle_w(&mut result, &value, index), | ||
'X' => handle_x(&mut result, &value, index), | ||
'Z' => handle_z(&mut result, &value, index, slavo_germanic), | ||
'Ñ'|_ => index + 1 | ||
} | ||
} | ||
|
||
result.cleanup(); | ||
|
||
Some(result) | ||
} | ||
|
||
// Handler | ||
// Private methods | ||
fn handle_aeiouy(result: &mut DoubleMetaphoneResult, index: usize) -> usize { | ||
if index == 0 { | ||
result.append('A'); | ||
} | ||
|
||
index + 1 | ||
} | ||
|
||
/// Reports whether `value` contains any of the markers "W", "K", "CZ" or
/// "WITZ". The comparison is case-sensitive.
fn is_slavo_germanic(value: &str) -> bool {
    ["W", "K", "CZ", "WITZ"]
        .iter()
        .any(|marker| value.contains(*marker))
}
|
||
fn is_silent_start(value: &str) -> bool { | ||
SILENT_START | ||
.iter() | ||
.map(|silent: &&str| value.starts_with(silent)) | ||
.fold(false, |acc, x| acc || x) | ||
} | ||
|
||
/// Normalises raw input: surrounding whitespace is trimmed and the remainder
/// is upper-cased. Returns `None` when the input is empty or contains only
/// whitespace.
fn clean_input(value: &str) -> Option<String> {
    // An empty input trims to an empty string, so one check covers both cases.
    let trimmed = value.trim();

    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_uppercase())
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/// Metaphone Double result
///
/// Holds the two codes being built (primary and alternate) together with the
/// maximum length, in bytes, that each code is cut to by `cleanup`.
#[derive(Debug, PartialEq)]
pub struct DoubleMetaphoneResult {
    /// Primary metaphone
    pub primary: String,
    /// Alternate metaphone
    pub alternate: String,
    max_length: usize
}

impl DoubleMetaphoneResult {
    /// Creates an empty result whose codes are limited to `length` bytes.
    ///
    /// A negative `length` is treated as 0. Casting it straight to `usize`
    /// would wrap to an enormous capacity and make the allocation panic.
    pub fn new(length: i32) -> DoubleMetaphoneResult {
        let max_length = if length > 0 { length as usize } else { 0 };

        DoubleMetaphoneResult {
            primary: String::with_capacity(max_length),
            alternate: String::with_capacity(max_length),
            max_length: max_length,
        }
    }

    /// Returns `true` once both codes have reached the maximum length.
    ///
    /// Takes `&self`: the check only reads, so it must not require exclusive
    /// access. Existing calls through a mutable binding keep working.
    pub fn is_complete(&self) -> bool {
        self.primary.len() >= self.max_length && self.alternate.len() >= self.max_length
    }

    /// Appends `letter` to the primary code only.
    pub fn append_primary(&mut self, letter: char) {
        self.primary.push(letter);
    }

    /// Appends `letter` to the alternate code only.
    pub fn append_alternate(&mut self, letter: char) {
        self.alternate.push(letter);
    }

    /// Appends `letter` to both codes.
    pub fn append(&mut self, letter: char) {
        self.primary.push(letter);
        self.alternate.push(letter);
    }

    /// Cuts both codes down to at most the maximum length.
    pub fn cleanup(&mut self) {
        Self::truncate_code(&mut self.primary, self.max_length);
        Self::truncate_code(&mut self.alternate, self.max_length);
    }

    /// Shortens `code` to at most `max_length` bytes without splitting a
    /// character.
    fn truncate_code(code: &mut String, max_length: usize) {
        if code.len() <= max_length {
            return;
        }

        // `String::truncate` panics when the cut is not on a character
        // boundary, so back up to the nearest one. Index 0 is always a
        // boundary, which guarantees the loop ends.
        let mut cut = max_length;
        while !code.is_char_boundary(cut) {
            cut -= 1;
        }
        code.truncate(cut);
    }
}
Oops, something went wrong.