Skip to content

Commit

Permalink
feat(DoubleMetaphone): add double metaphone
Browse files Browse the repository at this point in the history
Signed-off-by: Simon Paitrault <contact@freyskeyd.fr>
  • Loading branch information
Simon Paitrault committed Dec 7, 2015
1 parent 8f29f73 commit e5bd269
Show file tree
Hide file tree
Showing 11 changed files with 2,101 additions and 10 deletions.
64 changes: 64 additions & 0 deletions src/phonetics/metaphone/conditions.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Conditions
use phonetics::utils::metaphone_utils::{is_vowel, contains, contains_at_least_one, char_at, char_at_match_after};

// Single-character strings that `condition_ch1` compares against the one
// character found two positions after the current index. The last entry is a
// literal space: the previous value, the five-letter word "SPACE", could never
// equal a one-character window.
static L_R_N_M_B_H_F_V_W_SPACE: &'static [&'static str] = &["L", "R", "N", "M", "B", "H", "F", "V", "W", " "];

/// First of the `C` conditions.
///
/// Returns `true` when the text at `index` is "CHIA". Otherwise it requires
/// `index >= 2`, a non-vowel two positions back and "ACH" starting one
/// position back; it then returns `true` when the character check two
/// positions ahead matches neither 'I' nor 'E', or when "BACHER"/"MACHER"
/// starts two positions back.
pub fn condition_c0(value: &String, index: usize) -> bool {
    if contains(value, index, 4, "CHIA") {
        return true;
    }

    // The look-behind below reads `index - 2`, so anything closer to the
    // start of the string cannot satisfy the remaining checks.
    if index <= 1 {
        return false;
    }

    if is_vowel(value, index - 2) || !contains(value, index - 1, 3, "ACH") {
        return false;
    }

    let followed_by_i_or_e =
        char_at_match_after(value, index, 2, 'I') || char_at_match_after(value, index, 2, 'E');

    !followed_by_i_or_e || contains_at_least_one(value, index - 2, 6, &["BACHER", "MACHER"])
}

/// First of the `CH` conditions; only ever true at the very start of the word.
///
/// Returns `true` when `index` is 0, the text after the first character starts
/// with one of "HARAC", "HARIS", "HOR", "HYM", "HIA" or "HEM", and the word
/// does not begin with "CHORE".
pub fn condition_ch0(value: &String, index: usize) -> bool {
    if index != 0 {
        return false;
    }

    let has_listed_prefix = contains_at_least_one(value, index + 1, 5, &["HARAC", "HARIS"])
        || contains_at_least_one(value, index + 1, 3, &["HOR", "HYM", "HIA", "HEM"]);

    // "CHORE" is the one listed exception to the prefixes above.
    has_listed_prefix && !contains(value, 0, 5, "CHORE")
}

/// Second of the `CH` conditions.
///
/// Returns `true` when any of the following holds:
/// * the word starts with one of the listed "VAN"/"VON" prefixes,
/// * "ORCHES", "ARCHIT" or "ORCHID" starts two positions before `index`,
/// * the character two positions after `index` is "T" or "S",
/// * `index` is 0 or is preceded by "A", "O", "U" or "E", and the character
///   two positions after `index` is in `L_R_N_M_B_H_F_V_W_SPACE` or
///   `index + 1` is the last position of `value`.
pub fn condition_ch1(value: &String, index: usize) -> bool {
    // NOTE(review): these needles are three characters long but the window is
    // four; reference Double Metaphone implementations use "VAN " / "VON " and
    // also test for a leading "SCH". Confirm against the semantics of
    // `contains_at_least_one` before changing.
    let van_von_prefix = contains_at_least_one(value, 0, 4, &["VAN", "VON"]);

    // `index > 1` protects the `index - 2` look-behind from underflowing.
    let orchestra_like = index > 1
        && contains_at_least_one(value, index - 2, 6, &["ORCHES", "ARCHIT", "ORCHID"]);

    let t_or_s_follows = contains_at_least_one(value, index + 2, 1, &["T", "S"]);

    let at_start_or_after_vowel = index == 0
        || (index > 0 && contains_at_least_one(value, index - 1, 1, &["A", "O", "U", "E"]));

    // `index + 2 == value.len()` is `index + 1 == value.len() - 1` written so
    // that an empty `value` cannot underflow the unsigned subtraction.
    let listed_follower_or_end = contains_at_least_one(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE)
        || index + 2 == value.len();

    van_von_prefix
        || orchestra_like
        || t_or_s_follows
        || (at_start_or_after_vowel && listed_follower_or_end)
}

/// Condition used for `M`.
///
/// Returns `true` when the character right after `index` is 'M', or when
/// "UMB" starts one position before `index` and either `index + 1` is the last
/// position of `value` or "ER" starts two positions after `index`.
pub fn condition_m0(value: &String, index: usize) -> bool {
    // A missing next character simply fails the comparison.
    if char_at(value, index + 1) == Some('M') {
        return true;
    }

    // `index > 0` protects the `index - 1` look-behind from underflowing.
    let after_u = index > 0 && contains(value, index - 1, 3, "UMB");

    after_u && (index + 1 == value.len() - 1 || contains(value, index + 2, 2, "ER"))
}

/// Condition used for `L`.
///
/// Returns `true` when either:
/// * `index` is three positions before the end of `value` and "ILLO", "ILLA"
///   or "ALLE" starts one position before `index`, or
/// * `value` ends with "AS", "OS", "A" or "O" and "ALLE" starts one position
///   before `index`.
///
/// Both alternatives look one position behind `index`, so the result is
/// `false` when `index` is 0. All offsets are computed without unsigned
/// underflow, so short inputs and `index == 0` no longer panic.
pub fn condition_l0(value: &String, index: usize) -> bool {
    let len = value.len();

    // Every branch needs a match starting at `index - 1`; with no previous
    // position there is nothing to match.
    let previous = match index.checked_sub(1) {
        Some(previous) => previous,
        None => return false,
    };

    // `index + 3 == len` is `index == len - 3` without the underflow on
    // inputs shorter than three bytes.
    if index + 3 == len && contains_at_least_one(value, previous, 4, &["ILLO", "ILLA", "ALLE"]) {
        return true;
    }

    // The length guards keep `len - 2` / `len - 1` from underflowing.
    let has_listed_ending = (len >= 2 && contains_at_least_one(value, len - 2, 2, &["AS", "OS"]))
        || (len >= 1 && contains_at_least_one(value, len - 1, 1, &["A", "O"]));

    has_listed_ending && contains(value, previous, 4, "ALLE")
}
99 changes: 99 additions & 0 deletions src/phonetics/metaphone/double_metaphone.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use phonetics::utils::metaphone_utils::char_at;
use phonetics::metaphone::handler::*;
use phonetics::metaphone::double_metaphone_result::DoubleMetaphoneResult;

// Two-letter prefixes tested by `is_silent_start`. When the cleaned input
// begins with one of them, `double_metaphone` starts encoding at index 1
// instead of index 0.
static SILENT_START: &'static [&'static str] = &["GN", "KN", "PN", "WR", "PS"];
// static ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER: &'static Vec<&str> = ["ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER"];
// static L_T_K_S_N_M_B_Z: &'static Vec<&str> = ["L", "T", "K", "S", "N", "M", "B", "Z"];

// Length handed to `DoubleMetaphoneResult::new` by `double_metaphone`.
const MAX_LEN: i32 = 4;

/// Double metaphone with alternate
///
/// Cleans `value` with `clean_input` (trim + uppercase) and then walks it
/// character by character, letting the per-letter handlers append to the
/// primary and alternate codes and decide how far to advance.
///
/// Returns `None` when `clean_input` rejects the input (empty or
/// whitespace-only); otherwise returns the result after `cleanup()` has
/// been applied to it.
pub fn double_metaphone(value: &str) -> Option<DoubleMetaphoneResult> {
    let value: String = match clean_input(value) {
        Some(cleaned) => cleaned,
        None => return None,
    };

    let slavo_germanic: bool = is_slavo_germanic(&value);

    // Words starting with one of the `SILENT_START` prefixes begin at index 1.
    let mut index: usize = if is_silent_start(&value) { 1 } else { 0 };

    let mut result = DoubleMetaphoneResult::new(MAX_LEN);

    // `index < len` is the underflow-free spelling of `index <= len - 1`.
    while !result.is_complete() && index < value.len() {
        let current_char = match char_at(&value, index) {
            Some(c) => c,
            None => break,
        };

        // Each handler returns the index at which scanning resumes.
        index = match current_char {
            'A' | 'E' | 'I' | 'O' | 'U' | 'Y' => handle_aeiouy(&mut result, index),
            'B' => handle_b(&mut result, &value, index),
            'Ç' => handle_c_cedilla(&mut result, index),
            'C' => handle_c(&mut result, &value, index),
            'D' => handle_d(&mut result, &value, index),
            'F' => handle_f(&mut result, &value, index),
            'G' => handle_g(&mut result, &value, index, slavo_germanic),
            'H' => handle_h(&mut result, &value, index),
            'J' => handle_j(&mut result, &value, index, slavo_germanic),
            'K' => handle_k(&mut result, &value, index),
            'L' => handle_l(&mut result, &value, index),
            'M' => handle_m(&mut result, &value, index),
            'N' => handle_n(&mut result, &value, index),
            'P' => handle_p(&mut result, &value, index),
            'Q' => handle_q(&mut result, &value, index),
            'R' => handle_r(&mut result, &value, index, slavo_germanic),
            'S' => handle_s(&mut result, &value, index, slavo_germanic),
            'T' => handle_t(&mut result, &value, index),
            'V' => handle_v(&mut result, &value, index),
            'W' => handle_w(&mut result, &value, index),
            'X' => handle_x(&mut result, &value, index),
            'Z' => handle_z(&mut result, &value, index, slavo_germanic),
            // N with tilde is encoded as a plain 'N' in the reference
            // algorithm; previously it fell through and was dropped.
            'Ñ' => {
                result.append('N');
                index + 1
            }
            // Anything else contributes nothing to the code.
            _ => index + 1,
        };
    }

    result.cleanup();

    Some(result)
}

// Handler
// Private methods
/// Handles the letters A, E, I, O, U and Y.
///
/// Appends 'A' to both codes only when the letter is at index 0, and always
/// returns the index of the following character.
fn handle_aeiouy(result: &mut DoubleMetaphoneResult, index: usize) -> usize {
    match index {
        0 => result.append('A'),
        _ => {}
    }

    index + 1
}

/// Returns `true` when `value` contains any of "W", "K", "CZ" or "WITZ".
fn is_slavo_germanic(value: &str) -> bool {
    const MARKERS: [&'static str; 4] = ["W", "K", "CZ", "WITZ"];

    MARKERS.iter().any(|marker| value.contains(*marker))
}

/// Returns `true` when `value` starts with one of the `SILENT_START` prefixes.
fn is_silent_start(value: &str) -> bool {
    SILENT_START.iter().any(|prefix| value.starts_with(*prefix))
}

/// Normalises raw input before encoding.
///
/// Trims surrounding whitespace and upper-cases what is left. Returns `None`
/// when nothing remains after trimming, which also covers the empty string.
fn clean_input(value: &str) -> Option<String> {
    let trimmed = value.trim();

    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_uppercase())
    }
}
41 changes: 41 additions & 0 deletions src/phonetics/metaphone/double_metaphone_result.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/// Metaphone Double result
///
/// Holds the primary and alternate codes while they are being built, together
/// with the maximum length both are cut down to by [`cleanup`].
///
/// [`cleanup`]: #method.cleanup
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DoubleMetaphoneResult {
    /// Primary metaphone
    pub primary: String,
    /// Alternate metaphone
    pub alternate: String,
    // Maximum length, in bytes, of each code after `cleanup`.
    max_length: usize,
}

impl DoubleMetaphoneResult {
    /// Creates an empty result whose codes are limited to `length` bytes.
    ///
    /// A negative `length` is treated as 0 instead of being reinterpreted as
    /// a huge unsigned capacity.
    pub fn new(length: i32) -> DoubleMetaphoneResult {
        let max_length = if length > 0 { length as usize } else { 0 };

        DoubleMetaphoneResult {
            primary: String::with_capacity(max_length),
            alternate: String::with_capacity(max_length),
            max_length: max_length,
        }
    }

    /// Returns `true` once both codes have reached the maximum length.
    ///
    /// Only reads the result, so a shared borrow is enough.
    pub fn is_complete(&self) -> bool {
        self.primary.len() >= self.max_length && self.alternate.len() >= self.max_length
    }

    /// Appends `letter` to the primary code only.
    pub fn append_primary(&mut self, letter: char) {
        self.primary.push(letter);
    }

    /// Appends `letter` to the alternate code only.
    pub fn append_alternate(&mut self, letter: char) {
        self.alternate.push(letter);
    }

    /// Appends `letter` to both the primary and the alternate code.
    pub fn append(&mut self, letter: char) {
        self.primary.push(letter);
        self.alternate.push(letter);
    }

    /// Cuts both codes down to the maximum length.
    ///
    /// The append methods do not enforce the limit themselves, so a code may
    /// be longer than the maximum until this is called.
    ///
    /// # Panics
    ///
    /// `String::truncate` panics if the cut position is not on a character
    /// boundary, which can only happen if a multi-byte character was appended.
    pub fn cleanup(&mut self) {
        if self.primary.len() > self.max_length {
            self.primary.truncate(self.max_length);
        }
        if self.alternate.len() > self.max_length {
            self.alternate.truncate(self.max_length);
        }
    }
}
Loading

0 comments on commit e5bd269

Please sign in to comment.