From d3aba9efa913f9c1bd20b4cf4452e2dde8b31da8 Mon Sep 17 00:00:00 2001 From: striderDM <51991544+StriderDM@users.noreply.github.com> Date: Thu, 18 Nov 2021 17:06:53 +0200 Subject: [PATCH] feat: language detection for mnemonic seed words Implementation of a basic language detection algorithm for mnemonic words Review comments Added tests Simplification --- base_layer/key_manager/src/mnemonic.rs | 99 +++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 11 deletions(-) diff --git a/base_layer/key_manager/src/mnemonic.rs b/base_layer/key_manager/src/mnemonic.rs index 4226c8c250..6e7e6ef8f3 100644 --- a/base_layer/key_manager/src/mnemonic.rs +++ b/base_layer/key_manager/src/mnemonic.rs @@ -25,7 +25,7 @@ use crate::{ error::{KeyManagerError, MnemonicError}, mnemonic_wordlists::*, }; -use std::slice::Iter; +use std::{cmp::Ordering, slice::Iter}; use strum_macros::{Display, EnumString}; use tari_crypto::tari_utilities::bit::*; @@ -33,7 +33,7 @@ use tari_crypto::tari_utilities::bit::*; /// It can autodetect the language of the Mnemonic word sequence // TODO: Develop a language autodetection mechanism to distinguish between ChineseTraditional and ChineseSimplified -#[derive(Clone, Debug, PartialEq, EnumString, Display)] +#[derive(Clone, Debug, PartialEq, EnumString, Display, Copy)] pub enum MnemonicLanguage { ChineseSimplified, English, @@ -47,12 +47,8 @@ pub enum MnemonicLanguage { impl MnemonicLanguage { /// Detects the mnemonic language of a specific word by searching all defined mnemonic word lists pub fn from(mnemonic_word: &str) -> Result { - for language in MnemonicLanguage::iterator() { - if find_mnemonic_index_from_word(mnemonic_word, language).is_ok() { - return Ok((*language).clone()); - } - } - Err(MnemonicError::UnknownLanguage) + let words = vec![mnemonic_word.to_string()]; + detect_language(&words) } /// Returns an iterator for the MnemonicLanguage enum group to allow iteration over all defined languages @@ -158,11 +154,54 @@ pub fn from_bytes(bytes: Vec, language: &MnemonicLanguage) -> Result Result { + let count = words.iter().len(); + match count.cmp(&1) { + Ordering::Less => { + return Err(MnemonicError::UnknownLanguage); + }, + Ordering::Equal => { + let word = words.get(0).ok_or(MnemonicError::EncodeInvalidLength)?; + for language in MnemonicLanguage::iterator() { + if find_mnemonic_index_from_word(word, language).is_ok() { + return Ok(*language); + } + } + return Err(MnemonicError::UnknownLanguage); + }, + Ordering::Greater => { + for word in words { + let mut languages = Vec::with_capacity(MnemonicLanguage::iterator().len()); + // detect all languages in which a word falls into + for language in MnemonicLanguage::iterator() { + if find_mnemonic_index_from_word(word, language).is_ok() { + languages.push(*language); + } + } + // check if at least one of the languages is consistent for all other words against languages yielded + // from the initial word for this iteration + for language in languages { + let mut consistent = true; + for compare in words { + if compare != word && find_mnemonic_index_from_word(compare, &language).is_err() { + consistent = false; + } + } + if consistent { + return Ok(language); + } + } + } + }, + } + + Err(MnemonicError::UnknownLanguage) +} + /// Generates a vector of bytes that represent the provided mnemonic sequence of words, the language of the mnemonic -/// sequence is autodetected +/// sequence is detected pub fn to_bytes(mnemonic_seq: &[String]) -> Result, MnemonicError> { - let first_word = mnemonic_seq.get(0).ok_or(MnemonicError::EncodeInvalidLength)?; - let language = MnemonicLanguage::from(first_word)?; // Autodetect language + let language = self::detect_language(mnemonic_seq)?; to_bytes_with_language(mnemonic_seq, &language) } @@ -289,6 +328,44 @@ mod test { assert!(MnemonicLanguage::from(&"おかあさん".to_string()).is_err()); // Invalid Mnemonic Japanese word assert!(MnemonicLanguage::from(&"답정너".to_string()).is_err()); // Invalid Mnemonic Korean word assert!(MnemonicLanguage::from(&"desvelado".to_string()).is_err()); // Invalid Mnemonic Spanish word + + // English/Spanish + English/French -> English + let words1 = vec![ + "album".to_string(), + "area".to_string(), + "opera".to_string(), + "abandon".to_string(), + ]; + assert_eq!(detect_language(&words1), Ok(MnemonicLanguage::English)); + + // English/Spanish + English/French + Italian/Spanish + let words2 = vec![ + "album".to_string(), + "area".to_string(), + "opera".to_string(), + "abandon".to_string(), + "tipico".to_string(), + ]; + assert_eq!(detect_language(&words2).is_err(), true); + + // bounds check (last word is invalid) + let words3 = vec![ + "album".to_string(), + "area".to_string(), + "opera".to_string(), + "abandon".to_string(), + "topazio".to_string(), + ]; + assert_eq!(detect_language(&words3).is_err(), true); + + // building up a word list: English/French + French -> French + let mut words = Vec::with_capacity(3); + words.push("concert".to_string()); + assert_eq!(detect_language(&words), Ok(MnemonicLanguage::English)); + words.push("abandon".to_string()); + assert_eq!(detect_language(&words), Ok(MnemonicLanguage::English)); + words.push("barbier".to_string()); + assert_eq!(detect_language(&words), Ok(MnemonicLanguage::French)); } #[test]