From 887d822497204f900ecb7cb2fe671bbac0f90106 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 5 Apr 2023 09:35:57 +0200 Subject: [PATCH 1/4] fix: renaming AA3_TO_AA1_LUT to AA3_TO_AA1 --- src/sequences.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/sequences.rs b/src/sequences.rs index 6d35a3d..dbb663d 100644 --- a/src/sequences.rs +++ b/src/sequences.rs @@ -117,7 +117,7 @@ fn dna3_to_2bit(c: &[u8]) -> Option { } lazy_static::lazy_static! { - static ref AA3_TO_AA1_LUT_VEC: Vec<(&'static str, &'static str)> = vec![ + static ref AA3_TO_AA1_VEC: Vec<(&'static str, &'static str)> = vec![ ("Ala", "A"), ("Arg", "R"), ("Asn", "N"), @@ -507,17 +507,17 @@ lazy_static::lazy_static! { ("YTR", "L"), ]; - static ref AA1_TO_AA3_LUT: FxHashMap<&'static [u8], &'static str> = { + static ref AA1_TO_AA3: FxHashMap<&'static [u8], &'static str> = { let mut m = FxHashMap::default(); - for (aa3, aa1) in AA3_TO_AA1_LUT_VEC.iter() { + for (aa3, aa1) in AA3_TO_AA1_VEC.iter() { m.insert(aa1.as_bytes(), *aa3); } m }; - static ref AA3_TO_AA1_LUT: FxHashMap<&'static [u8], &'static str> = { + static ref AA3_TO_AA1: FxHashMap<&'static [u8], &'static str> = { let mut m = FxHashMap::default(); - for (aa3, aa1) in AA3_TO_AA1_LUT_VEC.iter() { + for (aa3, aa1) in AA3_TO_AA1_VEC.iter() { m.insert(aa3.as_bytes(), *aa1); } m @@ -633,7 +633,7 @@ pub fn aa1_to_aa3(seq: &str) -> Result { let mut result = String::with_capacity(seq.len() * 3); for (i, aa1) in seq.as_bytes().chunks(1).enumerate() { - let aa3 = AA1_TO_AA3_LUT.get(aa1).ok_or_else(|| { + let aa3 = AA1_TO_AA3.get(aa1).ok_or_else(|| { anyhow::anyhow!("Invalid 1-letter amino acid: {:?} at {}", aa1, i + 1) })?; result.push_str(aa3); @@ -664,7 +664,7 @@ pub fn aa3_to_aa1(seq: &str) -> Result { let mut result = String::with_capacity(seq.len() / 3); for (i, aa3) in seq.as_bytes().chunks(3).enumerate() { - let aa1 = AA3_TO_AA1_LUT.get(aa3).ok_or(anyhow::anyhow!( + let aa1 = AA3_TO_AA1.get(aa3).ok_or(anyhow::anyhow!( "Invalid 3-letter amino acid: {:?} at {}", &aa3, i + 1 @@ -1086,7 +1086,7 @@ mod test { fn exercise_lazy_ds() { assert!(DNA_ASCII_MAP[0] == b'\0'); assert!(DNA_ASCII_TO_2BIT[b'A' as usize] == 0); - assert!(AA3_TO_AA1_LUT_VEC[0] == ("Ala", "A")); + assert!(AA3_TO_AA1_VEC[0] == ("Ala", "A")); assert!(DNA_TO_AA1_LUT_VEC[0] == ("AAA", "K")); assert!(DNA_TO_AA1_SEC_VEC[0] == ("AAA", "K")); } From 0691c67068d310b8d3d84cf44972d63ba6914531 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 5 Apr 2023 09:37:36 +0200 Subject: [PATCH 2/4] cosmetic change to docs --- src/data/mod.rs | 3 ++- src/mapper/mod.rs | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/data/mod.rs b/src/data/mod.rs index a79aa06..586ccad 100644 --- a/src/data/mod.rs +++ b/src/data/mod.rs @@ -1,5 +1,6 @@ +//! Datatypes, interfaces, and data acess. + pub mod cdot; -///! Datatypes, interfaces, and data acess. pub mod interface; pub mod uta; pub mod uta_sr; diff --git a/src/mapper/mod.rs b/src/mapper/mod.rs index 51590e0..6af99e5 100644 --- a/src/mapper/mod.rs +++ b/src/mapper/mod.rs @@ -1,3 +1,5 @@ +//! Code supporting mapping between coordinate systems. + pub mod alignment; pub(crate) mod altseq; pub mod assembly; From bd0840a5133afbf4d103a79c5bb0093aa8625331 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 5 Apr 2023 09:40:50 +0200 Subject: [PATCH 3/4] make data structure public --- src/sequences.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sequences.rs b/src/sequences.rs index dbb663d..6ba943e 100644 --- a/src/sequences.rs +++ b/src/sequences.rs @@ -117,7 +117,7 @@ fn dna3_to_2bit(c: &[u8]) -> Option { } lazy_static::lazy_static! { - static ref AA3_TO_AA1_VEC: Vec<(&'static str, &'static str)> = vec![ + pub static ref AA3_TO_AA1_VEC: Vec<(&'static str, &'static str)> = vec![ ("Ala", "A"), ("Arg", "R"), ("Asn", "N"), @@ -144,7 +144,7 @@ lazy_static::lazy_static! { ]; /// NCBI standard translation table. - static ref DNA_TO_AA1_LUT_VEC: Vec<(&'static str, &'static str)> = vec![ + pub static ref DNA_TO_AA1_LUT_VEC: Vec<(&'static str, &'static str)> = vec![ ("AAA", "K"), ("AAC", "N"), ("AAG", "K"), @@ -326,7 +326,7 @@ lazy_static::lazy_static! { ]; /// Translation table for selenocysteine. - static ref DNA_TO_AA1_SEC_VEC: Vec<(&'static str, &'static str)> = vec![ + pub static ref DNA_TO_AA1_SEC_VEC: Vec<(&'static str, &'static str)> = vec![ ("AAA", "K"), ("AAC", "N"), ("AAG", "K"), From e0945f6f35291509d4bf714bb36a4dfff7be1e14 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Wed, 5 Apr 2023 09:42:15 +0200 Subject: [PATCH 4/4] cleanup --- src/sequences.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/sequences.rs b/src/sequences.rs index 6ba943e..c63aeac 100644 --- a/src/sequences.rs +++ b/src/sequences.rs @@ -715,7 +715,7 @@ impl CodonTranslator { Self { dna_ascii_map: &DNA_ASCII_MAP, dna_ascii_to_2bit: &DNA_ASCII_TO_2BIT, - iupac_ambiguity_codes: &IUPAC_AMBIGUITY_CODES, + iupac_ambiguity_codes: IUPAC_AMBIGUITY_CODES, codon_2bit_to_aa1: match table { TranslationTable::Standard => &CODON_2BIT_TO_AA1_LUT, @@ -748,7 +748,7 @@ impl CodonTranslator { } if let Some(aa) = self.full_dna_to_aa1.get(&self.codon) { // Fast translation fails, but slower hash map succeeded. - return Ok(*aa); + Ok(*aa) } else { // If this contains an ambiguous code, set aa to X, otherwise, throw error for c in codon.iter() { @@ -758,16 +758,16 @@ impl CodonTranslator { } anyhow::bail!( "Codon {:?} is undefined in codon table", - std::str::from_utf8(&codon).unwrap(), + std::str::from_utf8(codon).unwrap(), ) } } fn dna3_to_2bit(&self, c: &[u8]) -> Option { let mut result = 0; - for i in 0..3 { + for i in c.iter().take(3) { result <<= 2; - let tmp = self.dna_ascii_to_2bit[c[i] as usize]; + let tmp = self.dna_ascii_to_2bit[*i as usize]; if tmp == 255 { return None; }