Skip to content

Commit

Permalink
feat: make some tables visible in hgvs::sequences (#87)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Apr 5, 2023
1 parent 9c75735 commit d81bf8c
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 16 deletions.
3 changes: 2 additions & 1 deletion src/data/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Datatypes, interfaces, and data access.
pub mod cdot;
///! Datatypes, interfaces, and data acess.
pub mod interface;
pub mod uta;
pub mod uta_sr;
2 changes: 2 additions & 0 deletions src/mapper/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! Code supporting mapping between coordinate systems.
pub mod alignment;
pub(crate) mod altseq;
pub mod assembly;
Expand Down
30 changes: 15 additions & 15 deletions src/sequences.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ fn dna3_to_2bit(c: &[u8]) -> Option<u8> {
}

lazy_static::lazy_static! {
static ref AA3_TO_AA1_LUT_VEC: Vec<(&'static str, &'static str)> = vec![
pub static ref AA3_TO_AA1_VEC: Vec<(&'static str, &'static str)> = vec![
("Ala", "A"),
("Arg", "R"),
("Asn", "N"),
Expand All @@ -144,7 +144,7 @@ lazy_static::lazy_static! {
];

/// NCBI standard translation table.
static ref DNA_TO_AA1_LUT_VEC: Vec<(&'static str, &'static str)> = vec![
pub static ref DNA_TO_AA1_LUT_VEC: Vec<(&'static str, &'static str)> = vec![
("AAA", "K"),
("AAC", "N"),
("AAG", "K"),
Expand Down Expand Up @@ -326,7 +326,7 @@ lazy_static::lazy_static! {
];

/// Translation table for selenocysteine.
static ref DNA_TO_AA1_SEC_VEC: Vec<(&'static str, &'static str)> = vec![
pub static ref DNA_TO_AA1_SEC_VEC: Vec<(&'static str, &'static str)> = vec![
("AAA", "K"),
("AAC", "N"),
("AAG", "K"),
Expand Down Expand Up @@ -507,17 +507,17 @@ lazy_static::lazy_static! {
("YTR", "L"),
];

static ref AA1_TO_AA3_LUT: FxHashMap<&'static [u8], &'static str> = {
static ref AA1_TO_AA3: FxHashMap<&'static [u8], &'static str> = {
let mut m = FxHashMap::default();
for (aa3, aa1) in AA3_TO_AA1_LUT_VEC.iter() {
for (aa3, aa1) in AA3_TO_AA1_VEC.iter() {
m.insert(aa1.as_bytes(), *aa3);
}
m
};

static ref AA3_TO_AA1_LUT: FxHashMap<&'static [u8], &'static str> = {
static ref AA3_TO_AA1: FxHashMap<&'static [u8], &'static str> = {
let mut m = FxHashMap::default();
for (aa3, aa1) in AA3_TO_AA1_LUT_VEC.iter() {
for (aa3, aa1) in AA3_TO_AA1_VEC.iter() {
m.insert(aa3.as_bytes(), *aa1);
}
m
Expand Down Expand Up @@ -633,7 +633,7 @@ pub fn aa1_to_aa3(seq: &str) -> Result<String, anyhow::Error> {
let mut result = String::with_capacity(seq.len() * 3);

for (i, aa1) in seq.as_bytes().chunks(1).enumerate() {
let aa3 = AA1_TO_AA3_LUT.get(aa1).ok_or_else(|| {
let aa3 = AA1_TO_AA3.get(aa1).ok_or_else(|| {
anyhow::anyhow!("Invalid 1-letter amino acid: {:?} at {}", aa1, i + 1)
})?;
result.push_str(aa3);
Expand Down Expand Up @@ -664,7 +664,7 @@ pub fn aa3_to_aa1(seq: &str) -> Result<String, anyhow::Error> {
let mut result = String::with_capacity(seq.len() / 3);

for (i, aa3) in seq.as_bytes().chunks(3).enumerate() {
let aa1 = AA3_TO_AA1_LUT.get(aa3).ok_or(anyhow::anyhow!(
let aa1 = AA3_TO_AA1.get(aa3).ok_or(anyhow::anyhow!(
"Invalid 3-letter amino acid: {:?} at {}",
&aa3,
i + 1
Expand Down Expand Up @@ -715,7 +715,7 @@ impl CodonTranslator {
Self {
dna_ascii_map: &DNA_ASCII_MAP,
dna_ascii_to_2bit: &DNA_ASCII_TO_2BIT,
iupac_ambiguity_codes: &IUPAC_AMBIGUITY_CODES,
iupac_ambiguity_codes: IUPAC_AMBIGUITY_CODES,

codon_2bit_to_aa1: match table {
TranslationTable::Standard => &CODON_2BIT_TO_AA1_LUT,
Expand Down Expand Up @@ -748,7 +748,7 @@ impl CodonTranslator {
}
if let Some(aa) = self.full_dna_to_aa1.get(&self.codon) {
// Fast translation failed, but the slower hash map lookup succeeded.
return Ok(*aa);
Ok(*aa)
} else {
// If this contains an ambiguous code, set aa to X, otherwise, throw error
for c in codon.iter() {
Expand All @@ -758,16 +758,16 @@ impl CodonTranslator {
}
anyhow::bail!(
"Codon {:?} is undefined in codon table",
std::str::from_utf8(&codon).unwrap(),
std::str::from_utf8(codon).unwrap(),
)
}
}

fn dna3_to_2bit(&self, c: &[u8]) -> Option<u8> {
let mut result = 0;
for i in 0..3 {
for i in c.iter().take(3) {
result <<= 2;
let tmp = self.dna_ascii_to_2bit[c[i] as usize];
let tmp = self.dna_ascii_to_2bit[*i as usize];
if tmp == 255 {
return None;
}
Expand Down Expand Up @@ -1086,7 +1086,7 @@ mod test {
fn exercise_lazy_ds() {
assert!(DNA_ASCII_MAP[0] == b'\0');
assert!(DNA_ASCII_TO_2BIT[b'A' as usize] == 0);
assert!(AA3_TO_AA1_LUT_VEC[0] == ("Ala", "A"));
assert!(AA3_TO_AA1_VEC[0] == ("Ala", "A"));
assert!(DNA_TO_AA1_LUT_VEC[0] == ("AAA", "K"));
assert!(DNA_TO_AA1_SEC_VEC[0] == ("AAA", "K"));
}
Expand Down

0 comments on commit d81bf8c

Please sign in to comment.