Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: problem with annotating stop_retained insertions (#131) #132

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,14 @@ seqrepo = { version = "0.8", features = ["cached"] }
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
thiserror = "1.0"
indexmap = { version = "2.0.0", features = ["serde"] }
indexmap = { version = "2", features = ["serde"] }

[dev-dependencies]
anyhow = "1.0"
criterion = "0.5"
csv = "1.2"
env_logger = "0.10"
insta = { version = "1", features = ["yaml"] }
pretty_assertions = "1.3"
rstest = "0.18"
test-log = "0.2"
Expand Down
13 changes: 9 additions & 4 deletions src/mapper/altseq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ impl RefTranscriptData {
}
}

#[derive(Debug, Clone)]
pub struct AltTranscriptData {
/// Transcript nucleotide sequence.
#[allow(dead_code)]
Expand Down Expand Up @@ -697,10 +698,14 @@ impl AltSeqToHgvsp {
.last()
.expect("should not happen; checked for being non-empty above")
+ 1;
(
format!("{}{}", deletion, &ref_sub[..max_diff]),
format!("{}{}", insertion, &alt_sub[..max_diff]),
)
if max_diff > ref_sub.len() || max_diff > alt_sub.len() {
(deletion.clone(), insertion.clone())
} else {
(
format!("{}{}", deletion, &ref_sub[..max_diff]),
format!("{}{}", insertion, &alt_sub[..max_diff]),
)
}
} else {
(deletion, insertion)
};
Expand Down
10 changes: 10 additions & 0 deletions src/mapper/snapshots/hgvs__mapper__variant__test__issue_131.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: src/mapper/variant.rs
expression: "&var_p_test"
---
ProtVariant:
accession:
value: NP_001240838.1
gene_symbol: ~
loc_edit: NoChange

13 changes: 13 additions & 0 deletions src/mapper/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,19 @@ mod test {

use super::{Config, Mapper};

// Regression test for issue #131 (annotating stop_retained insertions):
// projecting the CDS insertion `c.416_417insGTG` on NM_001253909.2 to the
// protein level must succeed and be reported as "no change" (`p.=`).
#[test]
fn issue_131() -> Result<(), Error> {
let mapper = build_mapper()?;

// The c. variant taken from the issue report.
let var_c = HgvsVariant::from_str("NM_001253909.2:c.416_417insGTG")?;
let var_p_test = mapper.c_to_p(&var_c, None)?;

// Check the rendered HGVS string and also pin the full structure via a snapshot.
assert_eq!(format!("{}", &var_p_test), "NP_001240838.1:p.=");
insta::assert_yaml_snapshot!(&var_p_test);

Ok(())
}

#[test]
fn test_sync() {
fn is_sync<T: Sync>() {}
Expand Down
52 changes: 26 additions & 26 deletions src/parser/ds.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::parser::error::Error;
use log::warn;

/// Expression of "maybe uncertain".
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum Mu<T> {
/// Certain variant of `T`.
Certain(T),
Expand Down Expand Up @@ -53,7 +53,7 @@ impl<T> Mu<T> {
}

/// Representation of gene symbol, e.g., `TTN` or `Ttn`.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct GeneSymbol {
pub value: String,
}
Expand All @@ -79,7 +79,7 @@ impl Deref for GeneSymbol {
}

/// Edit of nucleic acids.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum NaEdit {
/// A substitution where both reference and alternative allele are nucleic acid strings
/// (or empty).
Expand Down Expand Up @@ -189,7 +189,7 @@ impl NaEdit {
}

/// Uncertain change through extension.
#[derive(Clone, Debug, PartialEq, Default)]
#[derive(Clone, Debug, PartialEq, Default, serde::Serialize, serde::Deserialize)]
pub enum UncertainLengthChange {
#[default]
None,
Expand All @@ -198,7 +198,7 @@ pub enum UncertainLengthChange {
}

/// Representation of accession, e.g., `NM_01234.5`.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct Accession {
pub value: String,
}
Expand All @@ -224,7 +224,7 @@ impl Accession {
}

/// Protein edit with interval end edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ProteinEdit {
Fs {
alternative: Option<String>,
Expand Down Expand Up @@ -259,7 +259,7 @@ pub enum ProteinEdit {
}

/// A HGVS variant specification.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum HgvsVariant {
/// Variant specification with `c.` location.
CdsVariant {
Expand Down Expand Up @@ -563,7 +563,7 @@ impl HgvsVariant {
}

/// Coding sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CdsLocEdit {
/// Location on the CDS.
pub loc: Mu<CdsInterval>,
Expand Down Expand Up @@ -596,7 +596,7 @@ impl CdsLocEdit {
}

/// CDS position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CdsInterval {
/// Start position
pub start: CdsPos,
Expand Down Expand Up @@ -627,14 +627,14 @@ impl TryFrom<CdsInterval> for Range<i32> {

/// Specifies whether the CDS position is relative to the CDS start or
/// CDS end.
#[derive(Clone, Copy, Debug, PartialEq)]
#[derive(Clone, Copy, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum CdsFrom {
/// Position is given relative to the CDS start.
Start,
/// Position is given relative to the CDS end.
End,
}

/// CDS position.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct CdsPos {
/// Base position.
pub base: i32,
Expand All @@ -645,7 +645,7 @@ pub struct CdsPos {
}

/// Genome sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct GenomeLocEdit {
/// Location on the genome.
pub loc: Mu<GenomeInterval>,
Expand Down Expand Up @@ -678,7 +678,7 @@ impl GenomeLocEdit {
}

/// Genome position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct GenomeInterval {
/// Start position
pub start: Option<i32>,
Expand All @@ -701,7 +701,7 @@ impl TryInto<Range<i32>> for GenomeInterval {
}

/// Mitochondrial sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct MtLocEdit {
/// Location on the mitochondrium.
pub loc: Mu<MtInterval>,
Expand Down Expand Up @@ -733,7 +733,7 @@ impl MtLocEdit {
}
}
/// Mitochondrial position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct MtInterval {
/// Start position
pub start: Option<i32>,
Expand All @@ -756,7 +756,7 @@ impl TryInto<Range<i32>> for MtInterval {
}

/// Transcript sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TxLocEdit {
/// Location on a transcript.
pub loc: Mu<TxInterval>,
Expand Down Expand Up @@ -789,7 +789,7 @@ impl TxLocEdit {
}

/// Transcript position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TxInterval {
/// Start position
pub start: TxPos,
Expand All @@ -813,7 +813,7 @@ impl From<TxInterval> for Range<i32> {
}

/// Transcript position.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TxPos {
/// Base position.
pub base: i32,
Expand All @@ -822,7 +822,7 @@ pub struct TxPos {
}

/// RNA sequence location with edit.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct RnaLocEdit {
/// Location on a transcript.
pub loc: Mu<RnaInterval>,
Expand Down Expand Up @@ -854,7 +854,7 @@ impl RnaLocEdit {
}
}
/// RNA position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct RnaInterval {
/// Start position
pub start: RnaPos,
Expand All @@ -878,7 +878,7 @@ impl From<RnaInterval> for Range<i32> {
}

/// RNA position.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct RnaPos {
/// Base position.
pub base: i32,
Expand All @@ -887,7 +887,7 @@ pub struct RnaPos {
}

/// Protein sequence location with edit or special.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub enum ProtLocEdit {
Ordinary {
loc: Mu<ProtInterval>,
Expand All @@ -908,7 +908,7 @@ pub enum ProtLocEdit {
}

/// Protein position interval.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct ProtInterval {
/// Start position
pub start: ProtPos,
Expand All @@ -927,7 +927,7 @@ impl From<ProtInterval> for Range<i32> {
}

/// Protein position.
#[derive(Clone, Debug, PartialEq, Default)]
#[derive(Clone, Debug, PartialEq, Default, serde::Serialize, serde::Deserialize)]
pub struct ProtPos {
/// Amino acid value.
pub aa: String,
Expand Down Expand Up @@ -973,13 +973,13 @@ mod test {
assert_eq!(Mu::from(Some(1), false), Mu::Uncertain(Some(1)));
}

#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TestInterval {
pub start: TestPos,
pub end: TestPos,
}

#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, Debug, PartialEq, serde::Serialize, serde::Deserialize)]
pub struct TestPos {
pub base: i32,
pub offset: Option<i32>,
Expand Down
1 change: 1 addition & 0 deletions tests/data/data/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ ADGRL3
ADRA2B
ADRB2
AGBL5
AKR1C3
ALG9
AOAH
ASB18
Expand Down
4 changes: 2 additions & 2 deletions tests/data/data/uta_20210129-subset.pgd.gz
Git LFS file not shown
4 changes: 2 additions & 2 deletions tests/data/seqrepo_cache.fasta
Git LFS file not shown
Loading