From db80cf32514dafda5c58ff04f1312d0b44bcf85c Mon Sep 17 00:00:00 2001 From: Till Hartmann Date: Fri, 4 Oct 2024 09:40:12 +0200 Subject: [PATCH] Revert "track shuffle_direction and window_size in all configs" This reverts commit 80e835a5d22c118cffceb6e8a24f37e18922c064. --- src/mapper/altseq.rs | 15 ++------------- src/mapper/assembly.rs | 9 +-------- src/mapper/variant.rs | 29 ++++------------------------- src/normalizer.rs | 5 ++--- src/validator/mod.rs | 2 -- 5 files changed, 9 insertions(+), 51 deletions(-) diff --git a/src/mapper/altseq.rs b/src/mapper/altseq.rs index ee2db73..7cd6caa 100644 --- a/src/mapper/altseq.rs +++ b/src/mapper/altseq.rs @@ -437,28 +437,17 @@ impl AltSeqBuilder { // Incorporate the variant into the sequence (depending on the type). let mut is_substitution = false; - let range = if end >= seq.len() && reference.is_some() { - log::warn!( - "Altered sequence range {:?} is incompatible with sequence length {:?}, clamping. Variant description is {}", - start..end, - seq.len(), - &self.var_c - ); - start..seq.len() - } else { - start..end - }; match (reference, alternative) { (Some(reference), Some(alternative)) => { // delins or SNP - seq.replace_range(range, &alternative); + seq.replace_range(start..end, &alternative); if reference.len() == 1 && alternative.len() == 1 { is_substitution = true; } } (Some(_reference), None) => { // deletion - seq.replace_range(range, ""); + seq.replace_range(start..end, ""); } (None, Some(alternative)) => { // insertion diff --git a/src/mapper/assembly.rs b/src/mapper/assembly.rs index 4887fc6..b1de930 100644 --- a/src/mapper/assembly.rs +++ b/src/mapper/assembly.rs @@ -7,7 +7,6 @@ use std::sync::Arc; use crate::mapper::error::Error; use crate::mapper::variant; -use crate::normalizer::Direction; use crate::parser::HgvsVariant; use crate::{data::interface::Provider, validator::ValidationLevel}; use biocommons_bioutils::assemblies::Assembly; @@ -48,8 +47,6 @@ pub struct Config { /// Use the genome sequence in case of uncertain g-to-n projections. This /// can be switched off so genome sequence does not have to be available. pub genome_seq_available: bool, - pub shuffle_direction: Direction, - pub window_size: usize, } impl Default for Config { @@ -66,8 +63,6 @@ impl Default for Config { add_gene_symbol: false, renormalize_g: true, genome_seq_available: true, - shuffle_direction: Default::default(), - window_size: 20, } } } @@ -116,8 +111,6 @@ impl Mapper { strict_bounds: config.strict_bounds, renormalize_g: config.renormalize_g, genome_seq_available: config.genome_seq_available, - shuffle_direction: config.shuffle_direction, - window_size: config.window_size, }; let inner = variant::Mapper::new(&inner_config, provider.clone()); let asm_accessions = provider @@ -288,7 +281,7 @@ impl Mapper { /// Normalize variant if requested and ignore errors. This is better than checking whether /// the variant is intronic because future UTAs will support LRG, which will enable checking /// intronic variants. - pub fn maybe_normalize(&self, var: &HgvsVariant) -> Result { + fn maybe_normalize(&self, var: &HgvsVariant) -> Result { if self.config.normalize { let normalizer = self.inner.normalizer()?; normalizer.normalize(var).or_else(|_| { diff --git a/src/mapper/variant.rs b/src/mapper/variant.rs index 49ada0b..a01ba6e 100644 --- a/src/mapper/variant.rs +++ b/src/mapper/variant.rs @@ -7,8 +7,6 @@ use cached::proc_macro::cached; use cached::SizedCache; use log::{debug, info}; -use super::alignment; -use crate::normalizer::Direction; use crate::{ data::interface::Provider, mapper::Error, @@ -21,6 +19,8 @@ use crate::{ validator::{ValidationLevel, Validator}, }; +use super::alignment; + /// Configuration for Mapper. /// /// Defaults are taken from `hgvs` Python library. @@ -37,8 +37,6 @@ pub struct Config { /// Use the genome sequence in case of uncertain g-to-n projections. This /// can be switched off so genome sequence does not have to be available. pub genome_seq_available: bool, - pub shuffle_direction: Direction, - pub window_size: usize, } impl Default for Config { @@ -51,8 +49,6 @@ impl Default for Config { strict_bounds: true, renormalize_g: true, genome_seq_available: true, - shuffle_direction: Default::default(), - window_size: 20, } } } @@ -155,8 +151,6 @@ impl Mapper { self.validator.clone(), normalizer::Config { replace_reference: self.config.replace_reference, - shuffle_direction: self.config.shuffle_direction, - window_size: self.config.window_size, ..Default::default() }, )) @@ -261,7 +255,7 @@ impl Mapper { (Mu::Certain((*pos_n).clone()), edit_n) } } else { - // This is how the original code handles uncertain positions. We will reach + // This is the how the original code handles uncertain positions. We will reach // here if the position is uncertain and we have the genome sequence. let pos_g = mapper.n_to_g(pos_n)?; let edit_n = NaEdit::RefAlt { @@ -792,17 +786,6 @@ impl Mapper { .loc_range() .ok_or(Error::NoAlteredSequenceForMissingPositions)?; let r = ((r.start - interval.start) as usize)..((r.end - interval.start) as usize); - let r = if r.end >= seq.len() { - log::warn!( - "Altered sequence range {:?} is incompatible with sequence length {:?}, clamping. Variant description is {}", - r, - seq.len(), - &var - ); - r.start..seq.len() - } else { - r - }; let na_edit = var.na_edit().ok_or(Error::NaEditMissing)?; @@ -810,11 +793,7 @@ impl Mapper { NaEdit::RefAlt { alternative, .. } | NaEdit::NumAlt { alternative, .. } => { seq.replace_range(r, alternative) } - NaEdit::DelRef { .. } | NaEdit::DelNum { .. } => { - // FIXME the original code in python simply does `del seq[pos_start:pos_end]`, - // which does not error if `pos_end > len(seq)`. Check if this is intended or not. - seq.replace_range(r, "") - } + NaEdit::DelRef { .. } | NaEdit::DelNum { .. } => seq.replace_range(r, ""), NaEdit::Ins { alternative } => { seq.replace_range((r.start + 1)..(r.start + 1), alternative) } diff --git a/src/normalizer.rs b/src/normalizer.rs index 43c9e3c..04d13a5 100644 --- a/src/normalizer.rs +++ b/src/normalizer.rs @@ -52,9 +52,8 @@ mod error { } /// A direction with respect to a sequence. -#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] +#[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum Direction { - #[default] ThreeToFive, FiveToThree, } @@ -78,7 +77,7 @@ impl Default for Config { Self { alt_aln_method: "splign".to_string(), cross_boundaries: false, - shuffle_direction: Default::default(), + shuffle_direction: Direction::FiveToThree, replace_reference: true, validate: true, window_size: 20, diff --git a/src/validator/mod.rs b/src/validator/mod.rs index c73cf36..81e6a77 100644 --- a/src/validator/mod.rs +++ b/src/validator/mod.rs @@ -134,8 +134,6 @@ impl ExtrinsicValidator { strict_bounds: true, renormalize_g: false, genome_seq_available: true, - shuffle_direction: Default::default(), - window_size: 20, }; Self { strict,