Skip to content

Commit

Permalink
switch to CigarHit
Browse files Browse the repository at this point in the history
  • Loading branch information
Donaim committed Oct 21, 2024
1 parent 25aeb3b commit d67cd0c
Showing 1 changed file with 48 additions and 48 deletions.
96 changes: 48 additions & 48 deletions micall/utils/consensus_aligner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import csv
import os
import logging
from aligntools import CigarActions
from aligntools import CigarActions, CigarHit, Cigar

from gotoh import align_it, align_it_aa
from mappy import Alignment, Aligner
Expand Down Expand Up @@ -173,7 +173,7 @@ def __init__(self,
self.coordinate_name = self.consensus = self.amino_consensus = ''
self.algorithm = ''
self.consensus_offset = 0
self.alignments: List[Alignment] = []
self.alignments: List[CigarHit] = []
self.reading_frames: Dict[int, List[SeedAmino]] = {}
self.seed_nucs: List[SeedNucleotide] = []
self.amino_alignments: List[AminoAlignment] = []
Expand Down Expand Up @@ -270,15 +270,19 @@ def start_contig(self,
except KeyError:
coordinate_seq = self.projects.getReference(coordinate_name)
aligner = Aligner(seq=coordinate_seq, preset='map-ont')
self.alignments = list(aligner.map(self.consensus))
if self.alignments or 10_000 < len(self.consensus):
mappy_alignments = list(aligner.map(self.consensus))
if mappy_alignments or 10_000 < len(self.consensus):
self.algorithm = 'minimap2'
else:
self.algorithm = 'gotoh'
self.align_gotoh(coordinate_seq, self.consensus)
self.alignments = [alignment
for alignment in self.alignments
if alignment.is_primary]
mappy_alignments = [alignment
for alignment in mappy_alignments
if alignment.is_primary]
self.alignments = [CigarHit(Cigar(x.cigar),
r_st=x.r_st, r_ei=x.r_en - 1,
q_st=x.q_st, q_ei=x.q_en - 1)
for x in mappy_alignments]
self.alignments.sort(key=attrgetter('q_st'))

if self.overall_alignments_writer is not None:
Expand All @@ -290,7 +294,7 @@ def start_contig(self,
"consensus_offset": self.consensus_offset,
"ref_start": alignment.r_st,
"ref_end": alignment.r_en,
"cigar_str": alignment.cigar_str}
"cigar_str": str(alignment.cigar)}
self.overall_alignments_writer.writerow(row)

def align_gotoh(self, coordinate_seq: str, consensus: str):
Expand Down Expand Up @@ -355,7 +359,7 @@ def find_amino_alignments(self,
amino_sections = []
query_end = alignment.q_st + self.consensus_offset
ref_end = alignment.r_st
for cigar_index, (size, action) in enumerate(alignment.cigar):
for action in alignment.cigar.iterate_operations():
ref_reading_frame = (ref_end - (start_pos-1)) % 3
conseq_codon_start = query_end - ref_reading_frame
reading_frame = -conseq_codon_start % 3
Expand All @@ -366,13 +370,13 @@ def find_amino_alignments(self,
action,
reading_frame)
if action == CigarActions.MATCH:
amino_alignment.query_end += size
amino_alignment.ref_end += size
amino_alignment.query_end += 1
amino_alignment.ref_end += 1
elif action == CigarActions.INSERT:
amino_alignment.query_end += size
amino_alignment.query_end += 1
else:
assert action == CigarActions.DELETE
amino_alignment.ref_end += size
amino_alignment.ref_end += 1
query_end = amino_alignment.query_end
ref_end = amino_alignment.ref_end
if amino_alignment.has_overlap(start_pos, end_pos):
Expand Down Expand Up @@ -857,40 +861,37 @@ def build_nucleotide_report(self,
consensus_nuc_index = alignment.q_st + self.consensus_offset
source_amino_index = 0
seed_aminos = self.reading_frames[0]
for section_size, section_action in alignment.cigar:
for section_action in alignment.cigar.iterate_operations():
if section_action == CigarActions.INSERT:
for _ in range(section_size):
if start_pos - 1 <= ref_nuc_index < end_pos:
self.inserts.add(consensus_nuc_index)
consensus_nuc_index += 1
if start_pos - 1 <= ref_nuc_index < end_pos:
self.inserts.add(consensus_nuc_index)
consensus_nuc_index += 1
continue
if section_action == CigarActions.DELETE:
coverage = self.get_deletion_coverage(consensus_nuc_index)
seed_nuc = SeedNucleotide()
seed_nuc.count_nucleotides('-', coverage)
for _ in range(section_size):
if start_pos - 1 <= ref_nuc_index < end_pos:
target_nuc_index = ref_nuc_index - start_pos + 1
report_nuc = report_nucleotides[target_nuc_index]
report_nuc.seed_nucleotide.add(seed_nuc)
ref_nuc_index += 1
continue
assert section_action == CigarActions.MATCH, section_action
for _ in range(section_size):
if start_pos - 1 <= ref_nuc_index < end_pos:
target_nuc_index = ref_nuc_index - start_pos + 1
while True:
seed_amino = seed_aminos[source_amino_index]
codon_nuc_index = (consensus_nuc_index -
seed_amino.consensus_nuc_index)
if codon_nuc_index < 3:
break
source_amino_index += 1
seed_nuc = seed_amino.nucleotides[codon_nuc_index]
report_nuc = report_nucleotides[target_nuc_index]
report_nuc.seed_nucleotide.add(seed_nuc)
ref_nuc_index += 1
consensus_nuc_index += 1
continue
assert section_action == CigarActions.MATCH, section_action
if start_pos - 1 <= ref_nuc_index < end_pos:
target_nuc_index = ref_nuc_index - start_pos + 1
while True:
seed_amino = seed_aminos[source_amino_index]
codon_nuc_index = (consensus_nuc_index -
seed_amino.consensus_nuc_index)
if codon_nuc_index < 3:
break
source_amino_index += 1
seed_nuc = seed_amino.nucleotides[codon_nuc_index]
report_nuc = report_nucleotides[target_nuc_index]
report_nuc.seed_nucleotide.add(seed_nuc)
ref_nuc_index += 1
consensus_nuc_index += 1

def seed_concordance(self, seed_name, projects, seed_coordinates, excluded_regions, included_regions=None):
if self.seed_concordance_writer is None:
Expand Down Expand Up @@ -925,22 +926,21 @@ def coord_concordance(self, half_window_size: int = 10) -> List[float]:
for alignment in coord_alignments:
ref_progress = alignment.r_st
query_progress = alignment.q_st
for cigar_index, (size, action) in enumerate(alignment.cigar):
for action in alignment.cigar.iterate_operations():
if action == CigarActions.INSERT:
query_progress += size
query_progress += 1
elif action == CigarActions.DELETE:
ref_progress += size
ref_progress += 1
else:
assert action == CigarActions.MATCH
for pos in range(0, size):
ref_pos = ref_progress + pos
query_pos = query_progress + pos
if self.consensus[query_pos] == coord_ref[ref_pos]:
query_matches[query_pos] = 1
else:
query_matches[query_pos] = 0
ref_progress += size
query_progress += size
ref_pos = ref_progress
query_pos = query_progress
if self.consensus[query_pos] == coord_ref[ref_pos]:
query_matches[query_pos] = 1
else:
query_matches[query_pos] = 0
ref_progress += 1
query_progress += 1

for pos in range(0, len(self.consensus)):
start = max(0, pos - half_window_size)
Expand Down

0 comments on commit d67cd0c

Please sign in to comment.