Skip to content

Commit

Permalink
Merge pull request #328 from ksahlin/mapq
Browse files Browse the repository at this point in the history
Rework single-end MAPQ computation
  • Loading branch information
marcelm authored Nov 15, 2023
2 parents 3578904 + 9120362 commit 8c74210
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* #321: Fix: For paired-end reads that cannot be placed as proper pairs, we now
prefer placing them onto the same chrosome instead of on different ones if
there is a choice.
* #328: Adjust MAPQ computation for single-end reads.
* #318: Added a `--details` option mainly intended for debugging. When used,
some strobealign-specific tags are added to the SAM output that inform about
things like no. of seeds found, whether mate rescue was performed etc.
Expand Down
24 changes: 9 additions & 15 deletions src/aln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,11 @@ static inline void align_SE(
Nam n_max = nams[0];

int best_edit_distance = std::numeric_limits<int>::max();
int best_score = -1000;
int best_score = 0;
int second_best_score = 0;

Alignment best_alignment;
best_alignment.score = -100000;
best_alignment.is_unaligned = true;
int min_mapq_diff = best_edit_distance;

for (auto &nam : nams) {
float score_dropoff = (float) nam.n_hits / n_max.n_hits;
Expand All @@ -124,25 +123,25 @@ static inline void align_SE(
details.tried_alignment++;
details.gapped += alignment.gapped;

int diff_to_best = std::abs(best_score - alignment.score);
min_mapq_diff = std::min(min_mapq_diff, diff_to_best);

if (max_secondary > 0) {
alignments.emplace_back(alignment);
}
if (alignment.score > best_score) {
min_mapq_diff = std::max(0, alignment.score - best_score); // new distance to next best match
second_best_score = best_score;
best_score = alignment.score;
best_alignment = std::move(alignment);
if (max_secondary == 0) {
best_edit_distance = best_alignment.global_ed;
}
} else if (alignment.score > second_best_score) {
second_best_score = alignment.score;
}
tries++;
}
uint8_t mapq = (60.0 * (best_score - second_best_score) + best_score - 1) / best_score;

if (max_secondary == 0) {
best_alignment.mapq = std::min(min_mapq_diff, 60);
sam.add(best_alignment, record, read.rc, true, details);
sam.add(best_alignment, record, read.rc, mapq, true, details);
return;
}
// Sort alignments by score, highest first
Expand All @@ -159,12 +158,7 @@ static inline void align_SE(
break;
}
bool is_primary = i == 0;
if (is_primary) {
alignment.mapq = std::min(min_mapq_diff, 60);
} else {
alignment.mapq = 255;
}
sam.add(alignment, record, read.rc, is_primary, details);
sam.add(alignment, record, read.rc, mapq, is_primary, details);
}
}

Expand Down
4 changes: 3 additions & 1 deletion src/sam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ void Sam::add(
const Alignment& alignment,
const KSeq& record,
const std::string& sequence_rc,
uint8_t mapq,
bool is_primary,
const Details& details
) {
Expand All @@ -131,8 +132,9 @@ void Sam::add(
}
if (!is_primary) {
flags |= SECONDARY;
mapq = 255;
}
add_record(record.name, flags, references.names[alignment.ref_id], alignment.ref_start, alignment.mapq, alignment.cigar, "*", -1, 0, record.seq, sequence_rc, record.qual, alignment.edit_distance, alignment.score, details);
add_record(record.name, flags, references.names[alignment.ref_id], alignment.ref_start, mapq, alignment.cigar, "*", -1, 0, record.seq, sequence_rc, record.qual, alignment.edit_distance, alignment.score, details);
}

// Add one individual record
Expand Down
3 changes: 1 addition & 2 deletions src/sam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ struct Alignment {
int edit_distance;
int global_ed;
int score;
uint8_t mapq;
int length;
bool is_rc;
bool is_unaligned{false};
Expand Down Expand Up @@ -79,7 +78,7 @@ class Sam {
}

/* Add an alignment */
void add(const Alignment& alignment, const klibpp::KSeq& record, const std::string& sequence_rc, bool is_primary, const Details& details);
void add(const Alignment& alignment, const klibpp::KSeq& record, const std::string& sequence_rc, uint8_t mapq, bool is_primary, const Details& details);
void add_pair(const Alignment& alignment1, const Alignment& alignment2, const klibpp::KSeq& record1, const klibpp::KSeq& record2, const std::string& read1_rc, const std::string& read2_rc, uint8_t mapq1, uint8_t mapq2, bool is_proper, bool is_primary, const std::array<Details, 2>& details);
void add_unmapped(const klibpp::KSeq& record, uint16_t flags = UNMAP);
void add_unmapped_pair(const klibpp::KSeq& r1, const klibpp::KSeq& r2);
Expand Down
5 changes: 2 additions & 3 deletions tests/test_sam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ TEST_CASE("Sam::add") {
aln.ref_start = 2;
aln.edit_distance = 3;
aln.score = 9;
aln.mapq = 55;
aln.cigar = Cigar("2S2=1X3=3S");

std::string read_rc = reverse_complement(record.seq);
Expand All @@ -61,15 +60,15 @@ TEST_CASE("Sam::add") {
SUBCASE("Cigar =/X") {
std::string sam_string;
Sam sam(sam_string, references);
sam.add(aln, record, read_rc, is_primary, details);
sam.add(aln, record, read_rc, 55, is_primary, details);
CHECK(sam_string ==
"readname\t16\tcontig1\t3\t55\t2S2=1X3=3S\t*\t0\t0\tACGTT\tBB#>\tNM:i:3\tAS:i:9\n"
);
}
SUBCASE("Cigar M") {
std::string sam_string;
Sam sam(sam_string, references, CigarOps::M);
sam.add(aln, record, read_rc, is_primary, details);
sam.add(aln, record, read_rc, 55, is_primary, details);
CHECK(sam_string ==
"readname\t16\tcontig1\t3\t55\t2S6M3S\t*\t0\t0\tACGTT\tBB#>\tNM:i:3\tAS:i:9\n"
);
Expand Down

0 comments on commit 8c74210

Please sign in to comment.