Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework single-end MAPQ computation #328

Merged
merged 4 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
* #321: Fix: For paired-end reads that cannot be placed as proper pairs, we now
prefer placing them onto the same chrosome instead of on different ones if
there is a choice.
* #328: Adjust MAPQ computation for single-end reads.
* #318: Added a `--details` option mainly intended for debugging. When used,
some strobealign-specific tags are added to the SAM output that inform about
things like no. of seeds found, whether mate rescue was performed etc.
Expand Down
24 changes: 9 additions & 15 deletions src/aln.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,11 @@ static inline void align_SE(
Nam n_max = nams[0];

int best_edit_distance = std::numeric_limits<int>::max();
int best_score = -1000;
int best_score = 0;
int second_best_score = 0;

Alignment best_alignment;
best_alignment.score = -100000;
best_alignment.is_unaligned = true;
int min_mapq_diff = best_edit_distance;

for (auto &nam : nams) {
float score_dropoff = (float) nam.n_hits / n_max.n_hits;
Expand All @@ -124,25 +123,25 @@ static inline void align_SE(
details.tried_alignment++;
details.gapped += alignment.gapped;

int diff_to_best = std::abs(best_score - alignment.score);
min_mapq_diff = std::min(min_mapq_diff, diff_to_best);

if (max_secondary > 0) {
alignments.emplace_back(alignment);
}
if (alignment.score > best_score) {
min_mapq_diff = std::max(0, alignment.score - best_score); // new distance to next best match
second_best_score = best_score;
best_score = alignment.score;
best_alignment = std::move(alignment);
if (max_secondary == 0) {
best_edit_distance = best_alignment.global_ed;
}
} else if (alignment.score > second_best_score) {
second_best_score = alignment.score;
}
tries++;
}
uint8_t mapq = (60.0 * (best_score - second_best_score) + best_score - 1) / best_score;

if (max_secondary == 0) {
best_alignment.mapq = std::min(min_mapq_diff, 60);
sam.add(best_alignment, record, read.rc, true, details);
sam.add(best_alignment, record, read.rc, mapq, true, details);
return;
}
// Sort alignments by score, highest first
Expand All @@ -159,12 +158,7 @@ static inline void align_SE(
break;
}
bool is_primary = i == 0;
if (is_primary) {
alignment.mapq = std::min(min_mapq_diff, 60);
} else {
alignment.mapq = 255;
}
sam.add(alignment, record, read.rc, is_primary, details);
sam.add(alignment, record, read.rc, mapq, is_primary, details);
}
}

Expand Down
4 changes: 3 additions & 1 deletion src/sam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ void Sam::add(
const Alignment& alignment,
const KSeq& record,
const std::string& sequence_rc,
uint8_t mapq,
bool is_primary,
const Details& details
) {
Expand All @@ -131,8 +132,9 @@ void Sam::add(
}
if (!is_primary) {
flags |= SECONDARY;
mapq = 255;
}
add_record(record.name, flags, references.names[alignment.ref_id], alignment.ref_start, alignment.mapq, alignment.cigar, "*", -1, 0, record.seq, sequence_rc, record.qual, alignment.edit_distance, alignment.score, details);
add_record(record.name, flags, references.names[alignment.ref_id], alignment.ref_start, mapq, alignment.cigar, "*", -1, 0, record.seq, sequence_rc, record.qual, alignment.edit_distance, alignment.score, details);
}

// Add one individual record
Expand Down
3 changes: 1 addition & 2 deletions src/sam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ struct Alignment {
int edit_distance;
int global_ed;
int score;
uint8_t mapq;
int length;
bool is_rc;
bool is_unaligned{false};
Expand Down Expand Up @@ -79,7 +78,7 @@ class Sam {
}

/* Add an alignment */
void add(const Alignment& alignment, const klibpp::KSeq& record, const std::string& sequence_rc, bool is_primary, const Details& details);
void add(const Alignment& alignment, const klibpp::KSeq& record, const std::string& sequence_rc, uint8_t mapq, bool is_primary, const Details& details);
void add_pair(const Alignment& alignment1, const Alignment& alignment2, const klibpp::KSeq& record1, const klibpp::KSeq& record2, const std::string& read1_rc, const std::string& read2_rc, uint8_t mapq1, uint8_t mapq2, bool is_proper, bool is_primary, const std::array<Details, 2>& details);
void add_unmapped(const klibpp::KSeq& record, uint16_t flags = UNMAP);
void add_unmapped_pair(const klibpp::KSeq& r1, const klibpp::KSeq& r2);
Expand Down
5 changes: 2 additions & 3 deletions tests/test_sam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ TEST_CASE("Sam::add") {
aln.ref_start = 2;
aln.edit_distance = 3;
aln.score = 9;
aln.mapq = 55;
aln.cigar = Cigar("2S2=1X3=3S");

std::string read_rc = reverse_complement(record.seq);
Expand All @@ -61,15 +60,15 @@ TEST_CASE("Sam::add") {
SUBCASE("Cigar =/X") {
std::string sam_string;
Sam sam(sam_string, references);
sam.add(aln, record, read_rc, is_primary, details);
sam.add(aln, record, read_rc, 55, is_primary, details);
CHECK(sam_string ==
"readname\t16\tcontig1\t3\t55\t2S2=1X3=3S\t*\t0\t0\tACGTT\tBB#>\tNM:i:3\tAS:i:9\n"
);
}
SUBCASE("Cigar M") {
std::string sam_string;
Sam sam(sam_string, references, CigarOps::M);
sam.add(aln, record, read_rc, is_primary, details);
sam.add(aln, record, read_rc, 55, is_primary, details);
CHECK(sam_string ==
"readname\t16\tcontig1\t3\t55\t2S6M3S\t*\t0\t0\tACGTT\tBB#>\tNM:i:3\tAS:i:9\n"
);
Expand Down