diff --git a/CHANGES.md b/CHANGES.md index 74a2e869..2efbe8e1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,7 @@ * #321: Fix: For paired-end reads that cannot be placed as proper pairs, we now prefer placing them onto the same chrosome instead of on different ones if there is a choice. +* #328: Adjust MAPQ computation for single-end reads. * #318: Added a `--details` option mainly intended for debugging. When used, some strobealign-specific tags are added to the SAM output that inform about things like no. of seeds found, whether mate rescue was performed etc. diff --git a/src/aln.cpp b/src/aln.cpp index aa392309..76350004 100644 --- a/src/aln.cpp +++ b/src/aln.cpp @@ -106,12 +106,11 @@ static inline void align_SE( Nam n_max = nams[0]; int best_edit_distance = std::numeric_limits::max(); - int best_score = -1000; + int best_score = 0; + int second_best_score = 0; Alignment best_alignment; - best_alignment.score = -100000; best_alignment.is_unaligned = true; - int min_mapq_diff = best_edit_distance; for (auto &nam : nams) { float score_dropoff = (float) nam.n_hits / n_max.n_hits; @@ -124,25 +123,25 @@ static inline void align_SE( details.tried_alignment++; details.gapped += alignment.gapped; - int diff_to_best = std::abs(best_score - alignment.score); - min_mapq_diff = std::min(min_mapq_diff, diff_to_best); - if (max_secondary > 0) { alignments.emplace_back(alignment); } if (alignment.score > best_score) { - min_mapq_diff = std::max(0, alignment.score - best_score); // new distance to next best match + second_best_score = best_score; best_score = alignment.score; best_alignment = std::move(alignment); if (max_secondary == 0) { best_edit_distance = best_alignment.global_ed; } + } else if (alignment.score > second_best_score) { + second_best_score = alignment.score; } tries++; } + uint8_t mapq = (60.0 * (best_score - second_best_score) + best_score - 1) / best_score; + if (max_secondary == 0) { - best_alignment.mapq = std::min(min_mapq_diff, 60); - sam.add(best_alignment, record, read.rc, true, details); + sam.add(best_alignment, record, read.rc, mapq, true, details); return; } // Sort alignments by score, highest first @@ -159,12 +158,7 @@ static inline void align_SE( break; } bool is_primary = i == 0; - if (is_primary) { - alignment.mapq = std::min(min_mapq_diff, 60); - } else { - alignment.mapq = 255; - } - sam.add(alignment, record, read.rc, is_primary, details); + sam.add(alignment, record, read.rc, mapq, is_primary, details); } } diff --git a/src/sam.cpp b/src/sam.cpp index 962ff9ce..0fb5a272 100644 --- a/src/sam.cpp +++ b/src/sam.cpp @@ -120,6 +120,7 @@ void Sam::add( const Alignment& alignment, const KSeq& record, const std::string& sequence_rc, + uint8_t mapq, bool is_primary, const Details& details ) { @@ -131,8 +132,9 @@ void Sam::add( } if (!is_primary) { flags |= SECONDARY; + mapq = 255; } - add_record(record.name, flags, references.names[alignment.ref_id], alignment.ref_start, alignment.mapq, alignment.cigar, "*", -1, 0, record.seq, sequence_rc, record.qual, alignment.edit_distance, alignment.score, details); + add_record(record.name, flags, references.names[alignment.ref_id], alignment.ref_start, mapq, alignment.cigar, "*", -1, 0, record.seq, sequence_rc, record.qual, alignment.edit_distance, alignment.score, details); } // Add one individual record diff --git a/src/sam.hpp b/src/sam.hpp index c214dac4..ae9fb62a 100644 --- a/src/sam.hpp +++ b/src/sam.hpp @@ -15,7 +15,6 @@ struct Alignment { int edit_distance; int global_ed; int score; - uint8_t mapq; int length; bool is_rc; bool is_unaligned{false}; @@ -79,7 +78,7 @@ class Sam { } /* Add an alignment */ - void add(const Alignment& alignment, const klibpp::KSeq& record, const std::string& sequence_rc, bool is_primary, const Details& details); + void add(const Alignment& alignment, const klibpp::KSeq& record, const std::string& sequence_rc, uint8_t mapq, bool is_primary, const Details& details); void add_pair(const Alignment& alignment1, const Alignment& alignment2, const klibpp::KSeq& record1, const klibpp::KSeq& record2, const std::string& read1_rc, const std::string& read2_rc, uint8_t mapq1, uint8_t mapq2, bool is_proper, bool is_primary, const std::array& details); void add_unmapped(const klibpp::KSeq& record, uint16_t flags = UNMAP); void add_unmapped_pair(const klibpp::KSeq& r1, const klibpp::KSeq& r2); diff --git a/tests/test_sam.cpp b/tests/test_sam.cpp index b54a195d..d023d699 100644 --- a/tests/test_sam.cpp +++ b/tests/test_sam.cpp @@ -52,7 +52,6 @@ TEST_CASE("Sam::add") { aln.ref_start = 2; aln.edit_distance = 3; aln.score = 9; - aln.mapq = 55; aln.cigar = Cigar("2S2=1X3=3S"); std::string read_rc = reverse_complement(record.seq); @@ -61,7 +60,7 @@ TEST_CASE("Sam::add") { SUBCASE("Cigar =/X") { std::string sam_string; Sam sam(sam_string, references); - sam.add(aln, record, read_rc, is_primary, details); + sam.add(aln, record, read_rc, 55, is_primary, details); CHECK(sam_string == "readname\t16\tcontig1\t3\t55\t2S2=1X3=3S\t*\t0\t0\tACGTT\tBB#>\tNM:i:3\tAS:i:9\n" ); @@ -69,7 +68,7 @@ TEST_CASE("Sam::add") { SUBCASE("Cigar M") { std::string sam_string; Sam sam(sam_string, references, CigarOps::M); - sam.add(aln, record, read_rc, is_primary, details); + sam.add(aln, record, read_rc, 55, is_primary, details); CHECK(sam_string == "readname\t16\tcontig1\t3\t55\t2S6M3S\t*\t0\t0\tACGTT\tBB#>\tNM:i:3\tAS:i:9\n" );