From 5640c8841f9f5ba88d3c20bc664afd3dfeba382e Mon Sep 17 00:00:00 2001 From: Marcel Martin Date: Tue, 14 Nov 2023 12:36:19 +0100 Subject: [PATCH] Fix matches ending too early in PAF output Closes #333 --- CHANGES.md | 1 + src/aln.cpp | 4 +--- src/paf.cpp | 12 ++++++------ src/paf.hpp | 4 ++-- tests/phix.pe.paf | 22 +++++++++++----------- tests/phix.se.paf | 22 +++++++++++----------- 6 files changed, 32 insertions(+), 33 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 70bf27d3..2f7b36a2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -14,6 +14,7 @@ * #318: Added a `--details` option mainly intended for debugging. When used, some strobealign-specific tags are added to the SAM output that inform about things like no. of seeds found, whether mate rescue was performed etc. +* #333: Fix matches ending too early in PAF output. ## v0.11.0 (2023-06-22) diff --git a/src/aln.cpp b/src/aln.cpp index 9fadc01f..aa392309 100644 --- a/src/aln.cpp +++ b/src/aln.cpp @@ -1019,11 +1019,9 @@ void align_PE_read( nam_read2); output_hits_paf_PE(outstring, nam_read1, record1.name, references, - index_parameters.syncmer.k, record1.seq.length()); output_hits_paf_PE(outstring, nam_read2, record2.name, references, - index_parameters.syncmer.k, record2.seq.length()); } else { align_PE(aligner, sam, nams1, nams2, record1, @@ -1075,7 +1073,7 @@ void align_SE_read( Timer extend_timer; if (!map_param.is_sam_out) { - output_hits_paf(outstring, nams, record.name, references, index_parameters.syncmer.k, + output_hits_paf(outstring, nams, record.name, references, record.seq.length()); } else { align_SE( diff --git a/src/paf.cpp b/src/paf.cpp index 112dea3a..69b18ce5 100644 --- a/src/paf.cpp +++ b/src/paf.cpp @@ -14,7 +14,7 @@ * 11 alignment block length * 12 mapping quality (0-255; 255 for missing) */ -void output_hits_paf_PE(std::string &paf_output, const Nam &n, const std::string &query_name, const References& references, int k, int read_len) { +void output_hits_paf_PE(std::string &paf_output, 
const Nam &n, const std::string &query_name, const References& references, int read_len) { if (n.ref_start < 0 ) { return; } @@ -24,7 +24,7 @@ void output_hits_paf_PE(std::string &paf_output, const Nam &n, const std::string paf_output.append("\t"); paf_output.append(std::to_string(n.query_start)); paf_output.append("\t"); - paf_output.append(std::to_string(n.query_prev_hit_startpos + k)); + paf_output.append(std::to_string(n.query_end)); paf_output.append("\t"); paf_output.append(n.is_rc ? "-" : "+"); paf_output.append("\t"); @@ -34,21 +34,21 @@ void output_hits_paf_PE(std::string &paf_output, const Nam &n, const std::string paf_output.append("\t"); paf_output.append(std::to_string(n.ref_start)); paf_output.append("\t"); - paf_output.append(std::to_string(n.ref_prev_hit_startpos + k)); + paf_output.append(std::to_string(n.ref_end)); paf_output.append("\t"); paf_output.append(std::to_string(n.n_hits)); paf_output.append("\t"); - paf_output.append(std::to_string(n.ref_prev_hit_startpos + k - n.ref_start)); + paf_output.append(std::to_string(n.ref_end - n.ref_start)); paf_output.append("\t255\n"); } -void output_hits_paf(std::string &paf_output, const std::vector<Nam> &all_nams, const std::string& query_name, const References& references, int k, int read_len) { +void output_hits_paf(std::string &paf_output, const std::vector<Nam> &all_nams, const std::string& query_name, const References& references, int read_len) { // Output results if (all_nams.empty()) { return; } // Only output single best hit based on: number of randstrobe-matches times span of the merged match. 
Nam n = all_nams[0]; - output_hits_paf_PE(paf_output, n, query_name, references, k, read_len); + output_hits_paf_PE(paf_output, n, query_name, references, read_len); } diff --git a/src/paf.hpp b/src/paf.hpp index 383e9ae1..5d9675a4 100644 --- a/src/paf.hpp +++ b/src/paf.hpp @@ -6,11 +6,11 @@ #include "nam.hpp" void output_hits_paf_PE( - std::string &paf_output, const Nam &n, const std::string &query_name, const References& references, int k, int read_len + std::string &paf_output, const Nam &n, const std::string &query_name, const References& references, int read_len ); void output_hits_paf( - std::string &paf_output, const std::vector &all_nams, const std::string& query_name, const References& references, int k, int read_len + std::string &paf_output, const std::vector &all_nams, const std::string& query_name, const References& references, int read_len ); #endif diff --git a/tests/phix.pe.paf b/tests/phix.pe.paf index 54ba0bfd..d3c94375 100644 --- a/tests/phix.pe.paf +++ b/tests/phix.pe.paf @@ -1,11 +1,11 @@ -SRR1377138.37 301 8 238 - NC_001422.1 5386 800 1030 38 230 255 -SRR1377138.37 301 6 254 + NC_001422.1 5386 707 955 21 248 255 -SRR1377138.38 301 32 239 - NC_001422.1 5386 4971 5178 39 207 255 -SRR1377138.38 301 2 189 + NC_001422.1 5386 4839 5026 29 187 255 -SRR1377138.39/1 301 1 259 - NC_001422.1 5386 1791 2049 45 258 255 -SRR1377138.39/2 301 22 257 + NC_001422.1 5386 1709 1944 45 235 255 -SRR1377138.40 301 4 251 - NC_001422.1 5386 3020 3267 36 247 255 -SRR1377138.40 301 3 267 + NC_001422.1 5386 2957 3221 37 264 255 -rescuable.42 301 4 251 - NC_001422.1 5386 3020 3267 36 247 255 -rescuable.43 301 3 267 + NC_001422.1 5386 2957 3221 37 264 255 -not.rescuable 301 4 251 - NC_001422.1 5386 3020 3267 36 247 255 +SRR1377138.37 301 8 299 - NC_001422.1 5386 800 1091 38 291 255 +SRR1377138.37 301 6 299 + NC_001422.1 5386 707 1000 21 293 255 +SRR1377138.38 301 32 284 - NC_001422.1 5386 4971 5223 39 252 255 +SRR1377138.38 301 2 248 + NC_001422.1 5386 4839 5085 29 246 255 
+SRR1377138.39/1 301 1 295 - NC_001422.1 5386 1791 2085 45 294 255 +SRR1377138.39/2 301 22 293 + NC_001422.1 5386 1709 1980 45 271 255 +SRR1377138.40 301 4 293 - NC_001422.1 5386 3020 3309 36 289 255 +SRR1377138.40 301 3 297 + NC_001422.1 5386 2957 3251 37 294 255 +rescuable.42 301 4 293 - NC_001422.1 5386 3020 3309 36 289 255 +rescuable.43 301 3 297 + NC_001422.1 5386 2957 3251 37 294 255 +not.rescuable 301 4 293 - NC_001422.1 5386 3020 3309 36 289 255 diff --git a/tests/phix.se.paf b/tests/phix.se.paf index 8bf70afe..2852c1c4 100644 --- a/tests/phix.se.paf +++ b/tests/phix.se.paf @@ -1,11 +1,11 @@ -SRR1377138.32 301 2 257 + NC_001422.1 5386 1434 1689 41 255 255 -SRR1377138.33 301 2 228 + NC_001422.1 5386 3818 4044 40 226 255 -SRR1377138.34 301 33 241 - NC_001422.1 5386 844 1052 37 208 255 -SRR1377138.35 301 5 262 - NC_001422.1 5386 4041 4298 45 257 255 -SRR1377138.36 301 3 263 + NC_001422.1 5386 4997 5257 47 260 255 -SRR1377138.37 301 8 238 - NC_001422.1 5386 800 1030 38 230 255 -SRR1377138.38 301 32 239 - NC_001422.1 5386 4971 5178 39 207 255 -SRR1377138.39/1 301 1 259 - NC_001422.1 5386 1791 2049 45 258 255 -SRR1377138.40 301 4 251 - NC_001422.1 5386 3020 3267 36 247 255 -rescuable.42 301 4 251 - NC_001422.1 5386 3020 3267 36 247 255 -not.rescuable 301 4 251 - NC_001422.1 5386 3020 3267 36 247 255 +SRR1377138.32 301 2 293 + NC_001422.1 5386 1434 1725 41 291 255 +SRR1377138.33 301 2 267 + NC_001422.1 5386 3818 4083 40 265 255 +SRR1377138.34 301 33 299 - NC_001422.1 5386 844 1110 37 266 255 +SRR1377138.35 301 5 298 - NC_001422.1 5386 4041 4334 45 293 255 +SRR1377138.36 301 3 301 + NC_001422.1 5386 4997 5295 47 298 255 +SRR1377138.37 301 8 299 - NC_001422.1 5386 800 1091 38 291 255 +SRR1377138.38 301 32 284 - NC_001422.1 5386 4971 5223 39 252 255 +SRR1377138.39/1 301 1 295 - NC_001422.1 5386 1791 2085 45 294 255 +SRR1377138.40 301 4 293 - NC_001422.1 5386 3020 3309 36 289 255 +rescuable.42 301 4 293 - NC_001422.1 5386 3020 3309 36 289 255 +not.rescuable 301 4 293 
- NC_001422.1 5386 3020 3309 36 289 255