Skip to content

Commit

Permalink
Merge pull request #3004 from vgteam/fix-surject-orientation
Browse files Browse the repository at this point in the history
Fix surject orientation
  • Loading branch information
adamnovak authored Sep 28, 2020
2 parents 3b59993 + a64b70c commit 47c1b30
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 10 deletions.
9 changes: 7 additions & 2 deletions src/alignment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ bam1_t* alignment_to_bam_internal(bam_hdr_t* header,
bool paired,
const int32_t tlen_max) {

// this table does seem to be reproduced in htslib publicly, so I'm copying
// this table doesn't seem to be reproduced in htslib publicly, so I'm copying
// it from the CRAM conversion code
static const char nt_encoding[256] = {
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
Expand Down Expand Up @@ -682,9 +682,14 @@ bam1_t* alignment_to_bam_internal(bam_hdr_t* header,
uint8_t* seq_data = (uint8_t*) (cigar_data + cigar.size());
const string* seq = &alignment.sequence();
string rev_seq;
const string* qual = &alignment.quality();
string rev_qual;
if (refrev) {
// Sequence and quality both need to be flipped to target forward orientation
rev_seq = reverse_complement(*seq);
seq = &rev_seq;
reverse_copy(qual->begin(), qual->end(), back_inserter(rev_qual));
qual = &rev_qual;
}
for (size_t i = 0; i < alignment.sequence().size(); i += 2) {
if (i + 1 < alignment.sequence().size()) {
Expand All @@ -703,7 +708,7 @@ bam1_t* alignment_to_bam_internal(bam_hdr_t* header,
qual_data[i] = '\xff';
}
else {
qual_data[i] = alignment.quality().at(i);
qual_data[i] = qual->at(i);
}
}

Expand Down
6 changes: 0 additions & 6 deletions src/hts_alignment_emitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,6 @@ void HTSAlignmentEmitter::convert_unpaired(Alignment& aln, bam_hdr_t* header, ve
string path_name;
convert_alignment(aln, cigar, pos_rev, pos, path_name);

// TODO: We're passing along a text header so we can make a SAM file so
// we can make a BAM record by re-reading it, which we can then
// possibly output as SAM again. Make this less complicated.
dest.emplace_back(alignment_to_bam(header,
aln,
path_name,
Expand All @@ -224,9 +221,6 @@ void HTSAlignmentEmitter::convert_paired(Alignment& aln1, Alignment& aln2, bam_h
// Determine the TLEN for each read.
auto tlens = compute_template_lengths(pos1, cigar1, pos2, cigar2);

// TODO: We're passing along a text header so we can make a SAM file so
// we can make a BAM record by re-reading it, which we can then
// possibly output as SAM again. Make this less complicated.
dest.emplace_back(alignment_to_bam(header,
aln1,
path_name1,
Expand Down
1 change: 1 addition & 0 deletions src/hts_alignment_emitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class HTSAlignmentEmitter : public AlignmentEmitter {
/// Remember the HTSlib mode string we need to open our files.
string hts_mode;

/// Describe the given alignment as a CIGAR and start position.
virtual void convert_alignment(const Alignment& aln, vector<pair<int, char>>& cigar, bool& pos_rev, int64_t& pos, string& path_name) const;

/// Convert an unpaired alignment to HTS format.
Expand Down
16 changes: 14 additions & 2 deletions test/t/15_vg_surject.t
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ BASH_TAP_ROOT=../deps/bash-tap
PATH=../bin:$PATH # for vg


plan tests 27
plan tests 28

vg construct -r small/x.fa >j.vg
vg index -x j.xg j.vg
Expand Down Expand Up @@ -60,8 +60,20 @@ is $(vg map -s GTTATTTACTATGAATCCTCACCTTCCTTGACTTCTTGAAACATTTGGCTATTGACCTCTTTCTC
# These sequences have edits in them, so we can test CIGAR reversal as well
SEQ="ACCGTCATCTTCAAGTTTGAAAATTGCATCTCAAATCTAAGACCCAGAGGGCTCACCCAGAGTCGAGGCTCAAGGACAGCTCTCCTTTGTGTCCAGAGTG"
SEQ_RC="CACTCTGGACACAAAGGAGAGCTGTCCTTGAGCCTCGACTCTGGGTGAGCCCTCTGGGTCTTAGATTTGAGATGCAATTTTCAAACTTGAAGATGACGGT"
QUAL="CCCFFFFFHHHHHJJJJJHFDDDD&((((+>(26:&)()(+((+3((8A(280<32(+(&+(38>B&&)&&)2(+(&)&))8((28()0&09&05&05<&"
QUAL_R="&<50&50&90&0)(82((8))&)&(+(2)&&)&&B>83(+&(+(23<082(A8((3+((+()()&:62(>+((((&DDDDFHJJJJJHHHHHFFFFFCCC"

is "$(vg map -s $SEQ -g x.gcsa -x x.xg | vg surject -p x -x x.xg - -s | cut -f1,3,4,5,6,7,8,9,10)" "$(vg map -s $SEQ_RC -g x.gcsa -x x.xg | vg surject -p x -x x.xg - -s | cut -f1,3,4,5,6,7,8,9,10)" "forward and reverse orientations of a read produce the same surjected SAM, ignoring flags"
printf "@read\n${SEQ}\n+\n${QUAL}\n" > fwd.fq
printf "@read\n${SEQ_RC}\n+\n${QUAL_R}\n" > rev.fq

vg map -f fwd.fq -g x.gcsa -x x.xg > mapped.fwd.gam
vg map -f rev.fq -g x.gcsa -x x.xg > mapped.rev.gam

is "$(vg view -aj mapped.rev.gam | jq -r '.quality' | base64 -d | xxd -p -c1 | tac | xxd -p -r | xxd)" "$(vg view -aj mapped.fwd.gam | jq -r '.quality' | base64 -d | xxd)" "quality strings we will use for testing are oriented correctly"

is "$(vg surject -p x -x x.xg mapped.fwd.gam -s | cut -f1,3,4,5,6,7,8,9,10,11)" "$(vg surject -p x -x x.xg mapped.rev.gam -s | cut -f1,3,4,5,6,7,8,9,10,11)" "forward and reverse orientations of a read produce the same surjected SAM, ignoring flags"

rm -f fwd.fq rev.fq mapped.fwd.gam mapped.rev.gam

is $(vg map -G <(vg sim -a -n 100 -x x.xg) -g x.gcsa -x x.xg | vg surject -p x -x x.xg -b - | samtools view - | wc -l) \
100 "vg surject produces valid BAM output"
Expand Down

1 comment on commit 47c1b30

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 19092 seconds

Please sign in to comment.