Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix an M2 bug #5241

Merged
merged 1 commit into from
Oct 1, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMSequenceDictionary;
import org.apache.commons.lang3.mutable.MutableInt;
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
import org.broadinstitute.hellbender.utils.BaseUtils;
Expand Down Expand Up @@ -53,12 +54,15 @@ public final class PalindromeArtifactClipReadTransformer implements ReadTransfor

private final ReferenceDataSource referenceDataSource;

private final SAMSequenceDictionary sequenceDictionary;

// the artifact requires a minimum number of palindromic bases to create the single-strand loop
private final int minPalindromeSize;

public PalindromeArtifactClipReadTransformer(final ReferenceDataSource referenceDataSource, final int minPalindromeSize) {
this.referenceDataSource = referenceDataSource;
this.minPalindromeSize = minPalindromeSize;
sequenceDictionary = referenceDataSource.getSequenceDictionary();
}

@Override
Expand All @@ -85,15 +89,22 @@ public GATKRead apply(final GATKRead read) {
final int numBasesToCompare = Math.min(potentialArtifactBaseCount + minPalindromeSize, read.getLength());

// the reference position of bases that are the reverse complement of the suspected artifact
final String contig = read.getContig();
final int refStart = readIsUpstreamOfMate ? adaptorBoundary - numBasesToCompare : adaptorBoundary + 1;
final int refEnd = readIsUpstreamOfMate ? adaptorBoundary - 1 : adaptorBoundary + numBasesToCompare;

// it's possible that the read's assigned fragment length / mate start were incompatible with the contig length,
// so we explicitly check for this case to avoid errors below. We can't rely on the alignment!
if (refStart < 1 || refEnd > sequenceDictionary.getSequence(contig).getSequenceLength()) {
return read;
}

// if the reference bases overlap the soft-clipped bases, it's not an artifact. Note that read.getStart() is the unclipped start
// this can happen, for a read with a huge soft-clip that overlaps its mate significantly.
if ( (readIsUpstreamOfMate && refStart < read.getStart()) || (!readIsUpstreamOfMate && read.getEnd() < refEnd)) {
return read;
}
final SimpleInterval refInterval = new SimpleInterval(read.getContig(), refStart, refEnd);
final SimpleInterval refInterval = new SimpleInterval(contig, refStart, refEnd);
final MutableInt numMatch = new MutableInt(0);

// we traverse the reference in the forward direction, hence the read in the reverse direction
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ public class PalindromeArtifactClipReadTransformerUnitTest {
@Test
public void test() {
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
final String contig = header.getSequence(0).toString();
final String contig = header.getSequence(0).getSequenceName();
final int refStart = 1000;
final int contigLength = header.getSequence(0).getSequenceLength();

// some random reference sequence -- no ITR yet
final byte[] refBases = ("GATTCCCCAAGGGGGCTGCTCCCAGAGGGTGTGTTGCTGGGATTGCCCAGGACAGGGATGGCCCTCTCATCAGGTGGGGG" +
Expand Down Expand Up @@ -84,6 +85,13 @@ public void test() {
final GATKRead definitelyNotArtifactRead = makeRead(header, contig, readStart, fragmentLength, readBasesMatchingRef, quals, cigar);
Assert.assertTrue(transformer.apply(definitelyNotArtifactRead).getCigar().getFirstCigarElement().getOperator() == CigarOperator.SOFT_CLIP);

// reads would cause chromosome out-of-bounds error if we didn't check explicitly for this edge case
final GATKRead mateOffContigRead = makeRead(header, contig, readStart, contigLength, readBasesMatchingRef, quals, cigar);
Assert.assertTrue(transformer.apply(mateOffContigRead) == mateOffContigRead);

final GATKRead mateOffContigRead2 = makeRead(header, contig, readStart, -contigLength, readBasesMatchingRef, quals, cigar);
Assert.assertTrue(transformer.apply(mateOffContigRead2) == mateOffContigRead2);

}

private static GATKRead makeRead(final SAMFileHeader header, final String contig, final int readStart, final int fragmentLength, final byte[] bases, final byte[] qual, final String cigar) {
Expand Down