Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement heuristics that align haplotypes to reads without calling Smith-Waterman #6015

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
7cdde7c
added counts for SW and non-SW calls
jonathoncohen98 Jun 19, 2019
0ef2b14
add another aligner object for testing
jonathoncohen98 Jun 25, 2019
b456351
edited for David Benjamin's comments
jonathoncohen98 Jun 25, 2019
8ff93a2
added test for multiple oneMismatches
jonathoncohen98 Jun 25, 2019
323072a
stylistic change with brackes
jonathoncohen98 Jun 25, 2019
5a7344a
removed test code
jonathoncohen98 Jun 26, 2019
e175714
include correct paramaters before heuristic execution
jonathoncohen98 Jun 26, 2019
c9b20ec
edited for David Benjamin's final comments for stage 1
jonathoncohen98 Jul 2, 2019
21532cb
added atMostOneIndel heuristic
jonathoncohen98 Jul 2, 2019
4735712
added test for atMostOneIndel
jonathoncohen98 Jul 2, 2019
16753db
added oneIndel heuristic to alignment and haplotypeToRef aligner obje…
jonathoncohen98 Jul 3, 2019
6713458
completed indel alignment implementation
jonathoncohen98 Jul 3, 2019
6f2fd00
for indel detection, traverse backwards first and then forward
jonathoncohen98 Jul 3, 2019
07e6968
removed test code
jonathoncohen98 Jul 3, 2019
2e5f3e9
consolidated all mismatch heuristics into one method using allowedMis…
jonathoncohen98 Jul 8, 2019
3d6cf81
extracted private indel method to avoid code duplication
jonathoncohen98 Jul 8, 2019
a7e5ef4
edited for david Benjamin's comments on indel stage
jonathoncohen98 Jul 8, 2019
056ac96
fixed allowed indel length
jonathoncohen98 Jul 8, 2019
07fdbb3
cleaned up align method
jonathoncohen98 Jul 11, 2019
6eef7be
added method that aligns read-bestHap given 1 indel
jonathoncohen98 Jul 25, 2019
ccfdb9c
control code to measure runtime
jonathoncohen98 Aug 5, 2019
c8f4af8
fixed formatting error
jonathoncohen98 Aug 6, 2019
46d6299
fixed totalCallsToSW count
jonathoncohen98 Aug 6, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanJavaAligner;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
Expand Down Expand Up @@ -699,7 +700,9 @@ private List<VariantContext> referenceModelForNoVariation(final AssemblyRegion r
*/
public void shutdown() {
likelihoodCalculationEngine.close();

aligner.close();

if ( haplotypeBAMWriter.isPresent() ) {
haplotypeBAMWriter.get().close();
}
Expand All @@ -710,8 +713,6 @@ public void shutdown() {
throw new RuntimeIOException(e);
}
}


}

private Set<GATKRead> filterNonPassingReads( final AssemblyRegion activeRegion ) {
Expand Down
19 changes: 19 additions & 0 deletions src/main/java/org/broadinstitute/hellbender/utils/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,25 @@ public static int lastIndexOf(final byte[] reference, final byte[] query) {
return -1;
}

public static int lastIndexOfAtMostOneMismatch(final byte[] reference, final byte[] query) {
jonathoncohen98 marked this conversation as resolved.
Show resolved Hide resolved
int queryLength = query.length;

// start search from the last possible matching position and search to the left
for (int refIndex = reference.length - queryLength; refIndex >= 0; refIndex--) {
int mismatchCount = 0;
for (int queryIndex = 0; queryIndex < queryLength && mismatchCount < 2; queryIndex++) {
if (reference[refIndex+queryIndex] != query[queryIndex]) {
mismatchCount++;
}
}

if (mismatchCount < 2) {
return refIndex;
}
}
return -1;
}

/**
* Simple wrapper for sticking elements of a int[] array into a List<Integer>
* @param ar - the array whose elements should be listified
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ public static GATKRead createReadAlignedToRef(final GATKRead originalRead,

// compute the smith-waterman alignment of read -> haplotype
final SmithWatermanAlignment swPairwiseAlignment = aligner.align(haplotype.getBases(), originalRead.getBases(), CigarUtils.ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS, SWOverhangStrategy.SOFTCLIP);

if ( swPairwiseAlignment.getAlignmentOffset() == -1 ) {
// sw can fail (reasons not clear) so if it happens just don't realign the read
return originalRead;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,24 @@ public SWNativeAlignerWrapper(final SWAlignerNativeBinding aligner) {
this.aligner = aligner;
}


//methods for printing out number of SW/non-SW alignments. not needed
public int getNumOfAlignments()
jonathoncohen98 marked this conversation as resolved.
Show resolved Hide resolved
{
System.out.println("SW native wrapper being used");
return 0;
}
public int noSW()
{
System.out.println("native SW being used");
return 0;
}
public int yesSW()
{
System.out.println("native SW being used");
return 0;
}

@Override
public SmithWatermanAlignment align(final byte[] reference, final byte[] alternate, final SWParameters parameters, final SWOverhangStrategy overhangStrategy){
long startTime = System.nanoTime();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
public final class SmithWatermanJavaAligner implements SmithWatermanAligner {
private static final SmithWatermanJavaAligner ALIGNER = new SmithWatermanJavaAligner();
private long totalComputeTime = 0;
private long totalHeuristicTime = 0;
jonathoncohen98 marked this conversation as resolved.
Show resolved Hide resolved
private long totalMismatchHeuristicTime = 0;

/**
* return the stateless singleton instance of SmithWatermanJavaAligner
Expand Down Expand Up @@ -74,24 +76,38 @@ public SmithWatermanAlignment align(final byte[] reference, final byte[] alterna

final SmithWatermanAlignment alignmentResult;

//if exact match
if (matchIndex != -1) {
// generate the alignment result when the substring search was successful
final List<CigarElement> lce = new ArrayList<>(alternate.length);
lce.add(makeElement(State.MATCH, alternate.length));
final List<CigarElement> lce = Collections.singletonList(makeElement(State.MATCH, alternate.length));
alignmentResult = new SWPairwiseAlignmentResult(AlignmentUtils.consolidateCigar(new Cigar(lce)), matchIndex);
}
else {
// run full Smith-Waterman
final int n = reference.length+1;
final int m = alternate.length+1;
final int[][] sw = new int[n][m];
final int[][] btrack=new int[n][m];

calculateMatrix(reference, alternate, sw, btrack, overhangStrategy, parameters);
alignmentResult = calculateCigar(sw, btrack, overhangStrategy); // length of the segment (continuous matches, insertions or deletions)
int matchIndex2 = -1;
jonathoncohen98 marked this conversation as resolved.
Show resolved Hide resolved
if (overhangStrategy == SWOverhangStrategy.SOFTCLIP || overhangStrategy == SWOverhangStrategy.IGNORE) {
//look for one mismatch
matchIndex2 = Utils.lastIndexOfAtMostOneMismatch(reference, alternate);
}

if (matchIndex2 != -1) {
// generate the alignment result when the substring search was successful
final List<CigarElement> lce = Collections.singletonList(makeElement(State.MATCH, alternate.length));
alignmentResult = new SWPairwiseAlignmentResult(AlignmentUtils.consolidateCigar(new Cigar(lce)), matchIndex2);
}
else{
// run full Smith-Waterman
final int n = reference.length+1;
final int m = alternate.length+1;
final int[][] sw = new int[n][m];
final int[][] btrack=new int[n][m];

calculateMatrix(reference, alternate, sw, btrack, overhangStrategy, parameters);
alignmentResult = calculateCigar(sw, btrack, overhangStrategy); // length of the segment (continuous matches, insertions or deletions)
}
}

totalComputeTime += System.nanoTime() - startTime;

return alignmentResult;
}

Expand Down
103 changes: 103 additions & 0 deletions src/test/java/org/broadinstitute/hellbender/utils/UtilsUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,109 @@ public void testLastIndexOfLastBoundaries() {
Assert.assertEquals(result, expected);
}

@Test
public void testLastIndexOfAtMostOneMismatchLastBoundaries() {
final String reference = "AAAACCCCTTTTGGGG";

// match right boundary of reference
final String query1 = "TGAGG";
int result = Utils.lastIndexOfAtMostOneMismatch(reference.getBytes(), query1.getBytes());
int expected = reference.length() - query1.length();
Assert.assertEquals(result, expected);

// match left boundary of reference
final String query2 = "AAGAC";
result = Utils.lastIndexOfAtMostOneMismatch(reference.getBytes(), query2.getBytes());
Assert.assertEquals(result, 0);
}

@Test
public void testLastIndexOfAtMostOneMismatchRandomExactMatch() {
final int numTests = 100;
final int referenceLength = 100;
final int queryLength = 10;

byte[] reference = new byte[referenceLength];
byte[] query = new byte[queryLength];

final Random rng = Utils.getRandomGenerator();

for (int i = 0; i < numTests; i++) {
randomByteString(rng, reference);
randomByteString(rng, query);

int index = -1;

//add query to reference at a random lo cation for 75% of the tests
if (i % 4 > 0) {
index = rng.nextInt(referenceLength - queryLength);
System.arraycopy(query,0, reference, index, queryLength);
}

final int result = Utils.lastIndexOfAtMostOneMismatch(reference, query);
final int expected = index;
Assert.assertEquals(result, expected);
}
}

@Test
public void testLastIndexOfAtMostOneMismatchTwoMismatches() {
final String reference = "AAAACCCCTTTTGGGG";

// match right boundary of reference
final String query1 = "AGAGG";
int result = Utils.lastIndexOfAtMostOneMismatch(reference.getBytes(), query1.getBytes());
Assert.assertEquals(result, -1);

// match right boundary of reference
final String query2 = "GGAAC";
int result2 = Utils.lastIndexOfAtMostOneMismatch(reference.getBytes(), query2.getBytes());
Assert.assertEquals(result2, -1);
}

@Test
public void testLastIndexOfAtMostOneMismatchOneMismatchTwice() {
final String reference = "AGGCCCCCTTTTGGGG";

//matches both boundaries of reference
final String query1 = "AGGG";
int result = Utils.lastIndexOfAtMostOneMismatch(reference.getBytes(), query1.getBytes());
//will catch first match it encounters from the right
int expected = reference.length() - query1.length();
Assert.assertEquals(result, expected);
}

@Test
public void testLastIndexOfAtMostOneMismatchRandom() {
final int numTests = 100;
final int referenceLength = 100;
final int queryLength = 10;

byte[] reference = new byte[referenceLength];
byte[] query = new byte[queryLength];

final Random rng = Utils.getRandomGenerator();
jonathoncohen98 marked this conversation as resolved.
Show resolved Hide resolved

for (int i = 0; i < numTests; i++) {
randomByteString(rng, reference);
randomByteString(rng, query);

int index = -1;

// add one-off query to reference at a random location for 75% of the tests
if (i % 4 > 0) {
index = rng.nextInt(referenceLength - queryLength);
System.arraycopy(query,0, reference, index, queryLength);
int mismatchPosition = rng.nextInt(queryLength);
reference[index + mismatchPosition] = BaseUtils.simpleComplement(query[mismatchPosition]);
}

final int result = Utils.lastIndexOfAtMostOneMismatch(reference, query);
final int expected = index;
Assert.assertEquals(result, expected);
}
}

private void randomByteString(Random rng, byte[] bytes) {
for (int i = 0; i < bytes.length; i++) {
bytes[i] = (byte)(rng.nextInt(94) + 32);
Expand Down