From 9d9484f04e765032a13a4b9833969a96f35ffa40 Mon Sep 17 00:00:00 2001 From: Steve Huang Date: Mon, 23 Jul 2018 14:37:01 -0400 Subject: [PATCH] (SV) addressing more edge cases in assembly alignments (#5044) * ticket 4951 * two neighboring gapped alignments, after gap split, generate unsorted alignments * also a new utility method in SVInterval --- .../AssemblyContigAlignmentsConfigPicker.java | 53 ++++++++- ...ChimericAlignmentIterativeInterpreter.java | 7 +- .../tools/spark/sv/utils/SVInterval.java | 4 + ...yContigAlignmentsConfigPickerUnitTest.java | 104 ++++++++++++++++++ .../spark/sv/utils/SVIntervalUnitTest.java | 32 ++++++ 5 files changed, 193 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPicker.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPicker.java index 9106f82a000..ba926050cfd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPicker.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPicker.java @@ -571,16 +571,27 @@ public static Iterator reConstructContigF return bestConfigurations.stream() .map(mappings -> splitGaps(mappings, false)) .map(mappings -> removeNonUniqueMappings(mappings, ALIGNMENT_MQ_THRESHOLD, ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD)) + .filter(mappings -> alignmentShouldNotBeStitchedTogether(mappings.goodMappings)) .map(mappings -> createContigGivenClassifiedAlignments(contigName, contigSeq, mappings, true)) .sorted(getConfigurationComparator()) .iterator(); } else { final GoodAndBadMappings intermediate = splitGaps(bestConfigurations.get(0), false); final GoodAndBadMappings result = removeNonUniqueMappings(intermediate, ALIGNMENT_MQ_THRESHOLD, ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD); - return Collections.singletonList(createContigGivenClassifiedAlignments(contigName, contigSeq, result, false)).iterator(); + if (alignmentShouldNotBeStitchedTogether(result.goodMappings)) + return Collections.singletonList(createContigGivenClassifiedAlignments(contigName, contigSeq, result, false)).iterator(); + else { + return Collections.emptyIterator(); + } } } + private static boolean alignmentShouldNotBeStitchedTogether(final List alignments) { + return alignments.size() != 2 + || + ! simpleChimeraWithStichableAlignments(alignments.get(0), alignments.get(1)); + } + private static AssemblyContigWithFineTunedAlignments createContigGivenClassifiedAlignments(final String contigName, final byte[] contigSeq, final GoodAndBadMappings goodAndBadMappings, final boolean setResultContigAsAmbiguous) { @@ -630,7 +641,8 @@ static Comparator getConfigurationCompara * 2) drop the gap-split child alignments whose read spans are contained in other alignment spans * TODO: based on evaluation done on 2018-06-30, making either choice has small effect on the final call set; one could further evaluate options when making improvements. */ - private static GoodAndBadMappings splitGaps(final GoodAndBadMappings configuration, final boolean keepSplitChildrenTogether) { + @VisibleForTesting + static GoodAndBadMappings splitGaps(final GoodAndBadMappings configuration, final boolean keepSplitChildrenTogether) { return keepSplitChildrenTogether ? splitGapsAndKeepChildrenTogether(configuration) : splitGapsAndDropAlignmentContainedByOtherOnRead(configuration); } @@ -691,6 +703,7 @@ static GoodAndBadMappings splitGapsAndKeepChildrenTogether(final GoodAndBadMappi bad.addAll( Lists.newArrayList(pair._2) ); } } + good.sort(AlignedContig.getAlignmentIntervalComparator()); return new GoodAndBadMappings(good, bad, configuration.goodMappingToNonCanonicalChromosome); } @@ -725,6 +738,7 @@ static GoodAndBadMappings splitGapsAndDropAlignmentContainedByOtherOnRead(final } } gapSplit.removeAll(bad); + gapSplit.sort(AlignedContig.getAlignmentIntervalComparator()); return new GoodAndBadMappings(gapSplit, bad, configuration.goodMappingToNonCanonicalChromosome); } @@ -857,7 +871,8 @@ private static final class TempMaxOverlapInfo { * Extract the max overlap information, front and back, for each alignment in {@code configuration}. * For each alignment, the corresponding tuple2 has the max (front, rear) overlap base counts. */ - private static Map> getMaxOverlapPairs(final List configuration) { + @VisibleForTesting + static Map> getMaxOverlapPairs(final List configuration) { final List intermediateResult = new ArrayList<>(Collections.nCopies(configuration.size(), new TempMaxOverlapInfo())); @@ -904,4 +919,36 @@ private static Map> getMaxOverlapPai return maxOverlapMap; } + + /** + * See the funny alignment signature described in ticket 4951 on GATK github + * @param intervalOne assumed to start no later than {@code intervalTwo} on the read + * @param intervalTwo assumed to start no earlier than {@code intervalOne} on the read + * @return true if the two given intervals can be stitched together + * @throws IllegalArgumentException if the two intervals are not sorted according to their {@link AlignmentInterval#startInAssembledContig} + */ + public static boolean simpleChimeraWithStichableAlignments(final AlignmentInterval intervalOne, final AlignmentInterval intervalTwo) { + if ( intervalOne.startInAssembledContig > intervalTwo.startInAssembledContig ) + throw new IllegalArgumentException("Assumption that input intervals are sorted by their starts on read is violated.\tFirst: " + + intervalOne.toPackedString() + "\tSecond: " + intervalTwo.toPackedString()); + if ( ! intervalOne.referenceSpan.getContig().equals(intervalTwo.referenceSpan.getContig())) + return false; + if (intervalOne.forwardStrand != intervalTwo.forwardStrand) + return false; + if ( intervalOne.containsOnRead(intervalTwo) || intervalTwo.containsOnRead(intervalOne) ) + return false; + if ( intervalOne.containsOnRef(intervalTwo) || intervalTwo.containsOnRef(intervalOne) ) + return false; + final boolean refOrderSwap = intervalOne.forwardStrand != (intervalOne.referenceSpan.getStart() < intervalTwo.referenceSpan.getStart()); + if (refOrderSwap) + return false; + final int overlapOnContig = AlignmentInterval.overlapOnContig(intervalOne, intervalTwo); + final int overlapOnRefSpan = AlignmentInterval.overlapOnRefSpan(intervalOne, intervalTwo); + if (overlapOnContig == 0 && overlapOnRefSpan == 0) { + final boolean canBeStitched = intervalTwo.referenceSpan.getStart() - intervalOne.referenceSpan.getEnd() == 1 + && intervalTwo.startInAssembledContig - intervalOne.endInAssembledContig == 1 ; + return canBeStitched; + } else + return overlapOnContig == overlapOnRefSpan; + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ContigChimericAlignmentIterativeInterpreter.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ContigChimericAlignmentIterativeInterpreter.java index 2dcd21e636f..50f64644500 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ContigChimericAlignmentIterativeInterpreter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/inference/ContigChimericAlignmentIterativeInterpreter.java @@ -14,10 +14,7 @@ import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVType; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputMetaData; import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryUtils; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments; -import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.*; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval; import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree; import scala.Tuple2; @@ -149,6 +146,8 @@ public static List parseOneContig(final AlignedContig alignedCont if (filterAlignmentByMqOrLength) { if (firstAlignmentIsTooShort(current, next, uniqueRefSpanThreshold)) { continue; + } else if (AssemblyContigAlignmentsConfigPicker.simpleChimeraWithStichableAlignments(current, next)) { + continue; } else if (nextAlignmentMayBeInsertion(current, next, mapQualThresholdInclusive, uniqueRefSpanThreshold, filterWhollyContainedAlignments)) { if (iterator.hasNext()) { insertionMappings.add(next.toPackedString()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVInterval.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVInterval.java index 2986778c90e..754610f97b1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVInterval.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVInterval.java @@ -58,6 +58,10 @@ public boolean isUpstreamOf( final SVInterval that ) { return this.contig < that.contig || (this.contig == that.contig && this.end <= that.start); } + public boolean contains( final SVInterval that ) { + return this.contig == that.contig && this.start <= that.start && this.end >= that.end; + } + public int gapLen( final SVInterval that ) { if ( this.contig != that.contig ) return Integer.MAX_VALUE; return that.start - this.end; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java index b8627329582..846e212e7db 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/discovery/alignment/AssemblyContigAlignmentsConfigPickerUnitTest.java @@ -4,6 +4,7 @@ import htsjdk.samtools.TextCigarCodec; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.engine.spark.SparkContextFactory; +import org.broadinstitute.hellbender.tools.spark.sv.discovery.TestUtilsForAssemblyBasedSVDiscovery; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.read.CigarUtils; import org.testng.Assert; @@ -471,4 +472,107 @@ public void testConfigurationSorting(final List data = new ArrayList<>(20); + + AlignmentInterval zero = fromSAMRecordString("asm019085:tig00005\t2064\tchr8\t38525855\t0\t525H36M7D70M\t*\t0\t0\tCCTTCCCTTCCCTTCCCCTTCCTTCCTTTCTTCCTTCCTTCCCTTCCCTTCCCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCC\t*\tSA:Z:chr20,43927117,-,374M257S,60,3;chr20,43927479,-,342S74M7D69M146S,60,8;chr20,43927560,-,416S51M15D58M5I37M64S,60,20;\tMD:Z:28C7^TCCCTTC70\tRG:Z:GATKSVContigAlignments\tNM:i:8\tAS:i:78\tXS:i:75", true); + AlignmentInterval one = fromSAMRecordString("asm019085:tig00005\t2064\tchr20\t43927560\t60\t416H51M15D58M5I37M64H\t*\t0\t0\tTCTTGGCTTTGCCACCTATGAAGAGTTAATTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTCCCCTTCCCTTCCCTTCCCCTTCCTTCCTTCCTTTCTTCCTTCCTTCCCTTCCCTTCCCCTTCCTTCCTTTCTTCCTTCCTTCC\t*\tSA:Z:chr20,43927117,-,374M257S,60,3;chr20,43927479,-,342S74M7D69M146S,60,8;chr8,38525855,-,525S36M7D70M,0,8;\tMD:Z:51^TCCTTCCTTCCTTCC95\tRG:Z:GATKSVContigAlignments\tNM:i:20\tAS:i:94\tXS:i:46", true); + AlignmentInterval two = fromSAMRecordString("asm019085:tig00005\t2064\tchr20\t43927479\t60\t342H74M7D69M146H\t*\t0\t0\tACACACACACACACACACACACACACACACACGTCATGAGATATGCTTTGGAGTCATACTGGCCTGGGATCAAATCTTGGCTTTGCCACCTATGAAGAGTTAATTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTCC\t*\tSA:Z:chr20,43927117,-,374M257S,60,3;chr20,43927560,-,416S51M15D58M5I37M64S,60,20;chr8,38525855,-,525S36M7D70M,0,8;\tMD:Z:32A41^TCTTGGC69\tRG:Z:GATKSVContigAlignments\tNM:i:8\tAS:i:115\tXS:i:0", true); + AlignmentInterval three = fromSAMRecordString("asm019085:tig00005\t16\tchr20\t43927117\t60\t374M257S\t*\t0\t0\tCCAAATTATGGCTTCTTTTTCAGATATTTATAACTTATTTATAGTTCCTCCCAGTTCAAATGCAACTTACCAATCAGGGGTCATTTTTACCATAAGCAGAGTTGCCTGGTAACATAGTTAACGTCCCACTTTCCTCAGGTTTCCAGGGGAGTTATGCTCCGCAATTAACAAAGGTGAAATTCTCTTGCAACAAGGAAAAGGGTTTGGTTAACCCTTTCCCCCATATTCATCATCCTACTTTTTTCCCCTGTGGGCTGGTATTTTTGGCATCTCTTTTGGAAGGATGAATGGAGTCCTCAGTAATATATTCCACACTGTGTACATTTTCTGATGATCTATGTAACACACACACACACACACACACACACACACACGTCATGAGATATGCTTTGGAGTCATACTGGCCTGGGATCAAATCTTGGCTTTGCCACCTATGAAGAGTTAATTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTCCCCTTCCCTTCCCTTCCCCTTCCTTCCTTCCTTTCTTCCTTCCTTCCCTTCCCTTCCCCTTCCTTCCTTTCTTCCTTCCTTCCCTTCCCTTCCCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCC\t*\tSA:Z:chr20,43927479,-,342S74M7D69M146S,60,8;chr20,43927560,-,416S51M15D58M5I37M64S,60,20;chr8,38525855,-,525S36M7D70M,0,8;\tMD:Z:284C15A38A34\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:359\tXS:i:0", true); + data.add(new Object[]{zero, one, false}); // different chr + data.add(new Object[]{one, three, false}); + data.add(new Object[]{two, three, false}); + + data.add(new Object[]{new AlignmentInterval(new SimpleInterval("chr1:1-100"), 1, 100, TextCigarCodec.decode("100M50S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE), + new AlignmentInterval(new SimpleInterval("chr1:21-70"), 101, 150, TextCigarCodec.decode("100S50M"), true, 60, 0, 50, ContigAlignmentsModifier.AlnModType.NONE), + false + }); // contains on ref + data.add(new Object[]{new AlignmentInterval(new SimpleInterval("chr1:1-100"), 1, 100, TextCigarCodec.decode("100M50S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE), + new AlignmentInterval(new SimpleInterval("chr1:101-150"), 51, 100, TextCigarCodec.decode("50S50M50S"), true, 60, 0, 50, ContigAlignmentsModifier.AlnModType.NONE), + false + }); // contains on read + + data.add(new Object[]{new AlignmentInterval(new SimpleInterval("chr1:1-100"), 1, 100, TextCigarCodec.decode("100M50S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE), + new AlignmentInterval(new SimpleInterval("chr1:101-150"), 101, 150, TextCigarCodec.decode("100S50M"), true, 60, 0, 50, ContigAlignmentsModifier.AlnModType.NONE), + true + }); + data.add(new Object[]{one, two, true}); + + // strand switch + data.add(new Object[]{new AlignmentInterval(new SimpleInterval("chr1:1001001-1001100"), 1, 100, TextCigarCodec.decode("100M1080S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE), + new AlignmentInterval(new SimpleInterval("chr1:1000001-1001100"), 81, 1180, TextCigarCodec.decode("1100M80S"), false, 60, 0, 1100, ContigAlignmentsModifier.AlnModType.NONE), + false + }); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forSimpleChimeraWithStichableAlignments") + public void testSimpleChimeraWithStichableAlignments(final AlignmentInterval one, final AlignmentInterval two, + final boolean expected) { + Assert.assertEquals(AssemblyContigAlignmentsConfigPicker.simpleChimeraWithStichableAlignments(one, two), expected); + } + @Test(groups = "sv", expectedExceptions = IllegalArgumentException.class) + public void testSimpleChimeraWithStichableAlignmentsUnsortedInputs() { + AlignmentInterval one = fromSAMRecordString("asm019085:tig00005\t2064\tchr20\t43927560\t60\t416H51M15D58M5I37M64H\t*\t0\t0\tTCTTGGCTTTGCCACCTATGAAGAGTTAATTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTCCCCTTCCCTTCCCTTCCCCTTCCTTCCTTCCTTTCTTCCTTCCTTCCCTTCCCTTCCCCTTCCTTCCTTTCTTCCTTCCTTCC\t*\tSA:Z:chr20,43927117,-,374M257S,60,3;chr20,43927479,-,342S74M7D69M146S,60,8;chr8,38525855,-,525S36M7D70M,0,8;\tMD:Z:51^TCCTTCCTTCCTTCC95\tRG:Z:GATKSVContigAlignments\tNM:i:20\tAS:i:94\tXS:i:46", true); + AlignmentInterval two = fromSAMRecordString("asm019085:tig00005\t2064\tchr20\t43927479\t60\t342H74M7D69M146H\t*\t0\t0\tACACACACACACACACACACACACACACACACGTCATGAGATATGCTTTGGAGTCATACTGGCCTGGGATCAAATCTTGGCTTTGCCACCTATGAAGAGTTAATTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTTCCTCC\t*\tSA:Z:chr20,43927117,-,374M257S,60,3;chr20,43927560,-,416S51M15D58M5I37M64S,60,20;chr8,38525855,-,525S36M7D70M,0,8;\tMD:Z:32A41^TCTTGGC69\tRG:Z:GATKSVContigAlignments\tNM:i:8\tAS:i:115\tXS:i:0", true); + AssemblyContigAlignmentsConfigPicker.simpleChimeraWithStichableAlignments(two, one); + } + + @DataProvider + private Object[][] forSortingAfterGapSplit() { + final List data = new ArrayList<>(20); + + AlignmentInterval one = TestUtilsForAssemblyBasedSVDiscovery.fromSAMRecordString("asm017280:tig00000\t0\tchr18\t77371711\t60\t1447M50D184M980S\t*\t0\t0\tAATCAGATTTTCAGAACAGAGCTTCTTAGGGAGATGGTTGGACCACCATAGTCCTGCATCTTCAGGGAGCTGCTCACCATCCCCACTACTGCAGGCACCACTGATCCCTGAGTGAGACCCAGGCACTAACAAGACATGAAGAAGGTGCTGGGTTCCTGGCCGGTCCTTTTGACCCATAAGTGTGGTTCAGTTCTTGGCACTGATGGGCTGGTCGGATGGTGGTGAGGTCAAGCACTCCTTCAGTGCCTGAGGCGTGGCGAGTGGACTTTGGGACCTCTTCCTGAGATGATGATTGCTTCTCAAGGGTGGGAATGAGTTCTTCCAGGGGTCAGCGGTGTGTCATAGGCAAGCACAGCACATTACTAGAAAGCATACGTTTTATTAAGTAGAAGGTATCTTTAATTCCCTGAATAGTAAGACATCTTTGGATTGATGGAAAGGTTGCCCTGGGAAAGGTGGAATATTTGTTGTCATGACCTAAAGCAGCATGCCTAACACCATGTTTGCCAGAGCTCATGCCCACACTGCCATCTCAGGGTTTTCTAAGGGAGAGGAAGCAAGAGGAGGCCCTCGAATTTCTAACCGATTGCTGCAGTCAAAAAATTGCTAAGAAGGACTCGGTGGAAAAGGGAAAACTCAAAAGTGAAGTATGCTGTGCAGAAGCCAGAGCTCCACCGGCTCCCAGGATTACAGTAACAGTCAGGCTCCTGCTGTCTGGGCCCAATAACGCCATGCGCAGCATCCACATCAGCTGTCCTGAGAACCAGCAAGGCGTCCAAGGCTTAGTGTGTGTTAGAGGCCAATACTGTCCCTGCTGTTCATTCTCAGGCTAAACCCACGATCTTAGTAAGCACAGATGCAGAGACACAGTTTGTGGTATTGCCGTCATTTTCTTGGGAATGTATCAACAGAGTATACAAGGGTAACAGTTCTTTCTATAAAAAAAATCAACATTTATTTTTATTCATTAATTTACCATGAAAATGATTTTTTCCCAGTAGAGAAGTCAGTCTGGGAAAACCAGATAAAAACTAGAAATGATCTTGTTCGCTGTTGTGGAGGCGGTGCAGGAGGCCCGGGAGCTGGATCACTTGACTTTGTTTTCTCTCCTCCTCTGCTTGCAGAACATCTCTGCTACAGTGGAGGACCCACACTCATAGTTCAGCTCCCCACCTCACACCCCAGCCAAGCCCAGGACCCACACTCATGGCCCCGCTCCCCACCTCACACCCCAGCCAAGCCCAGGACCCACGCTCATGGCCCCGCTCTCCACCTCATACCCCAACCAAGTCTAGGACCCACGCTCATGGCCCTGCTCCCCACCTCACACCCCGACCAAGTCCAGGACCCACGCTCAGGACCCCTCTCCCCACCTCACACCTCAGCCACGCCCAGGACCCATGCTCACAGTTCCGCTCCCCACCTCACACCCCAACCAAGTCCAGGACCCATGCTCATGGCCCTGCTCCCCACCTCACACCCCGACCAAGTCCAGGACCCATGCTCATGGCCCCACTCTCCACCTCATACCCCAGCCAAGCCCAGGACCCACACTCACGGTTCCGCTCCCCACCTCACACCCCGACCAAGCCCAGGACCCACACTCAGGACCCCTCTCCCCACCTCACACCGCAGTTGCAGCATCTGAGGGGAGGGGCAGGACAAGGTGGCAGAGGTGGGGCATTTTTACCCAGAGAAGACCAAAAACAGTCAAAGGGATGGTTTAAATACCTGTCTTTTCTATTTTTTCACTGACTCCATGGTAACAGATTTTGCAGTAAGAACCAACATACACACATCCCTAAAACATATTTCATCAAATAATGCTGACATTATGTGTAGCACATGCTGATATTTTCTATTCTATTCTCATTTTCTCTCTCTTTTTTTCAAACAAATGATAATGAAAATCCACTAAGCTAAGGGTCATGACCAACTAACGGGTAACACCCTTCAGTTTGGAAATACTGATTTAAATTTTGGCTTGCTCTAAGCTATAATTCACATTTGACATTTAGATCATATTTTTCCCCTACTGACTCTCAAGTGGGAGCTGAGAGGCAGTACCTAATAAATACAGAGAAAAAGAAAGAAGGAAGGAAAATTGATATTTACAACACAACCAAGTTTTAATGTGAGTTAAAAAAACGGGTTTCTTTTTGATGGAAATCTTCTAAAATTGTGCTGATGGTTACACACTTCTGTGAATGCACCAAAAACCATTGAATTGTACATTTTAAATGAGGTCATTGTTGCTCTGTGAGTTATATGTCAACAAAGCCGTGAAAATCAGCAAACGAGAACAAACAGTTCTGGCCTAGGCCCTGCGAGGCCTCTTTCTAAGGCCGCCGCACGCCCATCTGGGCCATTAAACTCCTCAAGGCGCTCTGACTTCCTTGTGTAAGAGATATTGCTGTTTCTTATAAGGCACTGATGTACTTTTGCTTCCTTATTGGGGTAAAATTTAAAGACCATAACACTGATGGTTTTAAGAGTGCAGTTAAAAAAAGTTTTGACAAATGTATAAACACATGTAACAACCGCCCCAATCAATACACAGCACATTTCCACGGCCCCAGCAGGTTTCCTTGGTCCCCTTCCCAGCAAATCC\t*\tSA:Z:chr18,77373176,+,1165S338M50I1058M,60,93,1115;\tMD:Z:188G345G84A55T34A16T324A1G0A9C186A15C8C6G5C1C9A24T5A8T14T1G4G16C6A12C0A4T0G1C0C1T19G13^CCCACGCTCAGGACCCCTCTCCCCACCTCACACCTCAGCCACGCCCAGGG10C2T0T1C19A147\tRG:Z:GATKSVContigAlignments\tNM:i:88\tAS:i:1375\tXS:i:262", true); + AlignmentInterval two = TestUtilsForAssemblyBasedSVDiscovery.fromSAMRecordString("asm017280:tig00000\t2048\tchr18\t77373176\t60\t1165H338M50I1058M\t*\t0\t0\tCTCCCCACCTCACACCCCAGCCAAGCCCAGGACCCACACTCATGGCCCCGCTCCCCACCTCACACCCCAGCCAAGCCCAGGACCCACGCTCATGGCCCCGCTCTCCACCTCATACCCCAACCAAGTCTAGGACCCACGCTCATGGCCCTGCTCCCCACCTCACACCCCGACCAAGTCCAGGACCCACGCTCAGGACCCCTCTCCCCACCTCACACCTCAGCCACGCCCAGGACCCATGCTCACAGTTCCGCTCCCCACCTCACACCCCAACCAAGTCCAGGACCCATGCTCATGGCCCTGCTCCCCACCTCACACCCCGACCAAGTCCAGGACCCATGCTCATGGCCCCACTCTCCACCTCATACCCCAGCCAAGCCCAGGACCCACACTCACGGTTCCGCTCCCCACCTCACACCCCGACCAAGCCCAGGACCCACACTCAGGACCCCTCTCCCCACCTCACACCGCAGTTGCAGCATCTGAGGGGAGGGGCAGGACAAGGTGGCAGAGGTGGGGCATTTTTACCCAGAGAAGACCAAAAACAGTCAAAGGGATGGTTTAAATACCTGTCTTTTCTATTTTTTCACTGACTCCATGGTAACAGATTTTGCAGTAAGAACCAACATACACACATCCCTAAAACATATTTCATCAAATAATGCTGACATTATGTGTAGCACATGCTGATATTTTCTATTCTATTCTCATTTTCTCTCTCTTTTTTTCAAACAAATGATAATGAAAATCCACTAAGCTAAGGGTCATGACCAACTAACGGGTAACACCCTTCAGTTTGGAAATACTGATTTAAATTTTGGCTTGCTCTAAGCTATAATTCACATTTGACATTTAGATCATATTTTTCCCCTACTGACTCTCAAGTGGGAGCTGAGAGGCAGTACCTAATAAATACAGAGAAAAAGAAAGAAGGAAGGAAAATTGATATTTACAACACAACCAAGTTTTAATGTGAGTTAAAAAAACGGGTTTCTTTTTGATGGAAATCTTCTAAAATTGTGCTGATGGTTACACACTTCTGTGAATGCACCAAAAACCATTGAATTGTACATTTTAAATGAGGTCATTGTTGCTCTGTGAGTTATATGTCAACAAAGCCGTGAAAATCAGCAAACGAGAACAAACAGTTCTGGCCTAGGCCCTGCGAGGCCTCTTTCTAAGGCCGCCGCACGCCCATCTGGGCCATTAAACTCCTCAAGGCGCTCTGACTTCCTTGTGTAAGAGATATTGCTGTTTCTTATAAGGCACTGATGTACTTTTGCTTCCTTATTGGGGTAAAATTTAAAGACCATAACACTGATGGTTTTAAGAGTGCAGTTAAAAAAAGTTTTGACAAATGTATAAACACATGTAACAACCGCCCCAATCAATACACAGCACATTTCCACGGCCCCAGCAGGTTTCCTTGGTCCCCTTCCCAGCAAATCC\t*\tSA:Z:chr18,77371711,+,1447M50D184M980S,60,88,1375;\tMD:Z:16T6C7G4T0G4C2T0T22A5T10T12A19G5C1C9A4C2T0T1C26C11A28C6A12C0A5G18A5G6C10C5C6A70G4T10T0G4T1G4G3T200G738G52\tRG:Z:GATKSVContigAlignments\tNM:i:93\tAS:i:1115\tXS:i:0", true); + data.add(new Object[]{new GoodAndBadMappings(Arrays.asList(one, two)), + false, + new GoodAndBadMappings(Arrays.asList(new AlignmentInterval(new SimpleInterval("chr18:77371711-77373157"), 1, 1447, TextCigarCodec.decode("1447M1164S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT), + new AlignmentInterval(new SimpleInterval("chr18:77373176-77373513"), 1166, 1503, TextCigarCodec.decode("1165H338M1108S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT), + new AlignmentInterval(new SimpleInterval("chr18:77373208-77373391"), 1448, 1631, TextCigarCodec.decode("1447S184M980S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT), + new AlignmentInterval(new SimpleInterval("chr18:77373514-77374571"), 1554, 2611, TextCigarCodec.decode("1165H388S1058M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT))) + }); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forSortingAfterGapSplit") + public void testSortingAfterGapSplit(final GoodAndBadMappings input, final boolean keepSplitChildrenTogether, + final GoodAndBadMappings expectedOutput) { + Assert.assertEquals(AssemblyContigAlignmentsConfigPicker.splitGaps(input, keepSplitChildrenTogether), expectedOutput); + } + + @DataProvider Object[][] forGetMaxOverlapPairs() { + final List data = new ArrayList<>(20); + + final List alignments = Arrays.asList(new AlignmentInterval(new SimpleInterval("chr18:77371711-77373157"), 1, 1447, TextCigarCodec.decode("1447M1164S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT), + new AlignmentInterval(new SimpleInterval("chr18:77373176-77373513"), 1166, 1503, TextCigarCodec.decode("1165H338M1108S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT), + new AlignmentInterval(new SimpleInterval("chr18:77373208-77373391"), 1448, 1631, TextCigarCodec.decode("1447S184M980S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT), + new AlignmentInterval(new SimpleInterval("chr18:77373514-77374571"), 1554, 2611, TextCigarCodec.decode("1165H388S1058M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)); + + final Map> overlapMap = new HashMap<>(4); + overlapMap.put(alignments.get(0), new Tuple2<>(-1, 282)); + overlapMap.put(alignments.get(1), new Tuple2<>(282, 56)); + overlapMap.put(alignments.get(2), new Tuple2<>(56, 78)); + overlapMap.put(alignments.get(3), new Tuple2<>(78, -1)); + data.add(new Object[]{alignments, overlapMap}); + + return data.toArray(new Object[data.size()][]); + } + @Test(groups = "sv", dataProvider = "forGetMaxOverlapPairs") + public void testGetMaxOverlapPairs(final List alignments, final Map> expected) { + final Map> actual = AssemblyContigAlignmentsConfigPicker.getMaxOverlapPairs(alignments); + twoSetsEqualIgnoreOrder(new HashSet<>(actual.keySet()), new HashSet<>(expected.keySet())); + actual.forEach((k,v) -> Assert.assertEquals(v, expected.get(k))); + } + + private static void twoSetsEqualIgnoreOrder(final Set one, final Set two) { + Iterator iterator = one.iterator(); + while (iterator.hasNext()) { + AlignmentInterval next = iterator.next(); + Assert.assertTrue(two.contains(next)); + two.remove(next); + iterator.remove(); + } + Assert.assertTrue(two.isEmpty()); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalUnitTest.java index e129d3b551e..e3393f9d707 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/sv/utils/SVIntervalUnitTest.java @@ -2,8 +2,12 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.util.ArrayList; +import java.util.List; + public class SVIntervalUnitTest extends GATKBaseTest { @Test(groups = "sv") public void testOverlapLen() { @@ -13,4 +17,32 @@ public void testOverlapLen() { Assert.assertTrue(container.overlapLen(containee) == containee.getLength()); } + + @DataProvider + private Object[][] forTestContainment() { + final List data = new ArrayList<>(20); + SVInterval one = new SVInterval(1, 1000, 2000); + SVInterval two = new SVInterval(1, 1100, 1200); + data.add(new Object[]{one, two, true}); + data.add(new Object[]{two, one, false}); + + two = new SVInterval(1, 1000, 1200); + data.add(new Object[]{one, two, true}); + data.add(new Object[]{two, one, false}); + + two = new SVInterval(1, 1100, 2000); + data.add(new Object[]{one, two, true}); + data.add(new Object[]{two, one, false}); + + two = new SVInterval(1, 1000, 2000); + data.add(new Object[]{one, two, true}); + data.add(new Object[]{two, one, true}); + + return data.toArray(new Object[data.size()][]); + } + + @Test(groups = "sv", dataProvider = "forTestContainment") + public void testContainment(final SVInterval one, final SVInterval two, final boolean expected) { + Assert.assertEquals(one.contains(two), expected); + } } \ No newline at end of file