Skip to content

Commit

Permalink
(SV) addressing more edge cases in assembly alignments (#5044)
Browse files Browse the repository at this point in the history
* ticket 4951
* two neighboring gapped alignments, after gap split, generate unsorted alignments
* also a new utility method in SVInterval
  • Loading branch information
SHuang-Broad authored Jul 23, 2018
1 parent aa1d525 commit 9d9484f
Show file tree
Hide file tree
Showing 5 changed files with 193 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -571,16 +571,27 @@ public static Iterator<AssemblyContigWithFineTunedAlignments> reConstructContigF
return bestConfigurations.stream()
.map(mappings -> splitGaps(mappings, false))
.map(mappings -> removeNonUniqueMappings(mappings, ALIGNMENT_MQ_THRESHOLD, ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD))
.filter(mappings -> alignmentShouldNotBeStitchedTogether(mappings.goodMappings))
.map(mappings -> createContigGivenClassifiedAlignments(contigName, contigSeq, mappings, true))
.sorted(getConfigurationComparator())
.iterator();
} else {
final GoodAndBadMappings intermediate = splitGaps(bestConfigurations.get(0), false);
final GoodAndBadMappings result = removeNonUniqueMappings(intermediate, ALIGNMENT_MQ_THRESHOLD, ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD);
return Collections.singletonList(createContigGivenClassifiedAlignments(contigName, contigSeq, result, false)).iterator();
if (alignmentShouldNotBeStitchedTogether(result.goodMappings))
return Collections.singletonList(createContigGivenClassifiedAlignments(contigName, contigSeq, result, false)).iterator();
else {
return Collections.emptyIterator();
}
}
}

private static boolean alignmentShouldNotBeStitchedTogether(final List<AlignmentInterval> alignments) {
return alignments.size() != 2
||
! simpleChimeraWithStichableAlignments(alignments.get(0), alignments.get(1));
}

private static AssemblyContigWithFineTunedAlignments createContigGivenClassifiedAlignments(final String contigName, final byte[] contigSeq,
final GoodAndBadMappings goodAndBadMappings,
final boolean setResultContigAsAmbiguous) {
Expand Down Expand Up @@ -630,7 +641,8 @@ static Comparator<AssemblyContigWithFineTunedAlignments> getConfigurationCompara
* 2) drop the gap-split child alignments whose read spans are contained in other alignment spans
* TODO: based on evaluation done on 2018-06-30, making either choice has small effect on the final call set; one could further evaluate options when making improvements.
*/
private static GoodAndBadMappings splitGaps(final GoodAndBadMappings configuration, final boolean keepSplitChildrenTogether) {
@VisibleForTesting
static GoodAndBadMappings splitGaps(final GoodAndBadMappings configuration, final boolean keepSplitChildrenTogether) {
return keepSplitChildrenTogether ? splitGapsAndKeepChildrenTogether(configuration) : splitGapsAndDropAlignmentContainedByOtherOnRead(configuration);
}

Expand Down Expand Up @@ -691,6 +703,7 @@ static GoodAndBadMappings splitGapsAndKeepChildrenTogether(final GoodAndBadMappi
bad.addAll( Lists.newArrayList(pair._2) );
}
}
good.sort(AlignedContig.getAlignmentIntervalComparator());
return new GoodAndBadMappings(good, bad, configuration.goodMappingToNonCanonicalChromosome);
}

Expand Down Expand Up @@ -725,6 +738,7 @@ static GoodAndBadMappings splitGapsAndDropAlignmentContainedByOtherOnRead(final
}
}
gapSplit.removeAll(bad);
gapSplit.sort(AlignedContig.getAlignmentIntervalComparator());
return new GoodAndBadMappings(gapSplit, bad, configuration.goodMappingToNonCanonicalChromosome);
}

Expand Down Expand Up @@ -857,7 +871,8 @@ private static final class TempMaxOverlapInfo {
* Extract the max overlap information, front and back, for each alignment in {@code configuration}.
* For each alignment, the corresponding tuple2 has the max (front, rear) overlap base counts.
*/
private static Map<AlignmentInterval, Tuple2<Integer, Integer>> getMaxOverlapPairs(final List<AlignmentInterval> configuration) {
@VisibleForTesting
static Map<AlignmentInterval, Tuple2<Integer, Integer>> getMaxOverlapPairs(final List<AlignmentInterval> configuration) {

final List<TempMaxOverlapInfo> intermediateResult =
new ArrayList<>(Collections.nCopies(configuration.size(), new TempMaxOverlapInfo()));
Expand Down Expand Up @@ -904,4 +919,36 @@ private static Map<AlignmentInterval, Tuple2<Integer, Integer>> getMaxOverlapPai

return maxOverlapMap;
}

/**
* See the funny alignment signature described in ticket 4951 on GATK github
* @param intervalOne assumed to start no later than {@code intervalTwo} on the read
* @param intervalTwo assumed to start no earlier than {@code intervalOne} on the read
* @return true if the two given intervals can be stitched together
* @throws IllegalArgumentException if the two intervals are not sorted according to their {@link AlignmentInterval#startInAssembledContig}
*/
public static boolean simpleChimeraWithStichableAlignments(final AlignmentInterval intervalOne, final AlignmentInterval intervalTwo) {
if ( intervalOne.startInAssembledContig > intervalTwo.startInAssembledContig )
throw new IllegalArgumentException("Assumption that input intervals are sorted by their starts on read is violated.\tFirst: " +
intervalOne.toPackedString() + "\tSecond: " + intervalTwo.toPackedString());
if ( ! intervalOne.referenceSpan.getContig().equals(intervalTwo.referenceSpan.getContig()))
return false;
if (intervalOne.forwardStrand != intervalTwo.forwardStrand)
return false;
if ( intervalOne.containsOnRead(intervalTwo) || intervalTwo.containsOnRead(intervalOne) )
return false;
if ( intervalOne.containsOnRef(intervalTwo) || intervalTwo.containsOnRef(intervalOne) )
return false;
final boolean refOrderSwap = intervalOne.forwardStrand != (intervalOne.referenceSpan.getStart() < intervalTwo.referenceSpan.getStart());
if (refOrderSwap)
return false;
final int overlapOnContig = AlignmentInterval.overlapOnContig(intervalOne, intervalTwo);
final int overlapOnRefSpan = AlignmentInterval.overlapOnRefSpan(intervalOne, intervalTwo);
if (overlapOnContig == 0 && overlapOnRefSpan == 0) {
final boolean canBeStitched = intervalTwo.referenceSpan.getStart() - intervalOne.referenceSpan.getEnd() == 1
&& intervalTwo.startInAssembledContig - intervalOne.endInAssembledContig == 1 ;
return canBeStitched;
} else
return overlapOnContig == overlapOnRefSpan;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,7 @@
import org.broadinstitute.hellbender.tools.spark.sv.discovery.SimpleSVType;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryInputMetaData;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.SvDiscoveryUtils;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignedContig;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AlignmentInterval;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.AssemblyContigWithFineTunedAlignments;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.*;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVIntervalTree;
import scala.Tuple2;
Expand Down Expand Up @@ -149,6 +146,8 @@ public static List<SimpleChimera> parseOneContig(final AlignedContig alignedCont
if (filterAlignmentByMqOrLength) {
if (firstAlignmentIsTooShort(current, next, uniqueRefSpanThreshold)) {
continue;
} else if (AssemblyContigAlignmentsConfigPicker.simpleChimeraWithStichableAlignments(current, next)) {
continue;
} else if (nextAlignmentMayBeInsertion(current, next, mapQualThresholdInclusive, uniqueRefSpanThreshold, filterWhollyContainedAlignments)) {
if (iterator.hasNext()) {
insertionMappings.add(next.toPackedString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ public boolean isUpstreamOf( final SVInterval that ) {
return this.contig < that.contig || (this.contig == that.contig && this.end <= that.start);
}

public boolean contains( final SVInterval that ) {
return this.contig == that.contig && this.start <= that.start && this.end >= that.end;
}

public int gapLen( final SVInterval that ) {
if ( this.contig != that.contig ) return Integer.MAX_VALUE;
return that.start - this.end;
Expand Down
Loading

0 comments on commit 9d9484f

Please sign in to comment.