-
Notifications
You must be signed in to change notification settings - Fork 589
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Tools, filters and annotations for mitochodnria pipeline
- Loading branch information
1 parent
8103bde
commit 38f384f
Showing
27 changed files
with
646 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
76 changes: 76 additions & 0 deletions
76
src/main/java/org/broadinstitute/hellbender/tools/AddOriginalAlignmentTags.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
package org.broadinstitute.hellbender.tools; | ||
|
||
import htsjdk.samtools.SAMTag; | ||
import org.broadinstitute.barclay.argparser.Argument; | ||
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; | ||
import org.broadinstitute.barclay.argparser.ExperimentalFeature; | ||
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; | ||
import org.broadinstitute.hellbender.engine.FeatureContext; | ||
import org.broadinstitute.hellbender.engine.ReadWalker; | ||
import org.broadinstitute.hellbender.engine.ReferenceContext; | ||
import org.broadinstitute.hellbender.utils.io.IOUtils; | ||
import org.broadinstitute.hellbender.utils.read.GATKRead; | ||
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter; | ||
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup; | ||
|
||
@CommandLineProgramProperties( | ||
summary = "Adds Original Alignment tag and original mate contig tag", | ||
oneLineSummary = "Adds Original Alignment tag and original mate contig tag", | ||
programGroup = ReadDataManipulationProgramGroup.class | ||
) | ||
@ExperimentalFeature | ||
public class AddOriginalAlignmentTags extends ReadWalker { | ||
@Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, | ||
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, | ||
doc="Write output to this file") | ||
public String output; | ||
private SAMFileGATKReadWriter outputWriter; | ||
|
||
public final static String MATE_CONTIG_TAG_NAME = "XM"; | ||
public final static String OA_TAG_NAME = "OA"; | ||
public final static String OA_SEPARATOR = ","; | ||
|
||
@Override | ||
public void onTraversalStart() { | ||
outputWriter = createSAMWriter(IOUtils.getPath(output), true); | ||
} | ||
@Override | ||
public void apply(GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext) { | ||
addOATag(read); | ||
addMateContigTag(read); | ||
outputWriter.addRead(read); | ||
} | ||
@Override | ||
public void closeTool() { | ||
if ( outputWriter != null ) { | ||
outputWriter.close(); | ||
} | ||
} | ||
|
||
private static void addMateContigTag(final GATKRead read) { | ||
read.setAttribute(MATE_CONTIG_TAG_NAME, !read.mateIsUnmapped() ? read.getMateContig() : "*"); | ||
} | ||
|
||
//TODO: Once the OA tag is merged into the spec (https://github.com/samtools/hts-specs/pull/193) this should be moved to htsjdk | ||
private static void addOATag(final GATKRead read) { | ||
String OAValue; | ||
if(!read.isUnmapped()){ | ||
OAValue = String.format("%s,%s,%s,%s,%s,%s;", | ||
read.getContig().replace(OA_SEPARATOR, "_"), | ||
read.getStart(), | ||
read.isReverseStrand() ? "-" : "+", | ||
read.getCigar().toString(), | ||
read.getMappingQuality(), | ||
read.getAttributeAsString(SAMTag.NM.name())); | ||
} else { | ||
OAValue = "*,0,*,*,0,0;"; | ||
} | ||
|
||
read.setAttribute(OA_TAG_NAME, OAValue); | ||
} | ||
|
||
public static String getOAContig (final GATKRead read) { | ||
return(read.getAttributeAsString(OA_TAG_NAME).split(OA_SEPARATOR)[0]); | ||
} | ||
|
||
} |
73 changes: 73 additions & 0 deletions
73
src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/OriginalAlignment.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package org.broadinstitute.hellbender.tools.walkers.annotator; | ||
|
||
import htsjdk.variant.variantcontext.Allele; | ||
import htsjdk.variant.variantcontext.Genotype; | ||
import htsjdk.variant.variantcontext.GenotypeBuilder; | ||
import htsjdk.variant.variantcontext.VariantContext; | ||
import htsjdk.variant.vcf.VCFFormatHeaderLine; | ||
import org.broadinstitute.barclay.help.DocumentedFeature; | ||
import org.broadinstitute.hellbender.engine.ReferenceContext; | ||
import org.broadinstitute.hellbender.tools.AddOriginalAlignmentTags; | ||
import org.broadinstitute.hellbender.utils.*; | ||
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods; | ||
import org.broadinstitute.hellbender.utils.help.HelpConstants; | ||
import org.broadinstitute.hellbender.utils.logging.OneShotLogger; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
/** | ||
* Original Alignment annotation counts the number of alt reads where the original alignment contig doesn't match the current alignment contig | ||
* | ||
* <p>If reads were realigned to multiple references (for example the full human reference followed by just the | ||
* mitochondrial contig) and the original alignment tag was recorded before realigning, then we can count the number of alt | ||
* reads that have been realigned from other contigs to this one. This can be useful for downstream filtering if an alt | ||
* allele has all or most of its support originally from a different contig. In the mitochondrial case this can be useful | ||
* for filtering known NuMTs that are present in other contigs in the reference. </p> | ||
* | ||
* <h3>Caveat</h3> | ||
* <p>This annotation can only be calculated if an OA tag is present in the bam and can only be run by Mutect2.</p> | ||
* | ||
*/ | ||
@DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of alt reads with an OA tag that doesn't match the current alignment contig.") | ||
public class OriginalAlignment extends GenotypeAnnotation implements StandardMutectAnnotation { | ||
protected final OneShotLogger warning = new OneShotLogger(this.getClass()); | ||
public static final String KEY = GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY; | ||
|
||
@Override | ||
public void annotate(ReferenceContext ref, VariantContext vc, Genotype g, GenotypeBuilder gb, ReadLikelihoods<Allele> likelihoods) { | ||
Utils.nonNull(gb); | ||
Utils.nonNull(vc); | ||
Utils.nonNull(likelihoods); | ||
|
||
final double[] lods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY); | ||
if (lods==null) { | ||
warning.warn(String.format("One or more variant contexts is missing the 'LOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY)); | ||
return; | ||
} | ||
final int indexOfMaxLod = MathUtils.maxElementIndex(lods); | ||
final Allele altAlelle = vc.getAlternateAllele(indexOfMaxLod); | ||
final Collection<ReadLikelihoods<Allele>.BestAllele> bestAlleles = likelihoods.bestAllelesBreakingTies(g.getSampleName()); | ||
final String currentContig = ref.getInterval().getContig(); | ||
|
||
final long nonChrMAlt = bestAlleles.stream() | ||
.filter(ba -> ba.read.hasAttribute(AddOriginalAlignmentTags.OA_TAG_NAME) && ba.isInformative() && ba.allele.equals(altAlelle) && | ||
!AddOriginalAlignmentTags.getOAContig(ba.read).equals(currentContig)) | ||
.count(); | ||
gb.attribute(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, nonChrMAlt); | ||
} | ||
|
||
@Override | ||
public List<VCFFormatHeaderLine> getDescriptions() { | ||
return Collections.singletonList(GATKVCFHeaderLines.getFormatLine(KEY)); | ||
} | ||
|
||
@Override | ||
public List<String> getKeyNames() { | ||
return Collections.singletonList(KEY); | ||
} | ||
} |
86 changes: 86 additions & 0 deletions
86
src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/PolymorphicNuMT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
package org.broadinstitute.hellbender.tools.walkers.annotator; | ||
|
||
import org.broadinstitute.barclay.help.DocumentedFeature; | ||
import org.broadinstitute.hellbender.utils.help.HelpConstants; | ||
import htsjdk.variant.variantcontext.Allele; | ||
import htsjdk.variant.variantcontext.Genotype; | ||
import htsjdk.variant.variantcontext.GenotypeBuilder; | ||
import htsjdk.variant.variantcontext.VariantContext; | ||
import htsjdk.variant.vcf.VCFFormatHeaderLine; | ||
import org.apache.commons.math3.distribution.PoissonDistribution; | ||
import org.broadinstitute.hellbender.engine.ReferenceContext; | ||
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils; | ||
import org.broadinstitute.hellbender.utils.MathUtils; | ||
import org.broadinstitute.hellbender.utils.Utils; | ||
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; | ||
import org.broadinstitute.hellbender.utils.logging.OneShotLogger; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; | ||
import org.spark_project.guava.collect.Range; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.List; | ||
|
||
/** | ||
* Potential polymorphic NuMT annotation compares the number of alts to the autosomal coverage | ||
* | ||
* <p>Polymorphic NuMTs (NuMT sequence that is not in the reference) will result in autosomal reads that map to the | ||
* mitochondria. This annotation notes when the number of alt reads falls within 80% of the autosomal coverage | ||
* distribution, assuming that autosomal coverage is a Poisson distribution given a central statistic of the depth | ||
* (median is recommended). This will also include true low allele fraction mitochondrial variants and should be used | ||
* as an annotation, rather than a filter.</p> | ||
* | ||
* <h3>Caveat</h3> | ||
* <p>This annotation can only be calculated in Mutect2 if median-autosomal-coverage argument is provided.</p> | ||
* | ||
*/ | ||
@DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Number of alts indicates it could be an autosomal false positive.") | ||
public class PolymorphicNuMT extends GenotypeAnnotation implements Annotation { | ||
protected final OneShotLogger warning = new OneShotLogger(this.getClass()); | ||
public static final String KEY = GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_KEY; | ||
private static final double LOWER_BOUND_PROB = .1; | ||
private Range<Long> autosomalHetRange; | ||
private Range<Long> autosomalHomAltRange; | ||
|
||
public PolymorphicNuMT(final double medianAutosomalCoverage){ | ||
final PoissonDistribution autosomalCoverage = new PoissonDistribution(medianAutosomalCoverage); | ||
final long minAutosomalHomAlt = autosomalCoverage.inverseCumulativeProbability(LOWER_BOUND_PROB); | ||
final long maxAutosomalHomAlt = autosomalCoverage.inverseCumulativeProbability(1 - LOWER_BOUND_PROB); | ||
final long minAutosomalHet = minAutosomalHomAlt / 2; | ||
final long maxAutosomalHet = maxAutosomalHomAlt / 2; | ||
autosomalHetRange = Range.closed(minAutosomalHet, maxAutosomalHet); | ||
autosomalHomAltRange = Range.closed(minAutosomalHomAlt, maxAutosomalHomAlt); | ||
} | ||
|
||
// Barclay requires that each annotation define a constructor that takes no arguments | ||
public PolymorphicNuMT(){ } | ||
|
||
@Override | ||
public void annotate(ReferenceContext ref, VariantContext vc, Genotype g, GenotypeBuilder gb, ReadLikelihoods<Allele> likelihoods) { | ||
Utils.nonNull(gb); | ||
Utils.nonNull(vc); | ||
Utils.nonNull(likelihoods); | ||
final double[] lods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY); | ||
if (lods==null) { | ||
warning.warn(String.format("One or more variant contexts is missing the 'LOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_KEY)); | ||
return; | ||
} | ||
final int indexOfMaxLod = MathUtils.maxElementIndex(lods); | ||
final Allele altAlelle = vc.getAlternateAllele(indexOfMaxLod); | ||
Collection<ReadLikelihoods<Allele>.BestAllele> bestAlleles = likelihoods.bestAllelesBreakingTies(g.getSampleName()); | ||
final long numAltReads = bestAlleles.stream().filter(ba -> ba.isInformative() && ba.allele.equals(altAlelle)).count(); | ||
if ( autosomalHetRange.contains(numAltReads) || autosomalHomAltRange.contains(numAltReads) ) { | ||
gb.attribute(GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_KEY, "true"); | ||
} | ||
} | ||
@Override | ||
public List<VCFFormatHeaderLine> getDescriptions() { | ||
return Collections.singletonList(GATKVCFHeaderLines.getFormatLine(KEY)); | ||
} | ||
@Override | ||
public List<String> getKeyNames() { | ||
return Collections.singletonList(KEY); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.