- @Argument(fullName = TUMOR_SAMPLE_LONG_NAME, shortName = TUMOR_SAMPLE_SHORT_NAME, doc = "BAM sample name of tumor. May be URL-encoded as output by GetSampleName with -encode argument.", optional = false)
+ @Argument(fullName = TUMOR_SAMPLE_LONG_NAME, shortName = TUMOR_SAMPLE_SHORT_NAME, doc = "BAM sample name of tumor. May be URL-encoded as output by GetSampleName with -encode argument.", optional = true)
protected String tumorSample = null;
@Argument(fullName = NORMAL_SAMPLE_LONG_NAME, shortName = NORMAL_SAMPLE_SHORT_NAME, doc = "BAM sample name of normal. May be URL-encoded as output by GetSampleName with -encode argument.", optional = true)
@@ -91,27 +98,44 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
@Argument(fullName= DEFAULT_AF_LONG_NAME, shortName = DEFAULT_AF_SHORT_NAME,
doc="Population allele fraction assigned to alleles not found in germline resource. Please see docs/mutect/mutect2.pdf for" +
"a derivation of the default value.", optional = true)
- public double afOfAllelesNotInGermlineResource = -1;
+ private double afOfAllelesNotInGermlineResource = -1;
public double getDefaultAlleleFrequency() {
return afOfAllelesNotInGermlineResource >= 0 ? afOfAllelesNotInGermlineResource :
- (normalSample == null ? DEFAULT_AF_FOR_TUMOR_ONLY_CALLING : DEFAULT_AF_FOR_TUMOR_NORMAL_CALLING);
+ (mitochondria ? DEFAULT_AF_FOR_MITO_CALLING:
+ (normalSample == null ? DEFAULT_AF_FOR_TUMOR_ONLY_CALLING : DEFAULT_AF_FOR_TUMOR_NORMAL_CALLING));
}
+ /**
+ * Mitochondria mode changes default values for --tumor-lod-to-emit and --initial-tumor-lod to 0 to increase sensitivity.
+ * --tumor-sample is also not explicitly required in mitochondria mode since a single sample bam is expected as input.
+ * Mitochondria mode is also required in FilterMutectCalls if used here.
+ */
+ @Argument(fullName = MITOCHONDIRA_MODE_LONG_NAME, optional = true, doc="Mitochondria mode sets emission and initial LODs to 0.")
+ public Boolean mitochondria = false;
+
/**
* Only variants with tumor LODs exceeding this threshold will be written to the VCF, regardless of filter status.
* Set to less than or equal to tumor_lod. Increase argument value to reduce false positives in the callset.
* Default setting of 3 is permissive and will emit some amount of negative training data that
* {@link FilterMutectCalls} should then filter.
*/
- @Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit tumor variant to VCF.")
- public double emissionLod = 3.0;
+ @Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit variant to VCF.")
+ private double emissionLodArg = DEFAULT_EMISSION_LOD;
+
+ public double getEmissionLod() {
+ return mitochondria && emissionLodArg == DEFAULT_EMISSION_LOD ? DEFAULT_MITO_EMISSION_LOD : emissionLodArg;
+ }
/**
* Only variants with estimated tumor LODs exceeding this threshold will be considered active.
*/
@Argument(fullName = INITIAL_TUMOR_LOD_LONG_NAME, shortName = INITIAL_TUMOR_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to consider pileup active.")
- public double initialTumorLod = 2.0;
+ private double initialLod = DEFAULT_INITIAL_LOD;
+
+ public double getInitialLod() {
+ return mitochondria && initialLod == DEFAULT_INITIAL_LOD ? DEFAULT_MITO_INITIAL_LOD : initialLod;
+ }
/**
* PCR error rate for overlapping fragments in isActive()
@@ -178,4 +202,11 @@ public double getDefaultAlleleFrequency() {
@Argument(fullName = ANNOTATE_BASED_ON_READS_LONG_NAME, doc = "If true, strand-based annotations use the number of reads, not fragments")
public boolean annotateBasedOnReads = false;
+ /**
+ * Used to model autosomal coverage when calling mitochondria. The median tends to be a more robust center statistic.
+ */
+ @Advanced
+ @Argument(fullName = MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, doc="For mitochondrial calling only; Annotate possible polymorphic NuMT based on Poisson distribution given median autosomal coverage", optional = true)
+ public double autosomalCoverage;
+
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2FiltersArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2FiltersArgumentCollection.java
index dfa83e36a19..a46f245d69e 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2FiltersArgumentCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2FiltersArgumentCollection.java
@@ -24,11 +24,14 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
public static final String MAX_STRAND_ARTIFACT_PROBABILITY_LONG_NAME = "max-strand-artifact-probability";
public static final String MIN_STRAND_ARTIFACT_ALLELE_FRACTION_LONG_NAME = "min-strand-artifact-allele-fraction";
public static final String CONTAMINATION_TABLE_LONG_NAME = "contamination-table";
+ public static final String CONTAMINATION_ESTIMATE_LONG_NAME = "contamination-estimate";
public static final String MAX_CONTAMINATION_PROBABILITY_LONG_NAME = "max-contamination-probability";
public static final String UNIQUE_ALT_READ_COUNT_LONG_NAME = "unique-alt-read-count";
public static final String TUMOR_SEGMENTATION_LONG_NAME = "tumor-segmentation";
public static final String ORIENTATION_BIAS_FDR_LONG_NAME = "orientation-bias-fdr"; // FDR = false discovery rate
public static final String MAX_DISTANCE_TO_FILTERED_CALL_ON_SAME_HAPLOTYPE_LONG_NAME = "distance-on-haplotype";
+ public static final String LOD_BY_DEPTH = "lod-divided-by-depth";
+ public static final String NON_MT_ALT_READS_BY_ALT_READS = "non-mt-alts-divided-by-alts";
public static final String FILTERING_STATS_LONG_NAME = "stats";
@@ -53,9 +56,9 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
public double log10PriorProbOfSomaticEvent = -6.0;
/**
- * Only variants with tumor LODs exceeding this threshold can pass filtering.
+ * Only variants with log odds ratios exceeding this threshold can pass filtering.
*/
- @Argument(fullName = TUMOR_LOD_LONG_NAME, optional = true, doc = "LOD threshold for calling tumor variant")
+ @Argument(fullName = TUMOR_LOD_LONG_NAME, optional = true, doc = "LOD threshold for calling variant")
public double TUMOR_LOD_THRESHOLD = 5.3;
/**
@@ -107,6 +110,9 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
@Argument(fullName = CONTAMINATION_TABLE_LONG_NAME, optional = true, doc = "Table containing contamination information.")
public File contaminationTable = null;
+ @Argument(fullName = CONTAMINATION_ESTIMATE_LONG_NAME, optional = true, doc = "Estimate of contamination.")
+ public double contaminationEstimate = 0;
+
@Argument(fullName = MAX_CONTAMINATION_PROBABILITY_LONG_NAME, optional = true, doc = "Filter variants with posterior probability to be due to contamination greater than this.")
public double maxContaminationProbability = 0.1;
@@ -128,5 +134,32 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
@Argument(fullName = MAX_DISTANCE_TO_FILTERED_CALL_ON_SAME_HAPLOTYPE_LONG_NAME, optional = true, doc = "On second filtering pass, variants with same PGT and PID tags as a filtered variant within this distance are filtered.")
public int maxDistanceToFilteredCallOnSameHaplotype = 100;
+ /**
+ * Only variants with LOD divided by depth exceeding this threshold can pass filtering.
+ */
+ @Argument(fullName = LOD_BY_DEPTH, doc="LOD by depth threshold for filtering variant", optional = true)
+ public double lodByDepth = 0.005;
+
+ /**
+ * Only variants with alt reads originally aligned outside of the mitochondria (known NuMTs) divided by total alt
+ * reads exceeding this threshold can pass filtering.
+ */
+ @Argument(fullName = NON_MT_ALT_READS_BY_ALT_READS, doc="Known NuMT alts by total alts threshold for filtering variant", optional = true)
+ public double nonMtAltByAlt = 0.85;
+
+ /**
+ * Mitochondria mode includes "LOD by depth" and "Non MT alt reads by alt reads" filters, which are not included
+ * without mitochondria mode. Mitochondria mode only runs the following filters:
+ * Insufficient Evidence Filter
+ * Duplicated Alt Read Filter
+ * Strand Artifact Filter
+ * Base Quality Filter
+ * Mapping Quality Filter
+ * Chimeric Original Alignment Filter
+ * LOD by depth Filter
+ * Contamination Filter
+ */
+ @Argument(fullName = M2ArgumentCollection.MITOCHONDIRA_MODE_LONG_NAME, optional = true, doc = "Set filters to mitochondrial defaults")
+ public boolean mitochondria = false;
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java
index d8356d11d6f..4735b4b478a 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2.java
@@ -9,18 +9,15 @@
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
-import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
-import org.broadinstitute.hellbender.tools.walkers.annotator.ReadOrientationArtifact;
-import org.broadinstitute.hellbender.tools.walkers.annotator.ReferenceBases;
-import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.tools.walkers.annotator.*;
import org.broadinstitute.hellbender.transformers.ReadTransformer;
import org.broadinstitute.hellbender.utils.downsampling.MutectDownsampler;
import org.broadinstitute.hellbender.utils.downsampling.ReadsDownsampler;
+import org.broadinstitute.hellbender.utils.read.ReadUtils;
import java.io.File;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
+import java.util.*;
/**
* Call somatic short variants via local assembly of haplotypes.
@@ -185,8 +182,23 @@ protected ReadsDownsampler createDownsampler() {
@Override
public AssemblyRegionEvaluator assemblyRegionEvaluator() { return m2Engine; }
+ @Override
+ protected String[] customCommandLineValidation() {
+ if (MTAC.tumorSample == null && !MTAC.mitochondria) {
+ return new String[]{"Argument tumor-sample was missing: Argument 'tumor-sample' is required when not in mitochondria mode."};
+ }
+ return null;
+ }
+
@Override
public void onTraversalStart() {
+ if (MTAC.mitochondria) {
+ final Set samples = ReadUtils.getSamplesFromHeader(getHeaderForReads());
+ if (samples.size() != 1) {
+ throw new UserException(String.format("The input bam has more than one sample: %s", Arrays.toString(samples.toArray())));
+ }
+ MTAC.tumorSample = samples.iterator().next();
+ }
VariantAnnotatorEngine annotatorEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), null, Collections.emptyList(), false);
m2Engine = new Mutect2Engine(MTAC, createOutputBamIndex, createOutputBamMD5, getHeaderForReads(), referenceArguments.getReferenceFileName(), annotatorEngine);
vcfWriter = createVCFWriter(outputVCF);
@@ -202,6 +214,9 @@ public Collection makeVariantAnnotations(){
annotations.add(new ReadOrientationArtifact(MTAC.artifactPriorTable));
annotations.add(new ReferenceBases());
}
+ if (MTAC.autosomalCoverage > 0) {
+ annotations.add(new PolymorphicNuMT(MTAC.autosomalCoverage));
+ }
return annotations;
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java
index 5169dc8e2d9..207c4d87b88 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java
@@ -132,6 +132,7 @@ public static List makeStandardMutect2ReadFilters() {
ReadFilterLibrary.NOT_SECONDARY_ALIGNMENT,
ReadFilterLibrary.NOT_DUPLICATE,
ReadFilterLibrary.PASSES_VENDOR_QUALITY_CHECK,
+ ReadFilterLibrary.NON_CHIMERIC_ORIGINAL_ALIGNMENT_READ_FILTER,
ReadFilterLibrary.NON_ZERO_REFERENCE_LENGTH_ALIGNMENT,
new ReadLengthReadFilter(MIN_READ_LENGTH, Integer.MAX_VALUE),
ReadFilterLibrary.GOOD_CIGAR,
@@ -168,7 +169,9 @@ public void writeHeader(final VariantContextWriter vcfWriter, final Set tumorAltQuals = altQuals(tumorPileup, refBase, MTAC.initialPCRErrorQual);
final double tumorLog10Odds = MathUtils.logToLog10(lnLikelihoodRatio(tumorPileup.size()-tumorAltQuals.size(), tumorAltQuals));
- if (tumorLog10Odds < MTAC.initialTumorLod) {
+ if (tumorLog10Odds < MTAC.getInitialLod()) {
return new ActivityProfileState(refInterval, 0.0);
} else if (hasNormal() && !MTAC.genotypeGermlineSites) {
final ReadPileup normalPileup = pileup.getPileupForSample(normalSample, header);
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2FilteringEngine.java
index 89b9420255b..d735ff8e56a 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2FilteringEngine.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2FilteringEngine.java
@@ -30,7 +30,7 @@ public class Mutect2FilteringEngine {
public Mutect2FilteringEngine(final M2FiltersArgumentCollection MTFAC, final String tumorSample, final Optional normalSample) {
this.MTFAC = MTFAC;
- contamination = MTFAC.contaminationTable == null ? 0.0 : ContaminationRecord.readFromFile(MTFAC.contaminationTable).get(0).getContamination();
+ contamination = MTFAC.contaminationTable == null ? MTFAC.contaminationEstimate : ContaminationRecord.readFromFile(MTFAC.contaminationTable).get(0).getContamination();
this.tumorSample = tumorSample;
this.normalSample = normalSample;
somaticPriorProb = Math.pow(10, MTFAC.log10PriorProbOfSomaticEvent);
@@ -66,7 +66,7 @@ private void applyContaminationFilter(final M2FiltersArgumentCollection MTFAC, f
private void applyTriallelicFilter(final VariantContext vc, final FilterResult filterResult) {
if (vc.hasAttribute(GATKVCFConstants.TUMOR_LOD_KEY)) {
- final double[] tumorLods = getDoubleArrayAttribute(vc, GATKVCFConstants.TUMOR_LOD_KEY);
+ final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final long numPassingAltAlleles = Arrays.stream(tumorLods).filter(x -> x > MTFAC.TUMOR_LOD_THRESHOLD).count();
if (numPassingAltAlleles > MTFAC.numAltAllelesThreshold) {
@@ -109,7 +109,7 @@ private static void applyPanelOfNormalsFilter(final M2FiltersArgumentCollection
private void applyBaseQualityFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
final int[] baseQualityByAllele = getIntArrayTumorField(vc, BaseQuality.KEY);
- final double[] tumorLods = getDoubleArrayAttribute(vc, GATKVCFConstants.TUMOR_LOD_KEY);
+ final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
if (baseQualityByAllele != null && baseQualityByAllele[indexOfMaxTumorLod + 1] < MTFAC.minMedianBaseQuality) {
@@ -142,10 +142,10 @@ private void applyReadPositionFilter(final M2FiltersArgumentCollection MTFAC, fi
private void applyGermlineVariantFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
if (vc.hasAttribute(GATKVCFConstants.TUMOR_LOD_KEY) && vc.hasAttribute(GATKVCFConstants.POPULATION_AF_VCF_ATTRIBUTE)) {
- final double[] tumorLog10OddsIfSomatic = getDoubleArrayAttribute(vc, GATKVCFConstants.TUMOR_LOD_KEY);
+ final double[] tumorLog10OddsIfSomatic = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final Optional normalLods = vc.hasAttribute(GATKVCFConstants.NORMAL_LOD_KEY) ?
- Optional.of(getDoubleArrayAttribute(vc, GATKVCFConstants.NORMAL_LOD_KEY)) : Optional.empty();
- final double[] populationAlleleFrequencies = getDoubleArrayAttribute(vc, GATKVCFConstants.POPULATION_AF_VCF_ATTRIBUTE);
+ Optional.of(GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.NORMAL_LOD_KEY)) : Optional.empty();
+ final double[] populationAlleleFrequencies = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.POPULATION_AF_VCF_ATTRIBUTE);
final List segments = tumorSegments.getOverlaps(vc).stream().collect(Collectors.toList());
@@ -187,9 +187,9 @@ private void applyGermlineVariantFilter(final M2FiltersArgumentCollection MTFAC,
}
}
- private static void applyInsufficientEvidenceFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
+ private void applyInsufficientEvidenceFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
if (vc.hasAttribute(GATKVCFConstants.TUMOR_LOD_KEY)) {
- final double[] tumorLods = getDoubleArrayAttribute(vc, GATKVCFConstants.TUMOR_LOD_KEY);
+ final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
if (MathUtils.arrayMax(tumorLods) < MTFAC.TUMOR_LOD_THRESHOLD) {
filterResult.addFilter(GATKVCFConstants.TUMOR_LOD_FILTER_NAME);
@@ -205,7 +205,7 @@ private void applyArtifactInNormalFilter(final M2FiltersArgumentCollection MTFAC
return;
}
- final double[] tumorLods = getDoubleArrayAttribute(vc, GATKVCFConstants.TUMOR_LOD_KEY);
+ final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
Genotype tumorGenotype = vc.getGenotype(tumorSample);
@@ -226,7 +226,7 @@ private void applyArtifactInNormalFilter(final M2FiltersArgumentCollection MTFAC
return;
}
- final double[] normalArtifactLods = getDoubleArrayAttribute(vc, GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE);
+ final double[] normalArtifactLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE);
if (normalArtifactLods[indexOfMaxTumorLod] > MTFAC.NORMAL_ARTIFACT_LOD_THRESHOLD) {
filterResult.addFilter(GATKVCFConstants.ARTIFACT_IN_NORMAL_FILTER_NAME);
return;
@@ -245,10 +245,6 @@ private void applyArtifactInNormalFilter(final M2FiltersArgumentCollection MTFAC
}
}
- private static double[] getDoubleArrayAttribute(final VariantContext vc, final String attribute) {
- return GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, attribute, () -> null, -1);
- }
-
private void applyStrandArtifactFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
Genotype tumorGenotype = vc.getGenotype(tumorSample);
final double[] posteriorProbabilities = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(
@@ -327,28 +323,60 @@ private void applyFilteredHaplotypeFilter(final M2FiltersArgumentCollection MTFA
}
}
+ private void applyChimericOriginalAlignmentFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
+ final Genotype tumorGenotype = vc.getGenotype(tumorSample);
+ final double[] alleleFractions = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(tumorGenotype, VCFConstants.ALLELE_FREQUENCY_KEY,
+ () -> new double[] {1.0}, 1.0);
+ final int maxFractionIndex = MathUtils.maxElementIndex(alleleFractions);
+ final int[] ADs = tumorGenotype.getAD();
+ final int altCount = ADs[maxFractionIndex + 1];
+ if (tumorGenotype.hasAnyAttribute(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY) && vc.isBiallelic()) {
+ final int nonMtOa = GATKProtectedVariantContextUtils.getAttributeAsInt(tumorGenotype, GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY, -1);
+ if ((double) nonMtOa / altCount > MTFAC.nonMtAltByAlt) {
+ filterResult.addFilter(GATKVCFConstants.CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME);
+ }
+ }
+ }
+ private void applyLODFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
+ if(vc.isBiallelic()) {
+ final Double lod = vc.getAttributeAsDouble(GATKVCFConstants.TUMOR_LOD_KEY, 1);
+ final Double depth = vc.getAttributeAsDouble(VCFConstants.DEPTH_KEY, 1);
+ final Double lodByDepth = lod / depth;
+ if (lodByDepth < MTFAC.lodByDepth) {
+ filterResult.addFilter(GATKVCFConstants.LOW_AVG_ALT_QUALITY_FILTER_NAME);
+ }
+ }
+ }
+
+
public FilterResult calculateFilters(final M2FiltersArgumentCollection MTFAC, final VariantContext vc,
final Optional firstPass) {
firstPass.ifPresent(ffp -> Utils.validate(ffp.isReadyForSecondPass(), "First pass information has not been processed into a model for the second pass."));
final FilterResult filterResult = new FilterResult();
- applyFilteredHaplotypeFilter(MTFAC, vc, filterResult, firstPass);
applyInsufficientEvidenceFilter(MTFAC, vc, filterResult);
- applyClusteredEventFilter(vc, filterResult);
applyDuplicatedAltReadFilter(MTFAC, vc, filterResult);
- applyTriallelicFilter(vc, filterResult);
- applyPanelOfNormalsFilter(MTFAC, vc, filterResult);
- applyGermlineVariantFilter(MTFAC, vc, filterResult);
- applyArtifactInNormalFilter(MTFAC, vc, filterResult);
applyStrandArtifactFilter(MTFAC, vc, filterResult);
- applySTRFilter(vc, filterResult);
- applyContaminationFilter(MTFAC, vc, filterResult);
applyBaseQualityFilter(MTFAC, vc, filterResult);
applyMappingQualityFilter(MTFAC, vc, filterResult);
- applyMedianFragmentLengthDifferenceFilter(MTFAC, vc, filterResult);
- applyReadPositionFilter(MTFAC, vc, filterResult);
+ applyContaminationFilter(MTFAC, vc, filterResult);
+
+ if (!MTFAC.mitochondria) {
+ applyFilteredHaplotypeFilter(MTFAC, vc, filterResult, firstPass);
+ applyClusteredEventFilter(vc, filterResult);
+ applyTriallelicFilter(vc, filterResult);
+ applyPanelOfNormalsFilter(MTFAC, vc, filterResult);
+ applyGermlineVariantFilter(MTFAC, vc, filterResult);
+ applyArtifactInNormalFilter(MTFAC, vc, filterResult);
+ applySTRFilter(vc, filterResult);
+ applyMedianFragmentLengthDifferenceFilter(MTFAC, vc, filterResult);
+ applyReadPositionFilter(MTFAC, vc, filterResult);
+ // The ReadOrientation filter uses the information gathered during the first pass
+ applyReadOrientationFilter(vc, filterResult, firstPass);
+ } else {
+ applyChimericOriginalAlignmentFilter(MTFAC, vc, filterResult);
+ applyLODFilter(MTFAC, vc, filterResult);
+ }
- // The following filters use the information gathered during the first pass
- applyReadOrientationFilter(vc, filterResult, firstPass);
return filterResult;
}
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java
index e79c1d7135c..d61a7bc3698 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java
@@ -11,7 +11,6 @@
import org.apache.commons.math3.util.FastMath;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
-import org.broadinstitute.hellbender.tools.walkers.annotator.StrandBiasTest;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculator;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerGenotypingEngine;
@@ -125,7 +124,7 @@ public CalledHaplotypes callMutations(
final Set forcedAlleles = getAllelesConsistentWithGivenAlleles(givenAlleles, loc, mergedVC);
final List tumorAltAlleles = mergedVC.getAlternateAlleles().stream()
- .filter(allele -> forcedAlleles.contains(allele) || tumorLog10Odds.getAlt(allele) > MTAC.emissionLod)
+ .filter(allele -> forcedAlleles.contains(allele) || tumorLog10Odds.getAlt(allele) > MTAC.getEmissionLod())
.collect(Collectors.toList());
final long somaticAltCount = tumorAltAlleles.stream()
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/GATKProtectedVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/GATKProtectedVariantContextUtils.java
index 1ebfccf994e..35a5c2b86dd 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/GATKProtectedVariantContextUtils.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/GATKProtectedVariantContextUtils.java
@@ -49,6 +49,18 @@ public static double[] getAttributeAsDoubleArray(final VariantContext variantCon
return attributeValueToDoubleArray(variantContext.getAttribute(key), key, defaultValue, missingValue);
}
+ /**
+ * Composes the double array from a genotype annotation. Provides default and missing values.
+ *
+ * @param variantContext the target variant-context.
+ * @param attribute the name of the attribute containing the double array.
+ * @return never {@code null}.
+ * @throws IllegalArgumentException if {@code variantContext} is {@code null} or {@code key} is {@code null}.
+ */
+ public static double[] getAttributeAsDoubleArray(final VariantContext variantContext, final String attribute) {
+ return getAttributeAsDoubleArray(variantContext, attribute, () -> null, -1);
+ }
+
/**
* Composes the double array from a genotype annotation.
*
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java
index b522056aff1..8498072f8be 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java
@@ -140,6 +140,8 @@ public final class GATKVCFConstants {
public static final String ROF_POSTERIOR_KEY = "P_RO"; // For read orientation filter
public static final String ROF_PRIOR_KEY = "P_PRIOR_RO";
public static final String ROF_TYPE_KEY = "ROF_TYPE";
+ public static final String ORIGINAL_CONTIG_MISMATCH_KEY = "ORIGINAL_CONTIG_MISMATCH";
+ public static final String POTENTIAL_POLYMORPHIC_NUMT_KEY = "POTENTIAL_POLYMORPHIC_NUMT";
//FILTERS
/* Note that many filters used throughout GATK (most notably in VariantRecalibration) are dynamic,
@@ -162,7 +164,10 @@ their names (or descriptions) depend on some threshold. Those filters are not i
public final static String READ_POSITION_FILTER_NAME = "read_position";
public final static String CONTAMINATION_FILTER_NAME = "contamination";
public final static String READ_ORIENTATION_ARTIFACT_FILTER_NAME = "read_orientation_artifact";
- public final static String BAD_HAPLOTYPE_FILTER_NAME = "bad_haplotype";
+ public final static String BAD_HAPLOTYPE_FILTER_NAME = "bad_haplotype";
+ public final static String CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME = "chimeric_original_alignment"; //mitochondria
+ public final static String LOW_AVG_ALT_QUALITY_FILTER_NAME = "low_avg_alt_quality"; //mitochondria
+
public static final List MUTECT_FILTER_NAMES = Arrays.asList(STR_CONTRACTION_FILTER_NAME,
PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_LOD_FILTER_NAME, GERMLINE_RISK_FILTER_NAME,
@@ -170,7 +175,9 @@ their names (or descriptions) depend on some threshold. Those filters are not i
MEDIAN_BASE_QUALITY_FILTER_NAME, MEDIAN_MAPPING_QUALITY_FILTER_NAME,
MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME,
READ_POSITION_FILTER_NAME, CONTAMINATION_FILTER_NAME, DUPLICATED_EVIDENCE_FILTER_NAME,
- READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME);
+ READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME, CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME,
+ LOW_AVG_ALT_QUALITY_FILTER_NAME
+ );
// Symbolic alleles
public final static String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT";
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java
index 5547e655067..ae445e09e7d 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java
@@ -64,7 +64,7 @@ private static void addFilterLine(final VCFFilterHeaderLine line) {
addFilterLine(new VCFFilterHeaderLine(CLUSTERED_EVENTS_FILTER_NAME, "Clustered events observed in the tumor"));
addFilterLine(new VCFFilterHeaderLine(GERMLINE_RISK_FILTER_NAME, "Evidence indicates this site is germline, not somatic"));
addFilterLine(new VCFFilterHeaderLine(PON_FILTER_NAME, "Blacklisted site in panel of normals"));
- addFilterLine(new VCFFilterHeaderLine(TUMOR_LOD_FILTER_NAME, "Tumor does not meet likelihood threshold"));
+ addFilterLine(new VCFFilterHeaderLine(TUMOR_LOD_FILTER_NAME, "Mutation does not meet likelihood threshold"));
addFilterLine(new VCFFilterHeaderLine(STR_CONTRACTION_FILTER_NAME, "Site filtered due to contraction of short tandem repeat region"));
addFilterLine(new VCFFilterHeaderLine(MULTIALLELIC_FILTER_NAME, "Site filtered because too many alt alleles pass tumor LOD"));
addFilterLine(new VCFFilterHeaderLine(STRAND_ARTIFACT_FILTER_NAME, "Evidence for alt allele comes from one read direction only"));
@@ -78,6 +78,9 @@ private static void addFilterLine(final VCFFilterHeaderLine line) {
addFilterLine(new VCFFilterHeaderLine(READ_ORIENTATION_ARTIFACT_FILTER_NAME, "orientation bias detected by the orientation bias mixture model"));
addFilterLine(new VCFFilterHeaderLine(BAD_HAPLOTYPE_FILTER_NAME, "Variant near filtered variant on same haplotype."));
+ //Mitochondrial M2-related filters
+ addFilterLine(new VCFFilterHeaderLine(CHIMERIC_ORIGINAL_ALIGNMENT_FILTER_NAME, "NuMT variant with too many ALT reads originally from autosome"));
+ addFilterLine(new VCFFilterHeaderLine(LOW_AVG_ALT_QUALITY_FILTER_NAME, "Low average alt quality"));
addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype"));
addFormatLine(new VCFFormatHeaderLine(MAPPING_QUALITY_ZERO_BY_SAMPLE_KEY, 1, VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample"));
@@ -110,6 +113,8 @@ private static void addFilterLine(final VCFFilterHeaderLine line) {
addFormatLine(new VCFFormatHeaderLine(ROF_POSTERIOR_KEY, 1, VCFHeaderLineType.Float, "posterior probability of read orientation-based artifacts"));
addFormatLine(new VCFFormatHeaderLine(ROF_PRIOR_KEY, 1, VCFHeaderLineType.Float, "prior probability of read orientation-based artifacts under the present referene context"));
addFormatLine(new VCFFormatHeaderLine(ROF_TYPE_KEY, 1, VCFHeaderLineType.String, "type of read orientation artifact (F1R2 or F2R1)"));
+ addFormatLine(new VCFFormatHeaderLine(ORIGINAL_CONTIG_MISMATCH_KEY, 1, VCFHeaderLineType.Integer, "Number of alt reads whose original alignment doesn't match the current contig."));
+ addFormatLine(new VCFFormatHeaderLine(POTENTIAL_POLYMORPHIC_NUMT_KEY, 1, VCFHeaderLineType.String, "Potentially a polymorphic NuMT false positive rather than a real mitochondrial variant."));
addInfoLine(new VCFInfoHeaderLine(MLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"));
addInfoLine(new VCFInfoHeaderLine(MLE_ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"));
@@ -195,13 +200,11 @@ private static void addFilterLine(final VCFFilterHeaderLine line) {
// M2-related info lines
addInfoLine(new VCFInfoHeaderLine(EVENT_COUNT_IN_HAPLOTYPE_KEY, 1, VCFHeaderLineType.Integer, "Number of events in this haplotype"));
addInfoLine(new VCFInfoHeaderLine(NORMAL_LOD_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Normal LOD score"));
- addInfoLine(new VCFInfoHeaderLine(TUMOR_LOD_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Tumor LOD score"));
+ addInfoLine(new VCFInfoHeaderLine(TUMOR_LOD_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Log odds ratio score for variant"));
addInfoLine(new VCFInfoHeaderLine(IN_PON_VCF_ATTRIBUTE, 0, VCFHeaderLineType.Flag, "site found in panel of normals"));
addInfoLine(new VCFInfoHeaderLine(POPULATION_AF_VCF_ATTRIBUTE, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "population allele frequencies of alt alleles"));
addInfoLine(new VCFInfoHeaderLine(GERMLINE_POSTERIORS_VCF_ATTRIBUTE, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Posterior probability for alt allele to be germline variants"));
addInfoLine(new VCFInfoHeaderLine(POSTERIOR_PROB_OF_CONTAMINATION_ATTRIBUTE, 1, VCFHeaderLineType.Float, "Posterior probability for alt allele to be due to contamination"));
-
-
addInfoLine(new VCFInfoHeaderLine(NORMAL_ARTIFACT_LOD_ATTRIBUTE, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "log odds of artifact in normal with same allele fraction as tumor"));
}
}
diff --git a/src/test/java/org/broadinstitute/hellbender/engine/filters/ReadFilterLibraryUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/filters/ReadFilterLibraryUnitTest.java
index b78526e2442..9322f497dfa 100644
--- a/src/test/java/org/broadinstitute/hellbender/engine/filters/ReadFilterLibraryUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/engine/filters/ReadFilterLibraryUnitTest.java
@@ -1,6 +1,7 @@
package org.broadinstitute.hellbender.engine.filters;
import htsjdk.samtools.*;
+import org.broadinstitute.hellbender.tools.AddOriginalAlignmentTags;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
@@ -803,4 +804,16 @@ public void testPrimaryAlignmentReadFilter() {
}
+ @Test
+ public void testChimericOAFilter() {
+ final GATKRead read = simpleGoodRead(createHeaderWithReadGroups());
+ read.setAttribute(AddOriginalAlignmentTags.OA_TAG_NAME, "*,0,*,*,0,0;");
+
+ Assert.assertTrue(ReadFilterLibrary.NON_CHIMERIC_ORIGINAL_ALIGNMENT_READ_FILTER.test(read));
+
+ read.setAttribute(AddOriginalAlignmentTags.MATE_CONTIG_TAG_NAME, "chrM");
+
+ Assert.assertFalse(ReadFilterLibrary.NON_CHIMERIC_ORIGINAL_ALIGNMENT_READ_FILTER.test(read));
+ }
+
}
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/AddOriginalAlignmentTagsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/AddOriginalAlignmentTagsIntegrationTest.java
new file mode 100644
index 00000000000..47f8a8cffc6
--- /dev/null
+++ b/src/test/java/org/broadinstitute/hellbender/tools/AddOriginalAlignmentTagsIntegrationTest.java
@@ -0,0 +1,83 @@
+package org.broadinstitute.hellbender.tools;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import org.broadinstitute.hellbender.CommandLineProgramTest;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+import static org.testng.Assert.*;
+
+public final class AddOriginalAlignmentTagsIntegrationTest extends CommandLineProgramTest {
+ private File localTestData = new File(getTestDataDir(), "mitochondria/NA12878.bam");
+ private static final ArrayList expectedOATags = new ArrayList<>(Arrays.asList(
+ "chrM,1,+,92S59M,60,0;",
+ "chrM,1,+,19S132M,60,0;",
+ "chrM,1,-,47S104M,60,0;",
+ "*,0,*,*,0,0;",
+ "chrM,2,-,116S35M,60,0;",
+ "chrM,3,-,92S59M,60,0;",
+ "*,0,*,*,0,0;",
+ "*,0,*,*,0,0;"));
+
+ private static final ArrayList expectedXMTags = new ArrayList<>(Arrays.asList(
+ "chrM",
+ "chrM",
+ "*",
+ "chrM",
+ "chrM",
+ "chrM",
+ "*",
+ "*"));
+
+ @Test()
+ public void testAddingOATag() {
+ final File oaAddedBam = createTempFile("oaAdded", ".bam");
+
+ final List args = Arrays.asList("-I", localTestData.getPath(),
+ "-O", oaAddedBam.getPath());
+ runCommandLine(args);
+
+ SamReader out = SamReaderFactory.makeDefault().open(oaAddedBam);
+ Iterator expectedOAIter = expectedOATags.iterator();
+ Iterator expectedXMIter = expectedXMTags.iterator();
+ int readsInBam = 0;
+
+ for(SAMRecord r : out) {
+ assertEquals(r.getAttribute(AddOriginalAlignmentTags.OA_TAG_NAME), expectedOAIter.next());
+ assertEquals(r.getAttribute(AddOriginalAlignmentTags.MATE_CONTIG_TAG_NAME), expectedXMIter.next());
+ readsInBam++;
+ }
+ assertEquals(readsInBam, 8);
+ }
+
+ @Test()
+ public void testUsingIntervalList() {
+ final File oaAddedBam = createTempFile("oaAdded", ".bam");
+
+ final List args = Arrays.asList("-I", localTestData.getPath(),
+ "-O", oaAddedBam.getPath(),
+ "-L", "chrM");
+ runCommandLine(args);
+
+ SamReader out = SamReaderFactory.makeDefault().open(oaAddedBam);
+ Iterator expectedOAIter = expectedOATags.iterator();
+ Iterator expectedXMIter = expectedXMTags.iterator();
+ int readsInBam = 0;
+
+ for(SAMRecord r : out) {
+ assertEquals(r.getAttribute(AddOriginalAlignmentTags.OA_TAG_NAME), expectedOAIter.next());
+ assertEquals(r.getAttribute(AddOriginalAlignmentTags.MATE_CONTIG_TAG_NAME), expectedXMIter.next());
+ readsInBam++;
+ }
+ assertEquals(readsInBam, 6);
+ }
+
+
+}
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/ReadOrientationArtifactUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/ReadOrientationArtifactUnitTest.java
index fba3c0bc001..77bb7b7f896 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/ReadOrientationArtifactUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/ReadOrientationArtifactUnitTest.java
@@ -207,4 +207,4 @@ private ReadLikelihoods createReadLikelihoods(final int depth, final dou
return readLikelihoods;
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java
index b13b1c57677..3566d432b79 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java
@@ -27,7 +27,6 @@
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -54,6 +53,7 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest {
private static final File TEN_PCT_CONTAMINATION_TABLE = new File(toolsTestDir, "mutect/ten-pct-contamination.table");
private static final File NA12878_MITO_BAM = new File(toolsTestDir, "mutect/mito/NA12878.bam");
+ private static final File NA12878_MITO_VCF = new File(toolsTestDir, "mutect/mito/unfiltered.vcf");
private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta");
private static final File DEEP_MITO_BAM = new File(largeFileTestDir, "mutect/highDPMTsnippet.bam");
private static final String DEEP_MITO_SAMPLE_NAME = "mixture";
@@ -504,10 +504,11 @@ public void testMitochondria() throws Exception {
final File unfilteredVcf = createTempFile("unfiltered", ".vcf");
final List args = Arrays.asList("-I", NA12878_MITO_BAM.getAbsolutePath(),
- "-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, "NA12878",
"-R", MITO_REF.getAbsolutePath(),
"-L", "chrM:1-1000",
+ "--" + M2ArgumentCollection.MEDIAN_AUTOSOMAL_COVERAGE_LONG_NAME, "1556", //arbitrary "autosomal" mean coverage used only for testing
"-min-pruning", "5",
+ "--" + M2ArgumentCollection.MITOCHONDIRA_MODE_LONG_NAME,
"-O", unfilteredVcf.getAbsolutePath());
runCommandLine(args);
@@ -523,6 +524,30 @@ public void testMitochondria() throws Exception {
"chrM:310-310 [T*, TC]",
"chrM:750-750 [A*, G]");
Assert.assertTrue(expectedKeys.stream().allMatch(variantKeys::contains));
+
+ Assert.assertEquals(variants.get(0).getGenotype("NA12878").getAnyAttribute(GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY), "1556");
+ Assert.assertEquals(variants.get(0).getGenotype("NA12878").getAnyAttribute(GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_KEY), "true");
+ }
+
+ @Test
+ public void testFilterMitochondria() throws Exception {
+ final File filteredVcf = createTempFile("filtered", ".vcf");
+
+ new Main().instanceMain(makeCommandLineArgs(Arrays.asList("-V", NA12878_MITO_VCF.getPath(),
+ "-O", filteredVcf.getPath(), "--" + M2ArgumentCollection.MITOCHONDIRA_MODE_LONG_NAME), FilterMutectCalls.class.getSimpleName()));
+
+ final List variants = VariantContextTestUtils.streamVcf(filteredVcf).collect(Collectors.toList());
+ final Iterator expectedFilters = Arrays.asList(
+ "[]",
+ "[chimeric_original_alignment]",
+ "[low_lod, low_avg_alt_quality]",
+ "[]",
+ "[]",
+ "[]").iterator();
+
+ for(VariantContext v : variants){
+ Assert.assertEquals(v.getFilters().toString(), expectedFilters.next(), "filters don't match expected");
+ }
}
@Test
@@ -767,4 +792,4 @@ private Pair calculateConcordance(final File outputVcf, final Fi
private static String keyForVariant( final VariantContext variant ) {
return String.format("%s:%d-%d %s", variant.getContig(), variant.getStart(), variant.getEnd(), variant.getAlleles());
}
-}
\ No newline at end of file
+}
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/StrandArtifactUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/StrandArtifactUnitTest.java
index e465529df2e..b2169ff46fe 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/StrandArtifactUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/StrandArtifactUnitTest.java
@@ -217,4 +217,4 @@ public void testMissingTumorLod() throws IOException {
Assert.assertNull(posteriorProbabilities);
Assert.assertNull(mapAlleleFractionEstimates);
}
-}
\ No newline at end of file
+}
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.bai b/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.bai
new file mode 100644
index 00000000000..8029cc08169
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.bai differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.bam b/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.bam
new file mode 100644
index 00000000000..d60107764fd
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/mitochondria/NA12878.bam differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/NA12878.bai b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/NA12878.bai
index db118ea9908..afdcb01e14c 100644
Binary files a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/NA12878.bai and b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/NA12878.bai differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/NA12878.bam b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/NA12878.bam
index badc2a9840d..da7bd9174bb 100644
Binary files a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/NA12878.bam and b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/NA12878.bam differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered.vcf
new file mode 100644
index 00000000000..c7aa69d4233
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/mutect/mito/unfiltered.vcf
@@ -0,0 +1,42 @@
+##fileformat=VCFv4.2
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##Mutect Version=2.1
+##contig=
+##filtering_status=Warning: unfiltered Mutect2 calls. Please run FilterMutectCalls to remove false positives.
+##source=Mutect2
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878
+chrM 152 . T C . . DP=1582;ECNT=1;LOD=5266.19;POP_AF=5.000e-08 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:ORIGINAL_CONTIG_MISMATCH:POTENTIAL_POLYMORPHIC_NUMT 0/1:3,1556:0.998:2,777:1,779:30,30:16270,369:60:42:0:true
+chrM 263 . A G . . DP=858;ECNT=4;LOD=2641.72;POP_AF=5.000e-08 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:ORIGINAL_CONTIG_MISMATCH 0/1:1,831:0.999:0,403:1,428:10,30:292,305:60:32:800
+chrM 301 . A AC . . DP=680;ECNT=4;LOD=3.32;POP_AF=5.000e-08 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:ORIGINAL_CONTIG_MISMATCH 0/1:579,53:0.084:243,27:336,26:30,20:309,324:60:34:0
+chrM 302 . A AC,C,ACC . . DP=659;ECNT=4;LOD=891.23,10.66,67.66;POP_AF=5.000e-08,5.000e-08,5.000e-08 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:ORIGINAL_CONTIG_MISMATCH 0/1/2/3:5,401,67,49:0.768,0.128,0.094:2,163,35,20:3,238,32,29:20,20,30,20:419,316,340,278:60,60,60:41,33,38:0
+chrM 310 . T TC . . DP=705;ECNT=4;LOD=1974.89;POP_AF=5.000e-08;RPA=5,6;RU=C;STR GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:ORIGINAL_CONTIG_MISMATCH 0/1:0,658:1.00:0,273:0,385:0,30:0,311:60:33:0
+chrM 750 . A G . . DP=1568;ECNT=1;LOD=5097.90;POP_AF=5.000e-08 GT:AD:AF:F1R2:F2R1:MBQ:MFRL:MMQ:MPOS:ORIGINAL_CONTIG_MISMATCH:POTENTIAL_POLYMORPHIC_NUMT 0/1:1,1524:0.999:0,728:1,796:2,30:417,335:60:40:0:true
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotator/expected/testWithAllAnnotations.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotator/expected/testWithAllAnnotations.vcf
index 0dcdc441c64..271717981c9 100644
--- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotator/expected/testWithAllAnnotations.vcf
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/annotator/VariantAnnotator/expected/testWithAllAnnotations.vcf
@@ -19,6 +19,8 @@
##FORMAT=
##FORMAT=
##FORMAT=
+##FORMAT=
+##FORMAT=
##INFO=
##INFO=
##INFO=