From 3189612e987128275b286448b3659e9bade62594 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Fri, 25 May 2018 12:11:57 -0400 Subject: [PATCH] Fixed M2 bug where triallelic normal artifacts were sometimes hidden from filtering engine (#4809) --- .../walkers/mutect/M2ArgumentCollection.java | 10 ++--- .../tools/walkers/mutect/Mutect2Engine.java | 2 +- .../mutect/SomaticGenotypingEngine.java | 37 ++++++++++++------- 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java index 87dffe25383..ca8527266c1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java @@ -26,7 +26,7 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection public static final String GERMLINE_RESOURCE_LONG_NAME = "germline-resource"; public static final String DEFAULT_AF_LONG_NAME = "af-of-alleles-not-in-resource"; public static final String DEFAULT_AF_SHORT_NAME = "default-af"; - public static final String EMISSION_LOG_LONG_NAME = "tumor-lod-to-emit"; + public static final String EMISSION_LOD_LONG_NAME = "tumor-lod-to-emit"; public static final String EMISSION_LOG_SHORT_NAME = "emit-lod"; public static final String INITIAL_TUMOR_LOD_LONG_NAME = "initial-tumor-lod"; public static final String INITIAL_TUMOR_LOD_SHORT_NAME = "init-lod"; @@ -105,14 +105,14 @@ public double getDefaultAlleleFrequency() { * Default setting of 3 is permissive and will emit some amount of negative training data that * {@link FilterMutectCalls} should then filter. */ - @Argument(fullName = EMISSION_LOG_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit tumor variant to VCF.") - public double emissionLodThreshold = 3.0; + @Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit tumor variant to VCF.") + public double emissionLod = 3.0; /** * Only variants with estimated tumor LODs exceeding this threshold will be considered active. */ @Argument(fullName = INITIAL_TUMOR_LOD_LONG_NAME, shortName = INITIAL_TUMOR_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to consider pileup active.") - public double initialTumorLodThreshold = 2.0; + public double initialTumorLod = 2.0; /** * In tumor-only mode, we discard variants with population allele frequencies greater than this threshold. @@ -140,7 +140,7 @@ public double getDefaultAlleleFrequency() { * but may also increase calling false positive, i.e. germline, variants. */ @Argument(fullName = NORMAL_LOD_LONG_NAME, optional = true, doc = "LOD threshold for calling normal variant non-germline.") - public double normalLodThreshold = 2.2; + public double normalLod = 2.2; /** * Two or more phased substitutions separated by this distance or less are merged into MNPs. diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 210ca0809a0..1be41c7415a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -237,7 +237,7 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer final double tumorLog10Odds = -QualityUtils.qualToErrorProbLog10(tumorAltCountAndQualSum.getSecond()) + MathUtils.log10Factorial(tumorAltCount) + MathUtils.log10Factorial(tumorRefCount) - MathUtils.log10Factorial(tumorPileup.size() + 1); - if (tumorLog10Odds < MTAC.initialTumorLodThreshold) { + if (tumorLog10Odds < MTAC.initialTumorLod) { return new ActivityProfileState(refInterval, 0.0); } else if (hasNormal() && !MTAC.genotypeGermlineSites) { final ReadPileup normalPileup = pileup.getPileupForSample(normalSample, header); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java index d8aec352d52..a3da5a3dd01 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java @@ -126,30 +126,39 @@ public CalledHaplotypes callMutations( final Optional> normalLog10Odds = getForNormal(() -> diploidAltLog10Odds(log10NormalMatrix.get())); final Optional> normalArtifactLog10Odds = getForNormal(() -> somaticLog10Odds(log10NormalMatrix.get())); - final Set allelesConsistentWithGivenAlleles = getAllelesConsistentWithGivenAlleles(givenAlleles, loc, mergedVC); - final List somaticAltAlleles = mergedVC.getAlternateAlleles().stream() - .filter(allele -> allelesConsistentWithGivenAlleles.contains(allele) || - ((tumorLog10Odds.getAlt(allele) > MTAC.emissionLodThreshold) && - (!hasNormal || MTAC.genotypeGermlineSites || normalLog10Odds.get().getAlt(allele) > MTAC.normalLodThreshold))) + final Set forcedAlleles = getAllelesConsistentWithGivenAlleles(givenAlleles, loc, mergedVC); + + final List tumorAltAlleles = mergedVC.getAlternateAlleles().stream() + .filter(allele -> forcedAlleles.contains(allele) || tumorLog10Odds.getAlt(allele) > MTAC.emissionLod) .collect(Collectors.toList()); - final List allSomaticAlleles = ListUtils.union(Arrays.asList(mergedVC.getReference()), somaticAltAlleles); - if (somaticAltAlleles.isEmpty()) { + + final long somaticAltCount = tumorAltAlleles.stream() + .filter(allele -> forcedAlleles.contains(allele) || !hasNormal || MTAC.genotypeGermlineSites || normalLog10Odds.get().getAlt(allele) > MTAC.normalLod) + .count(); + + // if every alt allele is germline, skip this variant. However, if some alt alleles are germline and others + // are not we emit them all so that the filtering engine can see them + if (somaticAltCount == 0) { continue; } - final LikelihoodMatrix subsettedLog10TumorMatrix = new SubsettedLikelihoodMatrix<>(log10TumorMatrix, allSomaticAlleles); + + final List allAllelesToEmit = ListUtils.union(Arrays.asList(mergedVC.getReference()), tumorAltAlleles); + + + final LikelihoodMatrix subsettedLog10TumorMatrix = new SubsettedLikelihoodMatrix<>(log10TumorMatrix, allAllelesToEmit); final Optional> subsettedLog10NormalMatrix = - getForNormal(() -> new SubsettedLikelihoodMatrix<>(log10NormalMatrix.get(), allSomaticAlleles)); + getForNormal(() -> new SubsettedLikelihoodMatrix<>(log10NormalMatrix.get(), allAllelesToEmit)); - final Map populationAFAnnotation = GermlineProbabilityCalculator.getPopulationAFAnnotation(featureContext.getValues(MTAC.germlineResource, loc), somaticAltAlleles, MTAC.getDefaultAlleleFrequency()); + final Map populationAFAnnotation = GermlineProbabilityCalculator.getPopulationAFAnnotation(featureContext.getValues(MTAC.germlineResource, loc), tumorAltAlleles, MTAC.getDefaultAlleleFrequency()); final VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC) - .alleles(allSomaticAlleles) + .alleles(allAllelesToEmit) .attributes(populationAFAnnotation) - .attribute(GATKVCFConstants.TUMOR_LOD_KEY, somaticAltAlleles.stream().mapToDouble(tumorLog10Odds::getAlt).toArray()); + .attribute(GATKVCFConstants.TUMOR_LOD_KEY, tumorAltAlleles.stream().mapToDouble(tumorLog10Odds::getAlt).toArray()); - normalLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, values.asDoubleArray(somaticAltAlleles))); - normalArtifactLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE, values.asDoubleArray(somaticAltAlleles))); + normalLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, values.asDoubleArray(tumorAltAlleles))); + normalArtifactLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE, values.asDoubleArray(tumorAltAlleles))); if (!featureContext.getValues(MTAC.pon, mergedVC.getStart()).isEmpty()) { callVcb.attribute(GATKVCFConstants.IN_PON_VCF_ATTRIBUTE, true);