Skip to content

Commit

Permalink
Fixed M2 bug where triallelic normal artifacts were sometimes hidden …
Browse files Browse the repository at this point in the history
…from filtering engine (#4809)
  • Loading branch information
davidbenjamin authored May 25, 2018
1 parent f005644 commit 3189612
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final String GERMLINE_RESOURCE_LONG_NAME = "germline-resource";
public static final String DEFAULT_AF_LONG_NAME = "af-of-alleles-not-in-resource";
public static final String DEFAULT_AF_SHORT_NAME = "default-af";
public static final String EMISSION_LOG_LONG_NAME = "tumor-lod-to-emit";
public static final String EMISSION_LOD_LONG_NAME = "tumor-lod-to-emit";
public static final String EMISSION_LOG_SHORT_NAME = "emit-lod";
public static final String INITIAL_TUMOR_LOD_LONG_NAME = "initial-tumor-lod";
public static final String INITIAL_TUMOR_LOD_SHORT_NAME = "init-lod";
Expand Down Expand Up @@ -105,14 +105,14 @@ public double getDefaultAlleleFrequency() {
* Default setting of 3 is permissive and will emit some amount of negative training data that
* {@link FilterMutectCalls} should then filter.
*/
@Argument(fullName = EMISSION_LOG_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit tumor variant to VCF.")
public double emissionLodThreshold = 3.0;
@Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit tumor variant to VCF.")
public double emissionLod = 3.0;

/**
* Only variants with estimated tumor LODs exceeding this threshold will be considered active.
*/
@Argument(fullName = INITIAL_TUMOR_LOD_LONG_NAME, shortName = INITIAL_TUMOR_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to consider pileup active.")
public double initialTumorLodThreshold = 2.0;
public double initialTumorLod = 2.0;

/**
* In tumor-only mode, we discard variants with population allele frequencies greater than this threshold.
Expand Down Expand Up @@ -140,7 +140,7 @@ public double getDefaultAlleleFrequency() {
* but may also increase calling false positive, i.e. germline, variants.
*/
@Argument(fullName = NORMAL_LOD_LONG_NAME, optional = true, doc = "LOD threshold for calling normal variant non-germline.")
public double normalLodThreshold = 2.2;
public double normalLod = 2.2;

/**
* Two or more phased substitutions separated by this distance or less are merged into MNPs.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer
final double tumorLog10Odds = -QualityUtils.qualToErrorProbLog10(tumorAltCountAndQualSum.getSecond()) +
MathUtils.log10Factorial(tumorAltCount) + MathUtils.log10Factorial(tumorRefCount) - MathUtils.log10Factorial(tumorPileup.size() + 1);

if (tumorLog10Odds < MTAC.initialTumorLodThreshold) {
if (tumorLog10Odds < MTAC.initialTumorLod) {
return new ActivityProfileState(refInterval, 0.0);
} else if (hasNormal() && !MTAC.genotypeGermlineSites) {
final ReadPileup normalPileup = pileup.getPileupForSample(normalSample, header);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,30 +126,39 @@ public CalledHaplotypes callMutations(
final Optional<PerAlleleCollection<Double>> normalLog10Odds = getForNormal(() -> diploidAltLog10Odds(log10NormalMatrix.get()));
final Optional<PerAlleleCollection<Double>> normalArtifactLog10Odds = getForNormal(() -> somaticLog10Odds(log10NormalMatrix.get()));

final Set<Allele> allelesConsistentWithGivenAlleles = getAllelesConsistentWithGivenAlleles(givenAlleles, loc, mergedVC);
final List<Allele> somaticAltAlleles = mergedVC.getAlternateAlleles().stream()
.filter(allele -> allelesConsistentWithGivenAlleles.contains(allele) ||
((tumorLog10Odds.getAlt(allele) > MTAC.emissionLodThreshold) &&
(!hasNormal || MTAC.genotypeGermlineSites || normalLog10Odds.get().getAlt(allele) > MTAC.normalLodThreshold)))
final Set<Allele> forcedAlleles = getAllelesConsistentWithGivenAlleles(givenAlleles, loc, mergedVC);

final List<Allele> tumorAltAlleles = mergedVC.getAlternateAlleles().stream()
.filter(allele -> forcedAlleles.contains(allele) || tumorLog10Odds.getAlt(allele) > MTAC.emissionLod)
.collect(Collectors.toList());
final List<Allele> allSomaticAlleles = ListUtils.union(Arrays.asList(mergedVC.getReference()), somaticAltAlleles);
if (somaticAltAlleles.isEmpty()) {

final long somaticAltCount = tumorAltAlleles.stream()
.filter(allele -> forcedAlleles.contains(allele) || !hasNormal || MTAC.genotypeGermlineSites || normalLog10Odds.get().getAlt(allele) > MTAC.normalLod)
.count();

// if every alt allele is germline, skip this variant. However, if some alt alleles are germline and others
// are not we emit them all so that the filtering engine can see them
if (somaticAltCount == 0) {
continue;
}

final LikelihoodMatrix<Allele> subsettedLog10TumorMatrix = new SubsettedLikelihoodMatrix<>(log10TumorMatrix, allSomaticAlleles);

final List<Allele> allAllelesToEmit = ListUtils.union(Arrays.asList(mergedVC.getReference()), tumorAltAlleles);


final LikelihoodMatrix<Allele> subsettedLog10TumorMatrix = new SubsettedLikelihoodMatrix<>(log10TumorMatrix, allAllelesToEmit);
final Optional<LikelihoodMatrix<Allele>> subsettedLog10NormalMatrix =
getForNormal(() -> new SubsettedLikelihoodMatrix<>(log10NormalMatrix.get(), allSomaticAlleles));
getForNormal(() -> new SubsettedLikelihoodMatrix<>(log10NormalMatrix.get(), allAllelesToEmit));

final Map<String, Object> populationAFAnnotation = GermlineProbabilityCalculator.getPopulationAFAnnotation(featureContext.getValues(MTAC.germlineResource, loc), somaticAltAlleles, MTAC.getDefaultAlleleFrequency());
final Map<String, Object> populationAFAnnotation = GermlineProbabilityCalculator.getPopulationAFAnnotation(featureContext.getValues(MTAC.germlineResource, loc), tumorAltAlleles, MTAC.getDefaultAlleleFrequency());

final VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC)
.alleles(allSomaticAlleles)
.alleles(allAllelesToEmit)
.attributes(populationAFAnnotation)
.attribute(GATKVCFConstants.TUMOR_LOD_KEY, somaticAltAlleles.stream().mapToDouble(tumorLog10Odds::getAlt).toArray());
.attribute(GATKVCFConstants.TUMOR_LOD_KEY, tumorAltAlleles.stream().mapToDouble(tumorLog10Odds::getAlt).toArray());

normalLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, values.asDoubleArray(somaticAltAlleles)));
normalArtifactLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE, values.asDoubleArray(somaticAltAlleles)));
normalLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, values.asDoubleArray(tumorAltAlleles)));
normalArtifactLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE, values.asDoubleArray(tumorAltAlleles)));

if (!featureContext.getValues(MTAC.pon, mergedVC.getStart()).isEmpty()) {
callVcb.attribute(GATKVCFConstants.IN_PON_VCF_ATTRIBUTE, true);
Expand Down

0 comments on commit 3189612

Please sign in to comment.