Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mutect2 uses natural logarithms internally #5858

Merged
merged 1 commit into from
Apr 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import org.apache.commons.math3.special.Gamma;
import org.apache.commons.math3.util.FastMath;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.NaturalLogUtils;

import java.util.List;
import java.util.stream.IntStream;
Expand Down Expand Up @@ -89,7 +89,7 @@ static double hetLogLikelihood(final AlleleFractionGlobalParameters parameters,

final double outlierLogLikelihood = logPi + log10ToLog(log10Factorial(a) + log10Factorial(r) - log10Factorial(a + r + 1));

return MathUtils.logSumExp(altMinorLogLikelihood, refMinorLogLikelihood, outlierLogLikelihood);
return NaturalLogUtils.logSumExp(altMinorLogLikelihood, refMinorLogLikelihood, outlierLogLikelihood);
}

static double segmentLogLikelihood(final AlleleFractionGlobalParameters parameters,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import org.apache.commons.math3.util.FastMath;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.NaturalLogUtils;
import org.broadinstitute.hellbender.utils.mcmc.MinibatchSliceSampler;
import org.broadinstitute.hellbender.utils.mcmc.ParameterSampler;

Expand Down Expand Up @@ -176,7 +176,7 @@ public CopyRatioState.OutlierIndicators sample(final RandomGenerator rng,
- normalTerm(indexedCopyRatio.getLog2CopyRatioValue(), state.segmentMean(segmentIndex), state.variance());
final double conditionalProbability =
FastMath.exp(outlierUnnormalizedLogProbability -
MathUtils.logSumLog(outlierUnnormalizedLogProbability, notOutlierUnnormalizedLogProbability));
NaturalLogUtils.logSumLog(outlierUnnormalizedLogProbability, notOutlierUnnormalizedLogProbability));
indicators.add(rng.nextDouble() < conditionalProbability);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.*;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBImport;
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_RMSMappingQuality;
Expand Down Expand Up @@ -493,7 +492,7 @@ private VariantContext callSomaticGenotypes(final VariantContext vc) {

for(final Genotype g : genotypes) {
GenotypeBuilder gb = new GenotypeBuilder(g);
final double[] tlodArray = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(g, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, 0.0);
final double[] tlodArray = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(g, GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY, () -> null, 0.0);
final double[] variantAFArray = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(g, GATKVCFConstants.ALLELE_FRACTION_KEY, () -> null, 0.0);
double variantAFtotal = 0;
final List<Allele> calledAlleles = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ public final class ReferenceConfidenceVariantContextMerger {
protected final OneShotLogger oneShotAnnotationLogger = new OneShotLogger(this.getClass());
protected final OneShotLogger oneShotHeaderLineLogger = new OneShotLogger(this.getClass());
protected final OneShotLogger AS_Warning = new OneShotLogger(this.getClass());
List<String> SOMATIC_INFO_ANNOTATIONS_TO_MOVE = Arrays.asList(GATKVCFConstants.TUMOR_LOD_KEY);
List<String> SOMATIC_INFO_ANNOTATIONS_TO_MOVE = Arrays.asList(GATKVCFConstants.TUMOR_LOG_10_ODDS_KEY);

private static final List<String> SOMATIC_FORMAT_ANNOTATIONS_TO_KEEP = Arrays.asList(
GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY,
GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY,
GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY,
VCFConstants.PHASE_SET_KEY);
private static final List<String> SOMATIC_INFO_ANNOTATIONS_TO_DROP = Arrays.asList(
GATKVCFConstants.POPULATION_AF_VCF_ATTRIBUTE);
GATKVCFConstants.POPULATION_AF_KEY);

public ReferenceConfidenceVariantContextMerger(VariantAnnotatorEngine engine, final VCFHeader inputHeader) {
this(engine, inputHeader, false, false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.AddOriginalAlignmentTags;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.Mutect2FilteringEngine;
import org.broadinstitute.hellbender.utils.*;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
Expand Down Expand Up @@ -45,7 +46,7 @@ public Map<String, Object> annotate(ReferenceContext ref, VariantContext vc, Rea
Utils.nonNull(vc);
Utils.nonNull(likelihoods);

final double[] lods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final double[] lods = Mutect2FilteringEngine.getTumorLogOdds(vc);
if (lods==null) {
warning.warn(String.format("One or more variant contexts is missing the 'TLOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY));
return Collections.emptyMap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public class HaplotypeCallerReadThreadingAssemblerArgumentCollection extends Rea
public ReadThreadingAssembler makeReadThreadingAssembler() {
final ReadThreadingAssembler assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes,
dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef, numPruningSamples, useAdaptivePruning ? 0 : minPruneFactor,
useAdaptivePruning, initialErrorRateForPruning, pruningLog10OddsThreshold, maxUnprunedVariants);
useAdaptivePruning, initialErrorRateForPruning, pruningLogOddsThreshold, maxUnprunedVariants);
assemblyEngine.setDebugGraphTransformations(debugGraphTransformations);
assemblyEngine.setRecoverDanglingBranches(!doNotRecoverDanglingBranches);
assemblyEngine.setRecoverAllDanglingBranches(recoverAllDanglingBranches);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public class MutectReadThreadingAssemblerArgumentCollection extends ReadThreadin
public ReadThreadingAssembler makeReadThreadingAssembler() {
final ReadThreadingAssembler assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes,
dontIncreaseKmerSizesForCycles, allowNonUniqueKmersInRef, numPruningSamples, disableAdaptivePruning ? minPruneFactor : 0,
!disableAdaptivePruning, initialErrorRateForPruning, pruningLog10OddsThreshold, maxUnprunedVariants);
!disableAdaptivePruning, initialErrorRateForPruning, pruningLogOddsThreshold, maxUnprunedVariants);
assemblyEngine.setDebugGraphTransformations(debugGraphTransformations);
assemblyEngine.setRecoverDanglingBranches(true);
assemblyEngine.setRecoverAllDanglingBranches(recoverAllDanglingBranches);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.hellbender.utils.MathUtils;

import java.io.Serializable;
import java.util.List;
Expand All @@ -15,7 +16,7 @@
public abstract class ReadThreadingAssemblerArgumentCollection implements Serializable {
private static final long serialVersionUID = 1L;

public static final double DEFAULT_PRUNING_LOG_ODDS_THRESHOLD = 1.0;
public static final double DEFAULT_PRUNING_LOG_ODDS_THRESHOLD = MathUtils.log10ToLog(1.0);

public static final String ERROR_CORRECT_READS_LONG_NAME = "error-correct-reads";

Expand Down Expand Up @@ -141,8 +142,8 @@ public abstract class ReadThreadingAssemblerArgumentCollection implements Serial
* Log-10 likelihood ratio threshold for adaptive pruning algorithm.
*/
@Advanced
@Argument(fullName="pruning-lod-threshold", doc = "Log-10 likelihood ratio threshold for adaptive pruning algorithm", optional = true)
public double pruningLog10OddsThreshold = DEFAULT_PRUNING_LOG_ODDS_THRESHOLD;
@Argument(fullName="pruning-lod-threshold", doc = "Ln likelihood ratio threshold for adaptive pruning algorithm", optional = true)
public double pruningLogOddsThreshold = DEFAULT_PRUNING_LOG_ODDS_THRESHOLD;

/**
* The maximum number of variants in graph the adaptive pruner will allow
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ private double chainLogOdds(final Path<V,E> chain, final BaseGraph<V,E> graph, f
final int rightMultiplicity = chain.getLastEdge().getMultiplicity();

final double leftLogOdds = graph.isSource(chain.getFirstVertex()) ? 0.0 :
Mutect2Engine.lnLikelihoodRatio(leftTotalMultiplicity - leftMultiplicity, leftMultiplicity, errorRate);
Mutect2Engine.logLikelihoodRatio(leftTotalMultiplicity - leftMultiplicity, leftMultiplicity, errorRate);
final double rightLogOdds = graph.isSink(chain.getLastVertex()) ? 0.0 :
Mutect2Engine.lnLikelihoodRatio(rightTotalMultiplicity - rightMultiplicity, rightMultiplicity, errorRate);
Mutect2Engine.logLikelihoodRatio(rightTotalMultiplicity - rightMultiplicity, rightMultiplicity, errorRate);

return FastMath.max(leftLogOdds, rightLogOdds);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,15 @@ public final class ReadThreadingAssembler {
public ReadThreadingAssembler(final int maxAllowedPathsForReadThreadingAssembler, final List<Integer> kmerSizes,
final boolean dontIncreaseKmerSizesForCycles, final boolean allowNonUniqueKmersInRef,
final int numPruningSamples, final int pruneFactor, final boolean useAdaptivePruning,
final double initialErrorRateForPruning, final double pruningLog10OddsThreshold,
final double initialErrorRateForPruning, final double pruningLogOddsThreshold,
final int maxUnprunedVariants) {
Utils.validateArg( maxAllowedPathsForReadThreadingAssembler >= 1, "numBestHaplotypesPerGraph should be >= 1 but got " + maxAllowedPathsForReadThreadingAssembler);
this.kmerSizes = kmerSizes;
this.dontIncreaseKmerSizesForCycles = dontIncreaseKmerSizesForCycles;
this.allowNonUniqueKmersInRef = allowNonUniqueKmersInRef;
this.numPruningSamples = numPruningSamples;
this.pruneFactor = pruneFactor;
chainPruner = useAdaptivePruning ? new AdaptiveChainPruner<>(initialErrorRateForPruning, MathUtils.log10ToLog(pruningLog10OddsThreshold), maxUnprunedVariants) :
chainPruner = useAdaptivePruning ? new AdaptiveChainPruner<>(initialErrorRateForPruning, pruningLogOddsThreshold, maxUnprunedVariants) :
new LowWeightChainPruner<>(pruneFactor);
numBestHaplotypesPerGraph = maxAllowedPathsForReadThreadingAssembler;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.hellbender.tools.walkers.mutect.filtering.FilterMutectCalls;
import org.broadinstitute.hellbender.tools.walkers.readorientation.CollectF1R2CountsArgumentCollection;
import org.broadinstitute.hellbender.utils.MathUtils;

import java.io.File;
import java.io.Serializable;
Expand All @@ -33,14 +34,14 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final String DEFAULT_AF_SHORT_NAME = "default-af";
public static final String EMISSION_LOD_LONG_NAME = "tumor-lod-to-emit";
public static final String EMISSION_LOG_SHORT_NAME = "emit-lod";
public static final String INITIAL_TUMOR_LOD_LONG_NAME = "initial-tumor-lod";
public static final String INITIAL_TUMOR_LOD_SHORT_NAME = "init-lod";
public static final String INITIAL_TUMOR_LOG_10_ODDS_LONG_NAME = "initial-tumor-lod";
public static final String INITIAL_TUMOR_LOG_10_ODDS_SHORT_NAME = "init-lod";
public static final String MAX_POPULATION_AF_LONG_NAME = "max-population-af";
public static final String MAX_POPULATION_AF_SHORT_NAME = "max-af";
public static final String DOWNSAMPLING_STRIDE_LONG_NAME = "downsampling-stride";
public static final String DOWNSAMPLING_STRIDE_SHORT_NAME = "stride";
public static final String MAX_SUSPICIOUS_READS_PER_ALIGNMENT_START_LONG_NAME = "max-suspicious-reads-per-alignment-start";
public static final String NORMAL_LOD_LONG_NAME = "normal-lod";
public static final String NORMAL_LOG_10_ODDS_LONG_NAME = "normal-lod";
public static final String MAX_MNP_DISTANCE_LONG_NAME = "max-mnp-distance";
public static final String MAX_MNP_DISTANCE_SHORT_NAME = "mnp-dist";
public static final String IGNORE_ITR_ARTIFACTS_LONG_NAME = "ignore-itr-artifacts";
Expand All @@ -53,14 +54,15 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final double DEFAULT_AF_FOR_TUMOR_ONLY_CALLING = 5e-8;
public static final double DEFAULT_AF_FOR_TUMOR_NORMAL_CALLING = 1e-6;
public static final double DEFAULT_AF_FOR_MITO_CALLING = 4e-3;
public static final double DEFAULT_EMISSION_LOD = 3.0;
public static final double DEFAULT_EMISSION_LOG_10_ODDS = 3.0;
public static final double DEFAULT_MITO_EMISSION_LOD = 0;
public static final double DEFAULT_INITIAL_LOD = 2.0;
public static final double DEFAULT_MITO_INITIAL_LOD = 0;
public static final double DEFAULT_GVCF_LOD = Double.NEGATIVE_INFINITY;
public static final double DEFAULT_INITIAL_LOG_10_ODDS = 2.0;
public static final double DEFAULT_NORMAL_LOG_10_ODDS = 2.2;
public static final double DEFAULT_MITO_INITIAL_LOG_10_ODDS = 0;
public static final double DEFAULT_GVCF_LOG_10_ODDS = Double.NEGATIVE_INFINITY;
public static final int DEFAULT_CALLABLE_DEPTH = 10;

public static final double DEFAULT_MITO_PRUNING_LOG_ODDS_THRESHOLD = -4;
public static final double DEFAULT_MITO_PRUNING_LOG_ODDS_THRESHOLD = MathUtils.log10ToLog(-4);

protected ReadThreadingAssemblerArgumentCollection getReadThreadingAssemblerArgumentCollection(){
return new MutectReadThreadingAssemblerArgumentCollection();
Expand All @@ -70,8 +72,8 @@ protected ReadThreadingAssemblerArgumentCollection getReadThreadingAssemblerArgu
public ReadThreadingAssembler createReadThreadingAssembler(){
if(mitochondria ) {
assemblerArgs.recoverAllDanglingBranches = true;
if (assemblerArgs.pruningLog10OddsThreshold == ReadThreadingAssemblerArgumentCollection.DEFAULT_PRUNING_LOG_ODDS_THRESHOLD) {
assemblerArgs.pruningLog10OddsThreshold = DEFAULT_MITO_PRUNING_LOG_ODDS_THRESHOLD;
if (assemblerArgs.pruningLogOddsThreshold == ReadThreadingAssemblerArgumentCollection.DEFAULT_PRUNING_LOG_ODDS_THRESHOLD) {
assemblerArgs.pruningLogOddsThreshold = DEFAULT_MITO_PRUNING_LOG_ODDS_THRESHOLD;
}
}

Expand Down Expand Up @@ -152,27 +154,27 @@ public double getDefaultAlleleFrequency() {
* Default setting of 3 is permissive and will emit some amount of negative training data that
* {@link FilterMutectCalls} should then filter.
*/
@Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "LOD threshold to emit variant to VCF.")
private double emissionLodArg = DEFAULT_EMISSION_LOD;
@Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_LOG_SHORT_NAME, optional = true, doc = "Log 10 odds threshold to emit variant to VCF.")
private double emissionLog10Odds = DEFAULT_EMISSION_LOG_10_ODDS;

public double getEmissionLod() {
public double getEmissionLogOdds() {
if (emitReferenceConfidence != ReferenceConfidenceMode.NONE) {
return DEFAULT_GVCF_LOD;
return MathUtils.log10ToLog(DEFAULT_GVCF_LOG_10_ODDS);
}
return mitochondria && emissionLodArg == DEFAULT_EMISSION_LOD ? DEFAULT_MITO_EMISSION_LOD : emissionLodArg;
return MathUtils.log10ToLog(mitochondria && emissionLog10Odds == DEFAULT_EMISSION_LOG_10_ODDS ? DEFAULT_MITO_EMISSION_LOD : emissionLog10Odds);
}

/**
* Only variants with estimated tumor LODs exceeding this threshold will be considered active.
*/
@Argument(fullName = INITIAL_TUMOR_LOD_LONG_NAME, shortName = INITIAL_TUMOR_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to consider pileup active.")
private double initialLod = DEFAULT_INITIAL_LOD;
@Argument(fullName = INITIAL_TUMOR_LOG_10_ODDS_LONG_NAME, shortName = INITIAL_TUMOR_LOG_10_ODDS_SHORT_NAME, optional = true, doc = "Log 10 odds threshold to consider pileup active.")
private double initialLog10Odds = DEFAULT_INITIAL_LOG_10_ODDS;

public double getInitialLod() {
public double getInitialLogOdds() {
if (emitReferenceConfidence != ReferenceConfidenceMode.NONE) {
return DEFAULT_GVCF_LOD;
return MathUtils.log10ToLog(DEFAULT_GVCF_LOG_10_ODDS);
}
return mitochondria && initialLod == DEFAULT_INITIAL_LOD ? DEFAULT_MITO_INITIAL_LOD : initialLod;
return MathUtils.log10ToLog(mitochondria && initialLog10Odds == DEFAULT_INITIAL_LOG_10_ODDS ? DEFAULT_MITO_INITIAL_LOG_10_ODDS : initialLog10Odds);
}


Expand Down Expand Up @@ -210,8 +212,8 @@ public double getInitialLod() {
* It is unlikely such analyses will require changing the default value. Increasing the parameter may increase the sensitivity of somatic calling,
* but may also increase calling false positive, i.e. germline, variants.
*/
@Argument(fullName = NORMAL_LOD_LONG_NAME, optional = true, doc = "LOD threshold for calling normal variant non-germline.")
public double normalLod = 2.2;
@Argument(fullName = NORMAL_LOG_10_ODDS_LONG_NAME, optional = true, doc = "Log 10 odds threshold for calling normal variant non-germline.")
public double normalLog10Odds = DEFAULT_NORMAL_LOG_10_ODDS;

/**
* Two or more phased substitutions separated by this distance or less are merged into MNPs.
Expand Down
Loading