From 5a02caa5f9ae8dd5db485bc1144f4719e174dea3 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Wed, 7 Aug 2019 13:40:14 -0400 Subject: [PATCH] Unified HaplotypeCaller's force-calling mode with Mutect2's --- .../hellbender/engine/FeatureCache.java | 3 +- .../hellbender/engine/FeatureContext.java | 3 +- .../hellbender/engine/FeatureDataSource.java | 5 +- .../hellbender/engine/FeatureManager.java | 3 +- .../hellbender/engine/ReferenceContext.java | 13 +- .../tools/walkers/GenotypeGVCFsEngine.java | 2 +- .../walkers/genotyper/GenotypingEngine.java | 216 +++--------------- .../GenotypingGivenAllelesUtils.java | 65 ------ .../genotyper/GenotypingOutputMode.java | 23 -- .../genotyper/MinimalGenotypingEngine.java | 6 +- .../tools/walkers/genotyper/OutputMode.java | 3 +- .../StandardCallerArgumentCollection.java | 18 -- .../walkers/genotyper/VariantCallContext.java | 50 ---- ...AssemblyBasedCallerArgumentCollection.java | 9 + .../AssemblyBasedCallerUtils.java | 63 +++-- .../AssemblyRegionTrimmer.java | 14 +- .../HaplotypeCallerArgumentCollection.java | 5 - .../HaplotypeCallerEngine.java | 43 +--- .../HaplotypeCallerGenotypingEngine.java | 68 +----- ...dThreadingAssemblerArgumentCollection.java | 11 - ...dThreadingAssemblerArgumentCollection.java | 10 +- .../walkers/mutect/M2ArgumentCollection.java | 7 - .../tools/walkers/mutect/Mutect2Engine.java | 19 +- .../mutect/SomaticGenotypingEngine.java | 33 +-- .../walkers/variantutils/ReblockGVCF.java | 2 +- .../variant/GATKVariantContextUtils.java | 3 + .../ReducibleAnnotationBaseTest.java | 2 +- .../genotyper/GenotypingEngineUnitTest.java | 10 +- .../GenotypingGivenAllelesUtilsUnitTest.java | 40 ---- .../AssemblyBasedCallerUtilsUnitTest.java | 15 +- .../HaplotypeCallerIntegrationTest.java | 41 +--- .../mutect/Mutect2IntegrationTest.java | 2 +- ...ted.testGenotypeGivenAllelesMode.gatk4.vcf | 35 --- ...testGenotypeGivenAllelesMode.gatk4.vcf.idx | Bin 428 -> 0 bytes ...tGenotypeGivenAllelesMode_givenAlleles.vcf | 5 +- 35 files changed, 167 insertions(+), 680 deletions(-) delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingOutputMode.java delete mode 100644 src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/VariantCallContext.java delete mode 100644 src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtilsUnitTest.java delete mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGenotypeGivenAllelesMode.gatk4.vcf delete mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGenotypeGivenAllelesMode.gatk4.vcf.idx diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureCache.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureCache.java index b33b35ecd0f..91f82dfac35 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureCache.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureCache.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.engine; +import htsjdk.samtools.util.Locatable; import htsjdk.tribble.Feature; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -150,7 +151,7 @@ public void fill( final Iterator featureIter, final SimpleInterv * @param interval the interval to check against the contents of our cache * @return true if all records overlapping 
the provided interval are already contained in our cache, otherwise false */ - public boolean cacheHit( final SimpleInterval interval ) { + public boolean cacheHit( final Locatable interval ) { final boolean cacheHit = cachedInterval != null && cachedInterval.contains(interval); if ( cacheHit ) { diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureContext.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureContext.java index 0a8f6b2599b..75e0e3e2124 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureContext.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureContext.java @@ -1,6 +1,7 @@ package org.broadinstitute.hellbender.engine; import com.google.common.annotations.VisibleForTesting; +import htsjdk.samtools.util.Locatable; import htsjdk.tribble.Feature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -165,7 +166,7 @@ public List getValues(final FeatureInput featureDescri * this FeatureContext's query interval as expanded by the specified number of leading/trailing bases. * Empty List if there is no backing data source and/or interval. */ - public List getValues(final FeatureInput featureDescriptor, final SimpleInterval queryInterval) { + public List getValues(final FeatureInput featureDescriptor, final Locatable queryInterval) { if (featureManager == null || queryInterval == null || featureDescriptor == null) { return Collections.emptyList(); } diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java index 7c14bc78052..aa745e395b8 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureDataSource.java @@ -2,6 +2,7 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.IOUtil; +import htsjdk.samtools.util.Locatable; import htsjdk.tribble.*; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; @@ -509,7 +510,7 @@ public Iterator query(final SimpleInterval interval) { * @param interval retrieve all Features overlapping this interval * @return a List of all Features in this data source that overlap the provided interval */ - public List queryAndPrefetch(final SimpleInterval interval) { + public List queryAndPrefetch(final Locatable interval) { if (!supportsRandomAccess) { throw new UserException("Input " + featureInput.getFeaturePath() + " must support random access to enable queries by interval. 
" + "If it's a file, please index it using the bundled tool " + IndexFeatureFile.class.getSimpleName()); @@ -540,7 +541,7 @@ public List queryAndPrefetch(final SimpleInterval interval) { * * @param interval the query interval that produced a cache miss */ - private void refillQueryCache(final SimpleInterval interval) { + private void refillQueryCache(final Locatable interval) { // Tribble documentation states that having multiple iterators open simultaneously over the same FeatureReader // results in undefined behavior closeOpenIterationIfNecessary(); diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java index 2283d783485..25553b5b225 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureManager.java @@ -2,6 +2,7 @@ import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.Locatable; import htsjdk.tribble.Feature; import htsjdk.tribble.FeatureCodec; import htsjdk.variant.vcf.VCFHeader; @@ -342,7 +343,7 @@ public List getAllSequenceDictionaries() { * @return A List of all Features in the backing data source for the provided FeatureInput that overlap * the provided interval (may be empty if there are none, but never null) */ - public List getFeatures( final FeatureInput featureDescriptor, final SimpleInterval interval ) { + public List getFeatures( final FeatureInput featureDescriptor, final Locatable interval ) { final FeatureDataSource dataSource = lookupDataSource(featureDescriptor); // No danger of a ClassCastException here, since we verified that the FeatureDataSource for this diff --git a/src/main/java/org/broadinstitute/hellbender/engine/ReferenceContext.java b/src/main/java/org/broadinstitute/hellbender/engine/ReferenceContext.java index 4ad4fff65ac..e36e154640b 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/ReferenceContext.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/ReferenceContext.java @@ -2,6 +2,7 @@ import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.reference.ReferenceSequence; +import htsjdk.samtools.util.Locatable; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -30,7 +31,7 @@ * return empty arrays / iterators. You can determine whether there is a backing source of reference * data via {@link #hasBackingDataSource()}, and whether there is an interval via {@link #getInterval}. */ -public final class ReferenceContext implements Iterable { +public final class ReferenceContext implements Iterable, Locatable { /** * Backing data source. Null if there is no reference data. @@ -142,6 +143,16 @@ public ReferenceContext( final ReferenceDataSource dataSource, final SimpleInter } } + @Override + public String getContig() { return interval.getContig(); } + + @Override + public int getStart() { return interval.getStart(); } + + @Override + public int getEnd() { return interval.getEnd(); } + + /** * Determines whether this ReferenceContext has a backing reference data source. 
A ReferenceContext with * no backing data source will always return an empty bases array from {@link #getBases()} and an diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java index abc370a23dd..eb1338f1d49 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java @@ -259,7 +259,7 @@ private VariantContext calculateGenotypes(VariantContext vc){ final GenotypeLikelihoodsCalculationModel model = vc.getType() == VariantContext.Type.INDEL ? GenotypeLikelihoodsCalculationModel.INDEL : GenotypeLikelihoodsCalculationModel.SNP; - return genotypingEngine.calculateGenotypes(vc, model, null); + return genotypingEngine.calculateGenotypes(vc, model); } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java index ea94ffcea3f..44523b01bf6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java @@ -1,22 +1,17 @@ package org.broadinstitute.hellbender.tools.walkers.genotyper; import com.google.common.annotations.VisibleForTesting; -import htsjdk.samtools.SAMFileHeader; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; import htsjdk.variant.vcf.VCFInfoHeaderLine; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.barclay.argparser.CommandLineException; -import org.broadinstitute.hellbender.engine.AlignmentContext; -import org.broadinstitute.hellbender.engine.FeatureContext; -import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.*; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils; import org.broadinstitute.hellbender.utils.*; -import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods; import org.broadinstitute.hellbender.utils.genotyper.SampleList; -import org.broadinstitute.hellbender.utils.pileup.ReadPileup; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; @@ -193,48 +188,20 @@ public Config getConfiguration() { return configuration; } - /** - * Completes a variant context with genotype calls and associated annotations given the genotype likelihoods and - * the model that need to be applied. - * - * @param vc variant-context to complete. - * @param model model name. - * - * @throws IllegalArgumentException if {@code model} or {@code vc} is {@code null}. - * - * @return can be {@code null} indicating that genotyping it not possible with the information provided. 
- */ - public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel model, final SAMFileHeader header) { - Utils.nonNull(vc, "vc cannot be null"); - Utils.nonNull(model, "the model cannot be null"); - return calculateGenotypes(null,null,null,null,vc,model,false,null,header); - } - /** * Main entry function to calculate genotypes of a given VC with corresponding GL's that is shared across genotypers (namely UG and HC). * - * @param features Features - * @param refContext Reference context - * @param rawContext Raw context - * @param stratifiedContexts Stratified alignment contexts - * @param vc Input VC + * Completes a variant context with genotype calls and associated annotations given the genotype likelihoods and + * the model that need to be applied. + * + * @param vc Input variant context to complete. * @param model GL calculation model - * @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc * @return VC with assigned genotypes */ - protected VariantCallContext calculateGenotypes(final FeatureContext features, - final ReferenceContext refContext, - final AlignmentContext rawContext, - Map stratifiedContexts, - final VariantContext vc, - final GenotypeLikelihoodsCalculationModel model, - final boolean inheritAttributesFromInputVC, - final ReadLikelihoods likelihoods, - final SAMFileHeader header) { - final boolean limitedContext = features == null || refContext == null || rawContext == null || stratifiedContexts == null; + public VariantContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel model, final List givenAlleles) { // if input VC can't be genotyped, exit with either null VCC or, in case where we need to emit all sites, an empty call if (hasTooManyAlternativeAlleles(vc) || vc.getNSamples() == 0) { - return emptyCallContext(features, refContext, rawContext, header); + return null; } final int defaultPloidy = configuration.genotypeArgs.samplePloidy; @@ -253,17 +220,16 @@ protected VariantCallContext calculateGenotypes(final FeatureContext features, final AFCalculator afCalculator = configuration.genotypeArgs.useOldAFCalculator ? afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles) : newAFCalculator; final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(reducedVC, defaultPloidy, maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model)); - final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc); + + + final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc, givenAlleles); // posterior probability that at least one alt allele exists in the samples final double probOfAtLeastOneAltAllele = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0()); // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice - final double log10Confidence = - ! outputAlternativeAlleles.siteIsMonomorphic || - configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs - ? AFresult.getLog10PosteriorOfAFEq0() + 0.0 - : AFresult.getLog10PosteriorOfAFGT0() + 0.0 ; + final double log10Confidence = !outputAlternativeAlleles.siteIsMonomorphic || configuration.annotateAllSitesWithPLs + ? AFresult.getLog10PosteriorOfAFEq0() + 0.0 : AFresult.getLog10PosteriorOfAFGT0() + 0.0; // Add 0.0 removes -0.0 occurrences. 
@@ -271,20 +237,13 @@ protected VariantCallContext calculateGenotypes(final FeatureContext features, // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero // skip this if we are already looking at a vc with NON_REF as the first alt allele i.e. if we are in GenotypeGVCFs - if ( !passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic) - && !forceSiteEmission() - && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) { - // technically, at this point our confidence in a reference call isn't accurately estimated - // because it didn't take into account samples with no data, so let's get a better estimate - final double[] AFpriors = getAlleleFrequencyPriors(vc, defaultPloidy, model); - final int INDEX_FOR_AC_EQUALS_1 = 1; - return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, AFpriors[INDEX_FOR_AC_EQUALS_1], true, probOfAtLeastOneAltAllele); + if ( !passesEmitThreshold(phredScaledConfidence, outputAlternativeAlleles.siteIsMonomorphic) && !forceSiteEmission() + && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles) && givenAlleles.isEmpty()) { + return null; } // return a null call if we aren't forcing site emission and the only alt allele is a spanning deletion - if (! forceSiteEmission() - && outputAlternativeAlleles.alleles.size() == 1 - && Allele.SPAN_DEL.equals(outputAlternativeAlleles.alleles.get(0))) { + if (! forceSiteEmission() && outputAlternativeAlleles.alleles.size() == 1 && Allele.SPAN_DEL.equals(outputAlternativeAlleles.alleles.get(0))) { return null; } @@ -303,27 +262,14 @@ && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles)) { AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0)); // calculating strand bias involves overwriting data structures, so we do it last - final Map attributes = composeCallAttributes(inheritAttributesFromInputVC, vc, rawContext, stratifiedContexts, features, refContext, - outputAlternativeAlleles.alternativeAlleleMLECounts(), outputAlternativeAlleles.siteIsMonomorphic, AFresult, outputAlternativeAlleles.outputAlleles(vc.getReference()),genotypes,model,likelihoods); - - VariantContext vcCall = builder.genotypes(genotypes).attributes(attributes).make(); + final Map attributes = composeCallAttributes(vc, outputAlternativeAlleles.alternativeAlleleMLECounts(), + AFresult, outputAlternativeAlleles.outputAlleles(vc.getReference()),genotypes); - if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine - // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations - final ReadPileup pileup = rawContext.getBasePileup(); - stratifiedContexts = AlignmentContext.splitContextBySampleName(pileup, header); - - vcCall = annotationEngine.annotateContext(vcCall, features, refContext, likelihoods, a -> true); - } - - // if we are subsetting alleles (either because there were too many or because some were not polymorphic) - // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). 
- if ( outputAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their alleles in sync - { - vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall); - } + return builder.genotypes(genotypes).attributes(attributes).make(); + } - return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, probOfAtLeastOneAltAllele)); + public VariantContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel model) { + return calculateGenotypes(vc, model, Collections.emptyList()); } @VisibleForTesting @@ -371,13 +317,16 @@ public List alternativeAlleleMLECounts() { * @param vc the variant context * @return information about the alternative allele subsetting {@code null}. */ - private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afCalculationResult, final VariantContext vc) { + private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afCalculationResult, final VariantContext vc, final List givenAlleles) { final List outputAlleles = new ArrayList<>(); final List mleCounts = new ArrayList<>(); boolean siteIsMonomorphic = true; final List alleles = afCalculationResult.getAllelesUsedInGenotyping(); final int alternativeAlleleCount = alleles.size() - 1; int referenceAlleleSize = 0; + + final Set forcedAlleles = AssemblyBasedCallerUtils.getAllelesConsistentWithGivenAlleles(givenAlleles, vc); + for (final Allele allele : alleles) { if (allele.isReference() ) { referenceAlleleSize = allele.length(); @@ -389,7 +338,9 @@ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult //it's possible that the upstream deletion that spanned this site was not emitted, mooting the symbolic spanning deletion allele final boolean isSpuriousSpanningDeletion = GATKVCFConstants.isSpanningDeletion(allele) && !isVcCoveredByDeletion(vc); - final boolean toOutput = (isPlausible || forceKeepAllele(allele) || isNonRefWhichIsLoneAltAllele) && !isSpuriousSpanningDeletion; + + //TODO: force-clling logic goes here + final boolean toOutput = (isPlausible || forceKeepAllele(allele) || isNonRefWhichIsLoneAltAllele || forcedAlleles.contains(allele) ) && !isSpuriousSpanningDeletion; siteIsMonomorphic &= !(isPlausible && !isSpuriousSpanningDeletion); @@ -456,20 +407,6 @@ boolean isVcCoveredByDeletion(final VariantContext vc) { */ protected abstract boolean forceKeepAllele(final Allele allele); - /** - * Checks whether a variant site seems confidently called base on user threshold that the score provided - * by the exact model. - * - * @param conf the phred scaled quality score - * @param PofF - * @return {@code true} iff the variant is confidently called. - */ - protected final boolean confidentlyCalled(final double conf, final double PofF) { - return passesCallThreshold(conf) || - (configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - && passesCallThreshold(QualityUtils.phredScaleErrorRate(PofF))); - } - /** * Checks whether the variant context has too many alternative alleles for progress to genotyping the site. @@ -494,52 +431,6 @@ protected final boolean hasTooManyAlternativeAlleles(final VariantContext vc) { return true; } - /** - * Produces an empty variant-call context to output when there is no enough data provided to call anything. - * - * @param features feature context - * @param ref the reference context. - * @param rawContext the read alignment at that location. 
- * @return it might be null if no enough information is provided to do even an empty call. For example when - * we have limited-context (i.e. any of the tracker, reference or alignment is {@code null}. - */ - protected final VariantCallContext emptyCallContext(final FeatureContext features, - final ReferenceContext ref, - final AlignmentContext rawContext, - final SAMFileHeader header) { - if (features == null || ref == null || rawContext == null || !forceSiteEmission()) { - return null; - } - - VariantContext vc; - - if ( configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(features, - rawContext.getLocation(), configuration.genotypeFilteredAlleles, configuration.alleles); - if (ggaVc == null) { - return null; - } - vc = new VariantContextBuilder(callSourceString(), ref.getInterval().getContig(), ggaVc.getStart(), - ggaVc.getEnd(), ggaVc.getAlleles()).make(); - } else { - // deal with bad/non-standard reference bases - if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) { - return null; - } - final Set alleles = new LinkedHashSet<>(Collections.singleton(Allele.create(ref.getBase(),true))); - vc = new VariantContextBuilder(callSourceString(), ref.getInterval().getContig(), - ref.getInterval().getStart(), ref.getInterval().getStart(), alleles).make(); - } - - if ( vc != null && annotationEngine != null ) { - // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations - final ReadPileup pileup = rawContext.getBasePileup(); - vc = annotationEngine.annotateContext(vc, features, ref, null, a -> true); - } - - return new VariantCallContext(vc, false); - } - /** * Indicates whether we have to emit any site no matter what. *
@@ -551,22 +442,6 @@ protected final VariantCallContext emptyCallContext(final FeatureContext feature */ protected abstract boolean forceSiteEmission(); - protected final VariantCallContext estimateReferenceConfidence(final VariantContext vc, final Map contexts, - final double log10OfTheta, final boolean ignoreCoveredSamples, final double initialPofRef) { - if ( contexts == null ) { - return null; - } - - // add contribution from each sample that we haven't examined yet i.e. those with null contexts - final double log10POfRef = Math.log10(initialPofRef) + contexts.values().stream() - .filter(context -> context == null || !ignoreCoveredSamples ) - .mapToInt(context -> context == null ? 0 : context.getBasePileup().size()) //get the depth - .mapToDouble(depth -> estimateLog10ReferenceConfidenceForOneSample(depth, log10OfTheta)) - .sum(); - - return new VariantCallContext(vc, passesCallThreshold(QualityUtils.phredScaleLog10CorrectRate(log10POfRef)), false); - } - /** * Returns the log10 prior probability for all possible allele counts from 0 to N where N is the total number of * genomes (total-ploidy). @@ -591,22 +466,6 @@ protected final double[] getAlleleFrequencyPriors( final VariantContext vc, fina } } - /** - * Compute the log10 probability of a sample with sequencing depth and no alt allele is actually truly homozygous reference - * - * Assumes the sample is diploid - * - * @param depth the depth of the sample - * @param log10OfTheta the heterozygosity of this species (in log10-space) - * - * @return a valid log10 probability of the sample being hom-ref - */ - protected final double estimateLog10ReferenceConfidenceForOneSample(final int depth, final double log10OfTheta) { - Utils.validateArg(depth >= 0, "depth may not be negative"); - final double log10PofNonRef = log10OfTheta + MathUtils.log10BinomialProbability(depth, 0); - return MathUtils.log10OneMinusX(Math.pow(10.0, log10PofNonRef)); - } - protected final boolean passesEmitThreshold(final double conf, final boolean bestGuessIsRef) { return (configuration.outputMode == OutputMode.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) && passesCallThreshold(conf); @@ -616,23 +475,10 @@ protected final boolean passesCallThreshold(final double conf) { return conf >= configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING; } - protected Map composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc, - final AlignmentContext rawContext, final Map stratifiedContexts, final FeatureContext tracker, final ReferenceContext refContext, final List alleleCountsofMLE, final boolean bestGuessIsRef, - final AFCalculationResult AFresult, final List allAllelesToUse, final GenotypesContext genotypes, - final GenotypeLikelihoodsCalculationModel model, final ReadLikelihoods likelihoods) { + protected Map composeCallAttributes(final VariantContext vc, final List alleleCountsofMLE, + final AFCalculationResult AFresult, final List allAllelesToUse, final GenotypesContext genotypes) { final Map attributes = new LinkedHashMap<>(); - final boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null; - - // inherit attributes from input vc if requested - if (inheritAttributesFromInputVC) { - attributes.putAll(vc.getAttributes()); - } - // if the site was down-sampled, record that fact - if ( !limitedContext && rawContext.hasPileupBeenDownsampled() ) { - attributes.put(GATKVCFConstants.DOWNSAMPLED_KEY, true); - } - // add the MLE AC and AF annotations if (!alleleCountsofMLE.isEmpty()) 
{ attributes.put(GATKVCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java deleted file mode 100644 index f32ef8a7b00..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java +++ /dev/null @@ -1,65 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper; - -import com.google.common.annotations.VisibleForTesting; -import htsjdk.samtools.util.Locatable; -import htsjdk.variant.variantcontext.VariantContext; -import org.broadinstitute.hellbender.engine.FeatureContext; -import org.broadinstitute.hellbender.engine.FeatureInput; -import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; - -import java.util.List; -import java.util.stream.Collectors; - -import static org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED; -import static org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils.FilteredRecordMergeType.KEEP_UNCONDITIONAL; - -/** - * Compendium of utils to work in GENOTYPE_GIVEN_ALLELES mode. - */ -public final class GenotypingGivenAllelesUtils { - - /** - * Composes the given allele variant-context providing information about the given variant alleles and reference location. - * @param tracker the meta data tracker. - * @param loc the query location. - * @param keepFiltered whether to include filtered variants - * @param allelesBinding the target variation context binding containing the given alleles. - * @return never {@code null} - */ - public static VariantContext composeGivenAllelesVariantContextFromVariantList(final FeatureContext tracker, - final Locatable loc, - final boolean keepFiltered, - final FeatureInput allelesBinding) { - Utils.nonNull(tracker, "tracker may not be null"); - Utils.nonNull(loc, "location may not be null"); - Utils.nonNull(allelesBinding, "alleles binding may not be null"); - - final List variantContextsInFeatureContext = tracker.getValues(allelesBinding, new SimpleInterval(loc)); - return composeGivenAllelesVariantContextFromVariantList(variantContextsInFeatureContext, loc, keepFiltered); - } - - @VisibleForTesting - protected static VariantContext composeGivenAllelesVariantContextFromVariantList(final List variantContextsInFeatureContext, - final Locatable loc, - final boolean keepFiltered) { - final List vcsAtLoc = variantContextsInFeatureContext - .stream() - .filter(vc -> vc.getStart() == loc.getStart() && - (keepFiltered || vc.isNotFiltered())) - .collect(Collectors.toList()); - - - if (vcsAtLoc.isEmpty()) { - return null; - } - final List haplotypeSources = vcsAtLoc.stream().map(VariantContext::getSource).collect(Collectors.toList()); - final VariantContext mergedVc = GATKVariantContextUtils.simpleMerge(vcsAtLoc, haplotypeSources, - keepFiltered ? 
KEEP_UNCONDITIONAL : KEEP_IF_ANY_UNFILTERED, - GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false); - - return mergedVc; - } - -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingOutputMode.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingOutputMode.java deleted file mode 100644 index 2576fc2122a..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingOutputMode.java +++ /dev/null @@ -1,23 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper; - -/** - * Enumeration of possible genotyping modes. - * - *
- * A genotyping mode represents the general approach taken when detecting and calculate variant genotypes. - *
- * - * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> - */ -public enum GenotypingOutputMode { - - /** - * The genotyper will choose the most likely alternate allele - */ - DISCOVERY, - - /** - * Only the alleles passed by the user should be considered. - */ - GENOTYPE_GIVEN_ALLELES -} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/MinimalGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/MinimalGenotypingEngine.java index ab355d9e5b8..ce714dc357b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/MinimalGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/MinimalGenotypingEngine.java @@ -35,9 +35,7 @@ public MinimalGenotypingEngine(final UnifiedArgumentCollection configuration, fi final AFCalculatorProvider afCalculatorProvider, boolean doAlleleSpecificCalcs ) { super(configuration, samples, afCalculatorProvider, doAlleleSpecificCalcs); - if ( configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - throw new UserException("GENOTYPE_GIVEN_ALLELES mode not supported in the MinimalGenotypingEngine"); - } else if ( configuration.GLmodel != GenotypeLikelihoodsCalculationModel.SNP ) { + if ( configuration.GLmodel != GenotypeLikelihoodsCalculationModel.SNP ) { throw new UserException("Only the diploid SNP model is supported in the MinimalGenotypingEngine"); } else if ( configuration.COMPUTE_SLOD ) { throw new UserException("--computeSLOD not supported in the MinimalGenotypingEngine"); @@ -46,7 +44,7 @@ public MinimalGenotypingEngine(final UnifiedArgumentCollection configuration, fi @Override protected boolean forceKeepAllele(final Allele allele) { - return configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs; + return configuration.annotateAllSitesWithPLs; } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/OutputMode.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/OutputMode.java index b3965dd441d..09549112a5b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/OutputMode.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/OutputMode.java @@ -11,7 +11,6 @@ public enum OutputMode { EMIT_ALL_CONFIDENT_SITES, /** produces calls at any callable site regardless of confidence; this argument is intended only for point - * mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by - * no means produce a comprehensive set of indels in DISCOVERY mode */ + * mutations (SNPs); it will not produce a comprehensive set of indels. 
*/ EMIT_ALL_SITES } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/StandardCallerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/StandardCallerArgumentCollection.java index c1767cd59b8..36bc64c3063 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/StandardCallerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/StandardCallerArgumentCollection.java @@ -30,8 +30,6 @@ public void copyStandardCallerArgsFrom( final StandardCallerArgumentCollection o Utils.nonNull(other); this.genotypeArgs = new GenotypeCalculationArgumentCollection(other.genotypeArgs); - this.genotypingOutputMode = other.genotypingOutputMode; - this.alleles = other.alleles; // FeatureInputs are immutable outside of the engine, so this shallow copy is safe this.CONTAMINATION_FRACTION = other.CONTAMINATION_FRACTION; this.CONTAMINATION_FRACTION_FILE = other.CONTAMINATION_FRACTION_FILE != null ? new File(other.CONTAMINATION_FRACTION_FILE.getAbsolutePath()) : null; if ( other.sampleContamination != null ) { @@ -46,22 +44,6 @@ public void copyStandardCallerArgsFrom( final StandardCallerArgumentCollection o @ArgumentCollection public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection(); - @Argument(fullName = "genotyping-mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", optional=true) - public GenotypingOutputMode genotypingOutputMode = GenotypingOutputMode.DISCOVERY; - - /** - * When the caller is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding - */ - @Argument(fullName="alleles", doc="The set of alleles at which to genotype when --genotyping-mode is GENOTYPE_GIVEN_ALLELES", optional=true) - public FeatureInput alleles; - - /** - * When set to true an when in GENOTYPE_GIVEN_ALLELES mode all given alleles, even filtered ones, are genotyped - */ - @Advanced - @Argument(fullName = "genotype-filtered-alleles", doc = "Whether to genotype all given alleles, even filtered ones, --genotyping-mode is GENOTYPE_GIVEN_ALLELES", optional = true) - public boolean genotypeFilteredAlleles = false; - /** * If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads. * Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/VariantCallContext.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/VariantCallContext.java deleted file mode 100644 index a1bf2d078a6..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/VariantCallContext.java +++ /dev/null @@ -1,50 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper; - -import htsjdk.variant.variantcontext.VariantContext; - -/** - * Useful helper class to communicate the results of calculateGenotype to framework - */ -public final class VariantCallContext extends VariantContext { - - private static final long serialVersionUID = 1L; - - // Was the site called confidently, either reference or variant? - private boolean confidentlyCalled = false; - - // Should this site be emitted? 
- private boolean shouldEmit = true; - - VariantCallContext(VariantContext vc, boolean confidentlyCalledP) { - super(vc); - this.confidentlyCalled = confidentlyCalledP; - } - - VariantCallContext(VariantContext vc, boolean confidentlyCalledP, boolean shouldEmit) { - super(vc); - this.confidentlyCalled = confidentlyCalledP; - this.shouldEmit = shouldEmit; - } - - /* these methods are only implemented for GENOTYPE_GIVEN_ALLELES MODE */ - //todo -- expand these methods to all modes - - /** - * - * @param callConfidenceThreshold the Unified Argument Collection STANDARD_CONFIDENCE_FOR_CALLING - * @return true if call was confidently ref - */ - public boolean isCalledRef(double callConfidenceThreshold) { - return (confidentlyCalled && (getPhredScaledQual() < callConfidenceThreshold)); - } - - /** - * - * @param callConfidenceThreshold the Unified Argument Collection STANDARD_CONFIDENCE_FOR_CALLING - * @return true if call was confidently alt - */ - public boolean isCalledAlt(double callConfidenceThreshold) { - return (confidentlyCalled && (getPhredScaledQual() > callConfidenceThreshold)); - } - -} \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java index 14ac333d8de..9d8b02483e6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java @@ -1,9 +1,11 @@ package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; +import htsjdk.variant.variantcontext.VariantContext; import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; import org.broadinstitute.barclay.argparser.Hidden; +import org.broadinstitute.hellbender.engine.FeatureInput; import org.broadinstitute.hellbender.tools.walkers.genotyper.StandardCallerArgumentCollection; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; import org.broadinstitute.hellbender.utils.haplotype.HaplotypeBAMWriter; @@ -125,4 +127,11 @@ public ReadThreadingAssembler createReadThreadingAssembler() { @Argument(fullName = MAX_MNP_DISTANCE_LONG_NAME, shortName = MAX_MNP_DISTANCE_SHORT_NAME, doc = "Two or more phased substitutions separated by this distance or less are merged into MNPs.", optional = true) public int maxMnpDistance = getDefaultMaxMnpDistance(); + + @Argument(fullName="alleles", doc="The set of alleles for which to force genotyping regardless of evidence", optional=true) + public FeatureInput alleles; + + @Advanced + @Argument(fullName = "genotype-filtered-alleles", doc = "Whether to force genotype even filtered alleles", optional = true) + public boolean forceCallFiltered = false; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java index 45bdbfd401e..3c9f5d0bcd4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java @@ -8,6 +8,7 @@ 
import htsjdk.samtools.util.Locatable; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.Logger; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy; @@ -527,20 +528,15 @@ public static List getVariantContextsFromActiveHaplotypes(final /** * Returns a mapping from Allele in the mergedVC, which represents all of the alleles being genotyped at loc, - * to a list of Haplotypes that support that allele. If activeAllelesToGenotype contains any entries, haplotypes supporting - * spanning events that do not start at this location are included only if they match one of the given alleles, a - * necessary check for the desired behavior of HaplotypeCaller's genotype given alleles mode. Otherwise, if the mergedVC - * includes a spanning deletion allele, all haplotypes that support spanning deletions will be assigned to that allele in the map. + * to a list of Haplotypes that support that allele. If the mergedVC includes a spanning deletion allele, all haplotypes + * that support spanning deletions will be assigned to that allele in the map. + * * @param mergedVC The merged variant context for the locus, which includes all active alternate alleles merged to a single reference allele * @param loc The active locus being genotyped * @param haplotypes Haplotypes for the current active region - * @param activeAllelesToGenotype Given alleles being genotyped in the active region, if running in GGA mode; can be null or empty otherwise * @return */ - public static Map> createAlleleMapper(final VariantContext mergedVC, - final int loc, - final List haplotypes, - final List activeAllelesToGenotype) { + public static Map> createAlleleMapper(final VariantContext mergedVC, final int loc, final List haplotypes) { final Map> result = new LinkedHashMap<>(); @@ -591,23 +587,10 @@ public static Map> createAlleleMapper(final VariantConte } else { // the event starts prior to the current location, so it's a spanning deletion - if (activeAllelesToGenotype != null && activeAllelesToGenotype.size() > 0) { - // in HC GGA mode we need to check to make sure that spanning deletion - // events actually match one of the alleles we were given to genotype - final boolean eventMatchesGivenAllele = eventMatchesGivenAllele(activeAllelesToGenotype, spanningEvent); - if (eventMatchesGivenAllele) { - if (!result.containsKey(Allele.SPAN_DEL)) { - result.put(Allele.SPAN_DEL, new ArrayList<>()); - } - result.get(Allele.SPAN_DEL).add(h); - } - - } else { - if (! result.containsKey(Allele.SPAN_DEL)) { - result.put(Allele.SPAN_DEL, new ArrayList<>()); - } - result.get(Allele.SPAN_DEL).add(h); + if (! 
result.containsKey(Allele.SPAN_DEL)) { + result.put(Allele.SPAN_DEL, new ArrayList<>()); } + result.get(Allele.SPAN_DEL).add(h); break; } } @@ -872,4 +855,34 @@ private static VariantContext phaseVC(final VariantContext vc, final String ID, return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make(); } + public static Set getAllelesConsistentWithGivenAlleles(final List givenAlleles, final VariantContext mergedVC) { + if (givenAlleles.isEmpty()) { + return Collections.emptySet(); + } + + final List> givenAltAndRefAllelesInOriginalContext = getVariantContextsFromGivenAlleles(mergedVC.getStart(), givenAlleles, false).stream() + .flatMap(vc -> vc.getAlternateAlleles().stream().map(allele -> ImmutablePair.of(allele, vc.getReference()))).collect(Collectors.toList()); + + return mergedVC.getAlternateAlleles().stream() + .map(allele -> ImmutablePair.of(allele, mergedVC.getReference())) + .filter(altAndRef -> givenAltAndRefAllelesInOriginalContext.stream().anyMatch(givenAltAndRef -> allelesAreConsistent(givenAltAndRef, altAndRef))) + .map(altAndRefPair -> altAndRefPair.getLeft()) + .collect(Collectors.toSet()); + } + + // check whether two alleles coming from different variant contexts and with possibly different reference alleles + // could in fact be the same. The condition is that one is a prefix of the other + private static boolean allelesAreConsistent(final Pair altAndRef1, final Pair altAndRef2) { + final Allele alt1 = altAndRef1.getLeft(); + final Allele alt2 = altAndRef2.getLeft(); + if (alt1.isSymbolic() || alt2.isSymbolic()) { + return false; + } else { + final int sizeDiff1 = alt1.length() - altAndRef1.getRight().length(); + final int sizeDiff2 = alt2.length() - altAndRef2.getRight().length(); + return (sizeDiff1 == sizeDiff2) && (alt1.length() < alt2.length() ? + alt1.basesMatch(Arrays.copyOf(alt2.getBases(), alt1.length())) : + alt2.basesMatch(Arrays.copyOf(alt1.getBases(), alt2.length()))); + } + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyRegionTrimmer.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyRegionTrimmer.java index 56e31a8d52b..4c98cddba69 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyRegionTrimmer.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyRegionTrimmer.java @@ -31,7 +31,7 @@ public final class AssemblyRegionTrimmer { private boolean debug; /** - * Holds the extension to be used based on whether GGA mode is on or off. + * Holds the extension to be used */ private int usableExtension; @@ -58,7 +58,6 @@ public final class AssemblyRegionTrimmer { * * @param assemblyArgs user arguments for the trimmer * @param sequenceDictionary dictionary to determine the bounds of contigs - * @param isGGA whether the trimming region calculator should act as if we are in GGA mode or not. * @param emitReferenceConfidence indicates whether we plan to use this trimmer to generate trimmed regions * to be used for emitting reference confidence. * @@ -66,7 +65,7 @@ public final class AssemblyRegionTrimmer { * @throws IllegalArgumentException if the input location parser is {@code null}. * @throws CommandLineException.BadArgumentValue if any of the user argument values is invalid. 
*/ - public void initialize(final ReadThreadingAssemblerArgumentCollection assemblyArgs, final SAMSequenceDictionary sequenceDictionary, final boolean isGGA, final boolean emitReferenceConfidence) { + public void initialize(final ReadThreadingAssemblerArgumentCollection assemblyArgs, final SAMSequenceDictionary sequenceDictionary, final boolean emitReferenceConfidence) { Utils.validate(this.assemblyArgs == null, () -> getClass().getSimpleName() + " instance initialized twice"); this.assemblyArgs = Utils.nonNull(assemblyArgs);; @@ -74,7 +73,7 @@ public void initialize(final ReadThreadingAssemblerArgumentCollection assemblyAr checkUserArguments(); this.debug = assemblyArgs.debugAssembly; - usableExtension = isGGA ? this.assemblyArgs.ggaExtension : this.assemblyArgs.discoverExtension; + usableExtension = this.assemblyArgs.extension; this.emitReferenceConfidence = emitReferenceConfidence; } @@ -90,11 +89,8 @@ private void checkUserArguments() { if ( assemblyArgs.indelPadding < 0 ) { throw new CommandLineException.BadArgumentValue("paddingAroundIndels", "" + assemblyArgs.indelPadding + "< 0"); } - if ( assemblyArgs.discoverExtension < 0) { - throw new CommandLineException.BadArgumentValue("maxDiscARExtension", "" + assemblyArgs.discoverExtension + "< 0"); - } - if ( assemblyArgs.ggaExtension < 0) { - throw new CommandLineException.BadArgumentValue("maxGGAAREExtension", "" + assemblyArgs.ggaExtension + "< 0"); + if ( assemblyArgs.extension < 0) { + throw new CommandLineException.BadArgumentValue("maxDiscARExtension", "" + assemblyArgs.extension + "< 0"); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java index 8ba196e6141..77eee13841a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java @@ -108,11 +108,6 @@ protected ReadThreadingAssemblerArgumentCollection getReadThreadingAssemblerArgu @Argument(fullName = "indel-size-to-eliminate-in-ref-model", doc = "The size of an indel to check for in the reference model", optional = true) public int indelSizeToEliminateInRefModel = 10; - - @Advanced - @Argument(fullName = "use-alleles-trigger", doc = "Use additional trigger on variants found in an external alleles file", optional = true) - public boolean USE_ALLELES_TRIGGER = false; - /** * If set, certain "early exit" optimizations in HaplotypeCaller, which aim to save compute and time by skipping * calculations if an ActiveRegion is determined to contain no variants, will be disabled. 
This is most likely to be useful if diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index ffc22168928..f5b73746003 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -92,6 +92,8 @@ public final class HaplotypeCallerEngine implements AssemblyRegionEvaluator { private Set sampleSet; private SampleList samplesList; + private final boolean forceCallingAllelesPresent; + private byte minTailQuality; private SmithWatermanAligner aligner; @@ -156,6 +158,7 @@ public HaplotypeCallerEngine( final HaplotypeCallerArgumentCollection hcArgs, bo this.referenceReader = Utils.nonNull(referenceReader); this.annotationEngine = Utils.nonNull(annotationEngine); this.aligner = SmithWatermanAligner.getAligner(hcArgs.smithWatermanImplementation); + forceCallingAllelesPresent = hcArgs.alleles != null; initialize(createBamOutIndex, createBamOutMD5); } @@ -215,8 +218,7 @@ private void initialize(boolean createBamOutIndex, final boolean createBamOutMD5 assemblyEngine = hcArgs.createReadThreadingAssembler(); likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(hcArgs.likelihoodArgs); - trimmer.initialize(hcArgs.assemblerArgs, readsHeader.getSequenceDictionary(), - hcArgs.standardArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES, emitReferenceConfidence()); + trimmer.initialize(hcArgs.assemblerArgs, readsHeader.getSequenceDictionary(), emitReferenceConfidence()); } private boolean isVCFMode() { @@ -244,10 +246,6 @@ private void validateAndInitializeArgs() { } if ( emitReferenceConfidence() ) { - if ( hcArgs.standardArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) { - throw new CommandLineException.BadArgumentValue("ERC/gt_mode", "you cannot request reference confidence output and GENOTYPE_GIVEN_ALLELES at the same time"); - } - hcArgs.standardArgs.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = -0.0; logger.info("Standard Emitting and Calling confidence set to 0.0 for reference-model confidence output"); @@ -264,10 +262,6 @@ private void validateAndInitializeArgs() { hcArgs.standardArgs.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(hcArgs.standardArgs.CONTAMINATION_FRACTION_FILE, hcArgs.standardArgs.CONTAMINATION_FRACTION, sampleSet, logger)); } - if ( hcArgs.standardArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && hcArgs.assemblerArgs.consensusMode() ) { - throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode at the same time. 
Please choose one or the other."); - } - Utils.validateArg(hcArgs.likelihoodArgs.BASE_QUALITY_SCORE_THRESHOLD >= QualityUtils.MIN_USABLE_Q_SCORE, "BASE_QUALITY_SCORE_THRESHOLD must be greater than or equal to " + QualityUtils.MIN_USABLE_Q_SCORE + " (QualityUtils.MIN_USABLE_Q_SCORE)"); if ( emitReferenceConfidence() && samplesList.numberOfSamples() != 1 ) { @@ -306,7 +300,6 @@ private void initializeActiveRegionEvaluationGenotyperEngine() { simpleUAC.copyStandardCallerArgsFrom(hcArgs.standardArgs); simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY; - simpleUAC.genotypingOutputMode = GenotypingOutputMode.DISCOVERY; simpleUAC.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = Math.min(MAXMIN_CONFIDENCE_FOR_CONSIDERING_A_SITE_AS_POSSIBLE_VARIANT_IN_ACTIVE_REGION_DISCOVERY, hcArgs.standardArgs.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling simpleUAC.CONTAMINATION_FRACTION = 0.0; simpleUAC.CONTAMINATION_FRACTION_FILE = null; @@ -450,17 +443,8 @@ public void writeHeader( final VariantContextWriter vcfWriter, final SAMSequence */ @Override public ActivityProfileState isActive( final AlignmentContext context, final ReferenceContext ref, final FeatureContext features ) { - - if ( hcArgs.standardArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - final VariantContext vcFromGivenAlleles = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(features, - ref.getInterval(), hcArgs.standardArgs.genotypeFilteredAlleles, hcArgs.standardArgs.alleles); - if( vcFromGivenAlleles != null ) { - return new ActivityProfileState(ref.getInterval(), 1.0); - } - } - - if ( hcArgs.USE_ALLELES_TRIGGER ) { - return new ActivityProfileState( ref.getInterval(), features.getValues(hcArgs.standardArgs.alleles, ref.getInterval()).size() > 0 ? 1.0 : 0.0 ); + if ( forceCallingAllelesPresent && features.getValues(hcArgs.alleles, ref).stream().anyMatch(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered())) { + return new ActivityProfileState(ref.getInterval(), 1.0); } if( context == null || context.getBasePileup().isEmpty() ) { @@ -500,7 +484,7 @@ public ActivityProfileState isActive( final AlignmentContext context, final Refe // This is the case when doing GVCF output. isActiveProb = activeRegionEvaluationGenotyperEngine.calculateSingleSampleRefVsAnyActiveStateProfileValue(genotypes.get(0).getLikelihoods().getAsVector()); } else { - final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getEnd(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.SNP, readsHeader); + final VariantContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getEnd(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.SNP); isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb(vcOut.getPhredScaledQual()); } return new ActivityProfileState(ref.getInterval(), isActiveProb, averageHQSoftClips.mean() > AVERAGE_HQ_SOFTCLIPS_HQ_BASES_THRESHOLD ? 
ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileState.Type.NONE, averageHQSoftClips.mean() ); @@ -533,15 +517,10 @@ public List callRegion(final AssemblyRegion region, final Featur return referenceModelForNoVariation(region, true, VCpriors); } - final List givenAlleles = new ArrayList<>(); - if ( hcArgs.standardArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - features.getValues(hcArgs.standardArgs.alleles).stream().filter(vc -> hcArgs.standardArgs.genotypeFilteredAlleles || vc.isNotFiltered()).forEach(givenAlleles::add); + final List givenAlleles = features.getValues(hcArgs.alleles).stream() + .filter(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered()).collect(Collectors.toList()); - // No alleles found in this region so nothing to do! - if ( givenAlleles.isEmpty() ) { - return referenceModelForNoVariation(region, true, VCpriors); - } - } else if( region.size() == 0 ) { + if( givenAlleles.isEmpty() && region.size() == 0 ) { // No reads here so nothing to do! return referenceModelForNoVariation(region, true, VCpriors); } @@ -616,7 +595,7 @@ public List callRegion(final AssemblyRegion region, final Featur assemblyResult.getPaddedReferenceLoc(), regionForGenotyping.getSpan(), features, - (hcArgs.assemblerArgs.consensusMode() ? Collections.emptyList() : givenAlleles), + givenAlleles, emitReferenceConfidence(), hcArgs.maxMnpDistance, readsHeader, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index d3eef7f972e..43f2f4ebbf0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -72,7 +72,7 @@ public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection c @Override protected boolean forceSiteEmission() { - return configuration.outputMode == OutputMode.EMIT_ALL_SITES || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES; + return configuration.outputMode == OutputMode.EMIT_ALL_SITES; } @Override @@ -82,8 +82,7 @@ protected String callSourceString() { @Override protected boolean forceKeepAllele(final Allele allele) { - return allele.equals(Allele.NON_REF_ALLELE,false) || referenceConfidenceMode != ReferenceConfidenceMode.NONE - || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES; + return allele.equals(Allele.NON_REF_ALLELE,false) || referenceConfidenceMode != ReferenceConfidenceMode.NONE; } @@ -100,7 +99,7 @@ protected boolean forceKeepAllele(final Allele allele) { * @param ref Reference bytes at active region * @param refLoc Corresponding active region genome location * @param activeRegionWindow Active window - * @param activeAllelesToGenotype Alleles to genotype + * @param givenAlleles Alleles to genotype * @param emitReferenceConfidence whether we should add a <NON_REF> alternative allele to the result variation contexts. * @param maxMnpDistance Phased substitutions separated by this distance or less are merged into MNPs. 
More than * two substitutions occuring in the same alignment block (ie the same M/X/EQ CIGAR element) @@ -119,7 +118,7 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List haplotyp final SimpleInterval refLoc, final SimpleInterval activeRegionWindow, final FeatureContext tracker, - final List activeAllelesToGenotype, + final List givenAlleles, final boolean emitReferenceConfidence, final int maxMnpDistance, final SAMFileHeader header, @@ -131,13 +130,13 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List haplotyp Utils.nonNull(refLoc, "refLoc must be non-null"); Utils.validateArg(refLoc.size() == ref.length, " refLoc length must match ref bytes"); Utils.nonNull(activeRegionWindow, "activeRegionWindow must be non-null"); - Utils.nonNull(activeAllelesToGenotype, "activeAllelesToGenotype must be non-null"); + Utils.nonNull(givenAlleles, "givenAlleles must be non-null"); Utils.validateArg(refLoc.contains(activeRegionWindow), "refLoc must contain activeRegionWindow"); ParamUtils.isPositiveOrZero(maxMnpDistance, "maxMnpDistance may not be negative."); // update the haplotypes so we're ready to call, getting the ordered list of positions on the reference // that carry events among the haplotypes - final SortedSet startPosKeySet = decomposeHaplotypesIntoVariantContexts(haplotypes, ref, refLoc, activeAllelesToGenotype, maxMnpDistance); + final SortedSet startPosKeySet = EventMap.buildEventMapsForHaplotypes(haplotypes, ref, refLoc, hcArgs.assemblerArgs.debugAssembly, maxMnpDistance); // Walk along each position in the key set and create each event to be outputted final Set calledHaplotypes = new HashSet<>(); @@ -155,27 +154,20 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List haplotyp continue; } - final List activeEventVariantContexts; - if( activeAllelesToGenotype.isEmpty() ) { - activeEventVariantContexts = AssemblyBasedCallerUtils.getVariantContextsFromActiveHaplotypes(loc, haplotypes, true); - } else { // we are in GGA mode! 
- activeEventVariantContexts = AssemblyBasedCallerUtils.getVariantContextsFromGivenAlleles(loc, activeAllelesToGenotype, true); - } + final List eventsAtThisLoc = AssemblyBasedCallerUtils.getVariantContextsFromActiveHaplotypes(loc, haplotypes, true); - final List eventsAtThisLocWithSpanDelsReplaced = - replaceSpanDels(activeEventVariantContexts, - Allele.create(ref[loc - refLoc.getStart()], true), - loc); + final List eventsAtThisLocWithSpanDelsReplaced = replaceSpanDels(eventsAtThisLoc, + Allele.create(ref[loc - refLoc.getStart()], true), loc); VariantContext mergedVC = AssemblyBasedCallerUtils.makeMergedVariantContext(eventsAtThisLocWithSpanDelsReplaced); if( mergedVC == null ) { continue; } - + int mergedAllelesListSizeBeforePossibleTrimming = mergedVC.getAlleles().size(); - final Map> alleleMapper = AssemblyBasedCallerUtils.createAlleleMapper(mergedVC, loc, haplotypes, activeAllelesToGenotype); + final Map> alleleMapper = AssemblyBasedCallerUtils.createAlleleMapper(mergedVC, loc, haplotypes); if( hcArgs.assemblerArgs.debugAssembly && logger != null ) { logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); @@ -195,7 +187,7 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List haplotyp } final GenotypesContext genotypes = calculateGLsForThisEvent(readAlleleLikelihoods, mergedVC, noCallAlleles); - final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), getGLModel(mergedVC), header); + final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), getGLModel(mergedVC), givenAlleles); if( call != null ) { readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList, @@ -419,42 +411,6 @@ protected static GenotypeLikelihoodsCalculationModel getGLModel(final VariantCon return isSNP ? GenotypeLikelihoodsCalculationModel.SNP : GenotypeLikelihoodsCalculationModel.INDEL; } - /** - * Go through the haplotypes we assembled, and decompose them into their constituent variant contexts - * - * @param haplotypes the list of haplotypes we're working with - * @param ref the reference bases (over the same interval as the haplotypes) - * @param refLoc the span of the reference bases - * @param activeAllelesToGenotype alleles we want to ensure are scheduled for genotyping (GGA mode) - * @param maxMnpDistance Phased substitutions separated by this distance or less are merged into MNPs. More than - * two substitutions occurring in the same alignment block (ie the same M/X/EQ CIGAR element) - * are merged until a substitution is separated from the previous one by a greater distance. - * That is, if maxMnpDistance = 1, substitutions at positions 10,11,12,14,15,17 are partitioned into a MNP - * at 10-12, a MNP at 14-15, and a SNP at 17. - * @return never {@code null} but perhaps an empty list if there is no variants to report. - */ - private TreeSet decomposeHaplotypesIntoVariantContexts(final List haplotypes, - final byte[] ref, - final SimpleInterval refLoc, - final List activeAllelesToGenotype, - final int maxMnpDistance) { - final boolean inGGAMode = ! activeAllelesToGenotype.isEmpty(); - - // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file - // IMPORTANT NOTE: This needs to be done even in GGA mode, as this method call has the side effect of setting the - // event maps in the Haplotype objects! 
- final TreeSet startPosKeySet = EventMap.buildEventMapsForHaplotypes(haplotypes, ref, refLoc, hcArgs.assemblerArgs.debugAssembly, maxMnpDistance); - - if ( inGGAMode ) { - startPosKeySet.clear(); - for( final VariantContext compVC : activeAllelesToGenotype ) { - startPosKeySet.add(compVC.getStart()); - } - } - - return startPosKeySet; - } - // Builds the read-likelihoods collection to use for annotation considering user arguments and the collection // used for genotyping. private ReadLikelihoods prepareReadAlleleLikelihoodsForAnnotation( diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerReadThreadingAssemblerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerReadThreadingAssemblerArgumentCollection.java index 27e5ddc835b..9b1148fe416 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerReadThreadingAssemblerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerReadThreadingAssemblerArgumentCollection.java @@ -32,14 +32,6 @@ public class HaplotypeCallerReadThreadingAssemblerArgumentCollection extends Rea @Argument(fullName="recover-dangling-heads", doc="This argument is deprecated since version 3.3", optional = true) public boolean DEPRECATED_RecoverDanglingHeads = false; - /** - * This argument is specifically intended for 1000G consensus analysis mode. Setting this flag will inject all - * provided alleles to the assembly graph but will not forcibly genotype all of them. - */ - @Advanced - @Argument(fullName="consensus", doc="1000G consensus mode", optional = true) - public boolean consensusMode = false; - @Override public ReadThreadingAssembler makeReadThreadingAssembler() { final ReadThreadingAssembler assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, @@ -59,7 +51,4 @@ public ReadThreadingAssembler makeReadThreadingAssembler() { return assemblyEngine; } - - @Override - public boolean consensusMode() { return consensusMode; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReadThreadingAssemblerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReadThreadingAssemblerArgumentCollection.java index 7420451eea9..b481a001185 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReadThreadingAssemblerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/ReadThreadingAssemblerArgumentCollection.java @@ -36,12 +36,8 @@ public abstract class ReadThreadingAssemblerArgumentCollection implements Serial * the maximum extent into the full active region extension that we're willing to go in genotyping our events */ @Hidden - @Argument(fullName="max-disc-ar-extension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for discovery", optional = true) - protected int discoverExtension = 25; - - @Hidden - @Argument(fullName="max-gga-ar-extension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for GGA mode", optional = true) - protected int ggaExtension = 300; + @Argument(fullName="max-extension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping", optional = true) + protected int extension = 
25; /** * Include at least this many bases around an event for calling it @@ -201,6 +197,4 @@ public abstract class ReadThreadingAssemblerArgumentCollection implements Serial public int minObservationsForKmerToBeSolid = 20; public abstract ReadThreadingAssembler makeReadThreadingAssembler(); - - public boolean consensusMode() { return false; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java index dfd339b6271..d90a882cb58 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2ArgumentCollection.java @@ -250,11 +250,4 @@ public double getInitialLogOdds() { @Advanced @Argument(fullName = "minimum-allele-fraction", shortName = "min-AF", doc = "Lower bound of variant allele fractions to consider when calculating variant LOD", optional = true) public double minAF = 0.00; - - @Argument(fullName="alleles", doc="The set of alleles for which to force genotyping regardless of evidence", optional=true) - public FeatureInput alleles; - - @Advanced - @Argument(fullName = "genotype-filtered-alleles", doc = "Whether to force genotype even filtered alleles", optional = true) - public boolean genotypeFilteredAlleles = false; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 2c9c8b56c76..0892b7bc51a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -21,7 +21,6 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation; import org.broadinstitute.hellbender.tools.walkers.annotator.StandardMutectAnnotation; import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine; -import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypingGivenAllelesUtils; import org.broadinstitute.hellbender.tools.walkers.genotyper.HomogeneousPloidyModel; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; @@ -140,7 +139,7 @@ public Mutect2Engine(final M2ArgumentCollection MTAC, final boolean createBamOut likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(MTAC.likelihoodArgs); genotypingEngine = new SomaticGenotypingEngine(MTAC, normalSamples, annotationEngine); haplotypeBAMWriter = AssemblyBasedCallerUtils.createBamWriter(MTAC, createBamOutIndex, createBamOutMD5, header); - trimmer.initialize(MTAC.assemblerArgs, header.getSequenceDictionary(), forceCallingAllelesPresent, emitReferenceConfidence()); + trimmer.initialize(MTAC.assemblerArgs, header.getSequenceDictionary(), emitReferenceConfidence()); referenceConfidenceModel = new SomaticReferenceConfidenceModel(samplesList, header, 0, genotypingEngine); //TODO: do something classier with the indel size arg final List tumorSamples = ReadUtils.getSamplesFromHeader(header).stream().filter(this::isTumorSample).collect(Collectors.toList()); f1R2CountsCollector = MTAC.f1r2TarGz == null ? 
Optional.empty() : Optional.of(new F1R2CountsCollector(MTAC.f1r2Args, header, MTAC.f1r2TarGz, tumorSamples)); @@ -219,7 +218,7 @@ public List callRegion(final AssemblyRegion originalAssemblyRegi removeUnmarkedDuplicates(originalAssemblyRegion); final List givenAlleles = featureContext.getValues(MTAC.alleles).stream() - .filter(vc -> MTAC.genotypeFilteredAlleles || vc.isNotFiltered()).collect(Collectors.toList()); + .filter(vc -> MTAC.forceCallFiltered || vc.isNotFiltered()).collect(Collectors.toList()); final AssemblyRegion assemblyActiveRegion = AssemblyBasedCallerUtils.assemblyRegionWithWellMappedReads(originalAssemblyRegion, READ_QUALITY_FILTER_THRESHOLD, header); final AssemblyResultSet untrimmedAssemblyResult = AssemblyBasedCallerUtils.assembleReads(assemblyActiveRegion, givenAlleles, MTAC, header, samplesList, logger, referenceReader, assemblyEngine, aligner, false); @@ -355,13 +354,9 @@ public void shutdown() { } @Override - public ActivityProfileState isActive(final AlignmentContext context, final ReferenceContext ref, final FeatureContext featureContext) { - if ( forceCallingAllelesPresent ) { - final VariantContext vcFromGivenAlleles = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(featureContext, - ref.getInterval(), MTAC.genotypeFilteredAlleles, MTAC.alleles); - if( vcFromGivenAlleles != null ) { - return new ActivityProfileState(ref.getInterval(), 1.0); - } + public ActivityProfileState isActive(final AlignmentContext context, final ReferenceContext ref, final FeatureContext features) { + if ( forceCallingAllelesPresent && features.getValues(MTAC.alleles, ref).stream().anyMatch(vc -> MTAC.forceCallFiltered || vc.isNotFiltered())) { + return new ActivityProfileState(ref.getInterval(), 1.0); } final byte refBase = ref.getBase(); @@ -391,7 +386,7 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer return new ActivityProfileState(refInterval, 0.0); } } else if (!MTAC.genotypeGermlineSites) { - final List germline = featureContext.getValues(MTAC.germlineResource, refInterval); + final List germline = features.getValues(MTAC.germlineResource, refInterval); if (!germline.isEmpty()){ final VariantContext germlineVariant = germline.get(0); final List germlineAlleleFrequencies = getAttributeAsDoubleList(germlineVariant, VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); @@ -401,7 +396,7 @@ public ActivityProfileState isActive(final AlignmentContext context, final Refer } } - if (!MTAC.genotypePonSites && !featureContext.getValues(MTAC.pon, new SimpleInterval(context.getContig(), (int) context.getPosition(), (int) context.getPosition())).isEmpty()) { + if (!MTAC.genotypePonSites && !features.getValues(MTAC.pon, new SimpleInterval(context.getContig(), (int) context.getPosition(), (int) context.getPosition())).isEmpty()) { return new ActivityProfileState(refInterval, 0.0); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java index 2d455fd4570..4558c462a18 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java @@ -7,8 +7,6 @@ import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.VCFConstants; import org.apache.commons.collections4.ListUtils; -import org.apache.commons.lang3.tuple.ImmutablePair; -import 
org.apache.commons.lang3.tuple.Pair; import org.apache.commons.math3.linear.Array2DRowRealMatrix; import org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor; import org.apache.commons.math3.linear.RealMatrix; @@ -95,7 +93,7 @@ public CalledHaplotypes callMutations( } // converting ReadLikelihoods to ReadLikelihoods - final Map> alleleMapper = AssemblyBasedCallerUtils.createAlleleMapper(mergedVC, loc, haplotypes, null); + final Map> alleleMapper = AssemblyBasedCallerUtils.createAlleleMapper(mergedVC, loc, haplotypes); final ReadLikelihoods logLikelihoods = logReadLikelihoods.marginalize(alleleMapper, new SimpleInterval(mergedVC).expandWithinContig(HaplotypeCallerGenotypingEngine.ALLELE_EXTENSION, header.getSequenceDictionary())); @@ -120,7 +118,7 @@ public CalledHaplotypes callMutations( final PerAlleleCollection normalArtifactLogOdds = somaticLogOdds(logNormalMatrix); - final Set forcedAlleles = getAllelesConsistentWithGivenAlleles(givenAlleles, loc, mergedVC); + final Set forcedAlleles = AssemblyBasedCallerUtils.getAllelesConsistentWithGivenAlleles(givenAlleles, mergedVC); final List tumorAltAlleles = mergedVC.getAlternateAlleles().stream() .filter(allele -> forcedAlleles.contains(allele) || tumorLogOdds.getAlt(allele) > MTAC.getEmissionLogOdds()) .collect(Collectors.toList()); @@ -182,33 +180,6 @@ public CalledHaplotypes callMutations( return new CalledHaplotypes(outputCallsWithEventCountAnnotation, calledHaplotypes); } - private Set getAllelesConsistentWithGivenAlleles(List givenAlleles, int loc, VariantContext mergedVC) { - final List> givenAltAndRefAllelesInOriginalContext = AssemblyBasedCallerUtils.getVariantContextsFromGivenAlleles(loc, givenAlleles, false).stream() - .flatMap(vc -> vc.getAlternateAlleles().stream().map(allele -> ImmutablePair.of(allele, vc.getReference()))).collect(Collectors.toList()); - - return mergedVC.getAlternateAlleles().stream() - .map(allele -> ImmutablePair.of(allele, mergedVC.getReference())) - .filter(altAndRef -> givenAltAndRefAllelesInOriginalContext.stream().anyMatch(givenAltAndRef -> allelesAreConsistent(givenAltAndRef, altAndRef))) - .map(altAndRefPair -> altAndRefPair.getLeft()) - .collect(Collectors.toSet()); - } - - // check whether two alleles coming from different variant contexts and with possibly different reference alleles - // could in fact be the same. The condition is that one is a prefix of the other - private boolean allelesAreConsistent(final Pair altAndRef1, final Pair altAndRef2) { - final Allele alt1 = altAndRef1.getLeft(); - final Allele alt2 = altAndRef2.getLeft(); - if (alt1.isSymbolic() || alt2.isSymbolic()) { - return false; - } else { - final int sizeDiff1 = alt1.length() - altAndRef1.getRight().length(); - final int sizeDiff2 = alt2.length() - altAndRef2.getRight().length(); - return (sizeDiff1 == sizeDiff2) && (alt1.length() < alt2.length() ? 
- alt1.basesMatch(Arrays.copyOf(alt2.getBases(), alt1.length())) : - alt2.basesMatch(Arrays.copyOf(alt1.getBases(), alt2.length()))); - } - } - // compute the likelihoods that each allele is contained at some allele fraction in the sample protected PerAlleleCollection somaticLogOdds(final LikelihoodMatrix logMatrix) { int alleleListEnd = logMatrix.alleles().size()-1; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java index 03e170fecef..70350aab4c5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/ReblockGVCF.java @@ -242,7 +242,7 @@ private VariantContext regenotypeVC(final VariantContext originalVC) { final GenotypeLikelihoodsCalculationModel model = result.getType() == VariantContext.Type.INDEL ? GenotypeLikelihoodsCalculationModel.INDEL : GenotypeLikelihoodsCalculationModel.SNP; - result = genotypingEngine.calculateGenotypes(originalVC, model, null); + result = genotypingEngine.calculateGenotypes(originalVC, model); } if (result == null) { diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 076209c9554..9eeece5ef42 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -15,11 +15,13 @@ import java.nio.file.Path; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.ArrayUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.MutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.tools.walkers.genotyper.*; +import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils; import org.broadinstitute.hellbender.utils.BaseUtils; import org.broadinstitute.hellbender.utils.MathUtils; import org.broadinstitute.hellbender.utils.Utils; @@ -1820,5 +1822,6 @@ public static boolean isUnmixedMnpIgnoringNonRef(final VariantContext vc) { return true; } + } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/ReducibleAnnotationBaseTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/ReducibleAnnotationBaseTest.java index 99744322c44..bd125505c2c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/ReducibleAnnotationBaseTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/allelespecific/ReducibleAnnotationBaseTest.java @@ -110,7 +110,7 @@ public void testFinalizeAnnotationGATK3Concordance(List VCs, Var GenotypeLikelihoodsCalculationModel model = result.getType() == VariantContext.Type.INDEL ? 
GenotypeLikelihoodsCalculationModel.INDEL : GenotypeLikelihoodsCalculationModel.SNP; - VariantContext withGenotypes = genotypingEngine.calculateGenotypes(result, model, null); + VariantContext withGenotypes = genotypingEngine.calculateGenotypes(result, model); withGenotypes = new VariantContextBuilder(withGenotypes).attributes(result.getAttributes()).make(); VariantContext finalized = annotatorEngine.finalizeAnnotations(withGenotypes, result); finalized = GATKVariantContextUtils.reverseTrimAlleles(finalized); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngineUnitTest.java index 4fbd70a59db..aa8f873c3d1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngineUnitTest.java @@ -85,7 +85,7 @@ public void testCalculateGenotypes() { new GenotypeBuilder("sample1").alleles(gtAlleles).PL(new double[]{0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}).make(), // homVar for first alt -- note that these are doubles, so they get renormalized new GenotypeBuilder("sample2").alleles(gtAlleles).PL(new double[]{0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0}).make()); // homVar for second alt final VariantContext vc = new VariantContextBuilder("test", "1",1, refAllele.length(), allelesDel).genotypes(genotypes).make(); - final VariantContext vcOut = genotypingEngine.calculateGenotypes(vc, GenotypeLikelihoodsCalculationModel.INDEL, null); + final VariantContext vcOut = genotypingEngine.calculateGenotypes(vc, GenotypeLikelihoodsCalculationModel.INDEL); Assert.assertFalse(vcOut.getAlleles().contains(altT)); // Make sure the spanning deletion is removed since the deletion was removed @@ -97,7 +97,7 @@ public void testCalculateGenotypes() { new GenotypeBuilder("sample2").alleles(gtAlleles).PL(new double[]{0, 0, 0, 0, 0, 0}).make()); final VariantContext vcSpanDel = new VariantContextBuilder("test1", "1",2, 2 + refAlleleSpanDel.length() - 1, vcAllelesSpanDel). genotypes(genotypesSpanDel).make(); - final VariantContext vcOut1 = genotypingEngine.calculateGenotypes(vcSpanDel, GenotypeLikelihoodsCalculationModel.INDEL, null); + final VariantContext vcOut1 = genotypingEngine.calculateGenotypes(vcSpanDel, GenotypeLikelihoodsCalculationModel.INDEL); //the site is monomorphic, which becomes null Assert.assertTrue(vcOut1 == null); @@ -125,7 +125,7 @@ public void testCalculateGenotypes() { new GenotypeBuilder("s2157").alleles(gtAlleles).PL(new int[]{0,21,315,21,315,315}).make()); final VariantContext vcMultiWithSpanDel = new VariantContextBuilder("test2", "1",2, 2 + refAlleleSpanDel.length() - 1, mutliAllelicWithSpanDel). 
genotypes(regressionGenotypes).make(); - final VariantContext vcOut2 = genotypingEngine.calculateGenotypes(vcMultiWithSpanDel, GenotypeLikelihoodsCalculationModel.SNP, null); + final VariantContext vcOut2 = genotypingEngine.calculateGenotypes(vcMultiWithSpanDel, GenotypeLikelihoodsCalculationModel.SNP); //low quality T should get dropped, leaving only star, which shouldn't be output Assert.assertTrue(vcOut2 == null); } @@ -134,7 +134,7 @@ public void testCalculateGenotypes() { public void testNoIndexOutOfBoundsExceptionWhenSubsettingToNoAlleles(){ final VariantContext vc = new VariantContextBuilder(null, "1", 100, 100, Arrays.asList(refA, altT)) .genotypes(GenotypeBuilder.create(SAMPLES.getSample(0), Arrays.asList(refA, refA))).make(); - getGenotypingEngine().calculateGenotypes(vc, GenotypeLikelihoodsCalculationModel.SNP, null); + getGenotypingEngine().calculateGenotypes(vc, GenotypeLikelihoodsCalculationModel.SNP); } @Test @@ -142,7 +142,7 @@ public void testGenotypesWithNonRefSymbolicAllelesAreNotNulled(){ final VariantContext vc = new VariantContextBuilder(null, "1", 100, 100, Arrays.asList(refA, Allele.NON_REF_ALLELE)) .genotypes(GenotypeBuilder.create(SAMPLES.getSample(0), Arrays.asList(refA, Allele.NON_REF_ALLELE))).make(); - Assert.assertNotNull(getGenotypingEngine().calculateGenotypes(vc, GenotypeLikelihoodsCalculationModel.SNP, null)); + Assert.assertNotNull(getGenotypingEngine().calculateGenotypes(vc, GenotypeLikelihoodsCalculationModel.SNP)); } @DataProvider diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtilsUnitTest.java deleted file mode 100644 index dcb31c9ffe1..00000000000 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtilsUnitTest.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.broadinstitute.hellbender.tools.walkers.genotyper; - -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; -import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -public class GenotypingGivenAllelesUtilsUnitTest { - - @Test - public void testComposeGivenAllelesVariantContextFromVariantList() { - - final SimpleInterval loc = new SimpleInterval("20", 68, 68); - - final List sameLocDelAlleles1 = Arrays.asList(Allele.create("GT", true), Allele.create("G")); - final VariantContext sameLocDelVc1 = new VariantContextBuilder("a", "20", 68, 69, sameLocDelAlleles1).make(); - - final List sameLocSnpAlleles1 = Arrays.asList(Allele.create("G", true), Allele.create("C")); - final VariantContext sameLocSnpVc1 = new VariantContextBuilder("a", "20", 68, 68, sameLocSnpAlleles1).make(); - - final List spanningDelAlleles1 = Arrays.asList(Allele.create("ATGTA", true), Allele.create("A")); - final VariantContext spanningDelVc1 = new VariantContextBuilder("a", "20", 66, 70, spanningDelAlleles1).make(); - - final List expectedAlleles = Arrays.asList(Allele.create("GT", true), Allele.create("G"), Allele.create("CT")); - final VariantContext expectedVC = new VariantContextBuilder("a", "20", 68, 69, expectedAlleles).make(); - - final VariantContext variantContext = - 
GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(Arrays.asList(spanningDelVc1, sameLocDelVc1, sameLocSnpVc1), - loc, - true); - - VariantContextTestUtils.assertVariantContextsAreEqual(variantContext, expectedVC, Collections.emptyList()); - } -} \ No newline at end of file diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java index d4067a9442d..b62004179b6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java @@ -590,7 +590,6 @@ public Object[][] getEventMapperData() { snpVc, snpVc.getStart(), Arrays.asList(snpHaplotype, refHaplotype), - emptyGivenAllelesList, Maps.asMap(new HashSet<>(snpAlleles), (key) -> { if (snpAlleles.get(1).equals(key)) return Arrays.asList(snpHaplotype); @@ -601,7 +600,6 @@ public Object[][] getEventMapperData() { mergedSnpAndDelVC, mergedSnpAndDelVC.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionHaplotype), - emptyGivenAllelesList, Maps.asMap(new HashSet<>(mergedSnpAndDelVC.getAlleles()), (key) -> { if (snpAlleles.get(1).equals(key)) return Arrays.asList(snpHaplotype); @@ -614,7 +612,6 @@ public Object[][] getEventMapperData() { snpVc, snpVc.getStart(), Arrays.asList(snpHaplotype, refHaplotype, snpHaplotypeNotPresentInEventsAtThisLoc), - Arrays.asList(snpVc), Maps.asMap(new HashSet<>(snpVc.getAlleles()), (key) -> { if (snpAlleles.get(1).equals(key)) return Arrays.asList(snpHaplotype); @@ -627,7 +624,6 @@ public Object[][] getEventMapperData() { mergedSnpAndDelVC, snpVc.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionHaplotype, deletionHaplotype2), - emptyGivenAllelesList, Maps.asMap(new HashSet<>(mergedSnpAndDelVC.getAlleles()), (key) -> { if (snpAlleles.get(1).equals(key)) return Arrays.asList(snpHaplotype); @@ -636,16 +632,15 @@ public Object[][] getEventMapperData() { }) }); - // two spanning deletions, one in given alleles -> only the matching deletion should be in the event map for the span del + // two spanning deletions, one in given alleles tests.add(new Object[]{ mergedSnpAndDelVC, snpVc.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionHaplotype, deletionHaplotype2), - Arrays.asList(snpVc, deletionVc2), Maps.asMap(new HashSet<>(mergedSnpAndDelVC.getAlleles()), (key) -> { if (snpAlleles.get(1).equals(key)) return Arrays.asList(snpHaplotype); - if (Allele.SPAN_DEL.equals(key)) return Arrays.asList(deletionHaplotype2); + if (Allele.SPAN_DEL.equals(key)) return Arrays.asList(deletionHaplotype, deletionHaplotype2); return Arrays.asList(refHaplotype); }) }); @@ -655,7 +650,6 @@ public Object[][] getEventMapperData() { deletionStartingAtLocVc, deletionStartingAtLocVc.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionStartingAtLocHaplotype), - Arrays.asList(deletionStartingAtLocVc), Maps.asMap(new HashSet<>(deletionStartingAtLocVc.getAlleles()), (key) -> { if (deletionStartingAtLocAlleles.get(1).equals(key)) return Arrays.asList(deletionStartingAtLocHaplotype); @@ -668,7 +662,6 @@ public Object[][] getEventMapperData() { snpVc, snpVc.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionStartingAtLocHaplotype), - Arrays.asList(snpVc), Maps.asMap(new HashSet<>(snpVc.getAlleles()), (key) -> { if 
(snpAlleles.get(1).equals(key)) return Arrays.asList(snpHaplotype); @@ -681,7 +674,6 @@ public Object[][] getEventMapperData() { mergedSnpAndDelStartingAtLocVC, snpVc.getStart(), Arrays.asList(snpHaplotype, refHaplotype, deletionStartingAtLocHaplotype), - Arrays.asList(deletionStartingAtLocVc, snpVc), Maps.asMap(new HashSet<>(mergedSnpAndDelStartingAtLocVC.getAlleles()), (key) -> { if (deletionStartingAtLocAlleles.get(1).equals(key)) return Arrays.asList(deletionStartingAtLocHaplotype); @@ -698,9 +690,8 @@ public Object[][] getEventMapperData() { public void testGetEventMapper(final VariantContext mergedVc, final int loc, final List haplotypes, - final List activeAllelesToGenotype, final Map> expectedEventMap) { - final Map> actualEventMap = AssemblyBasedCallerUtils.createAlleleMapper(mergedVc, loc, haplotypes, activeAllelesToGenotype); + final Map> actualEventMap = AssemblyBasedCallerUtils.createAlleleMapper(mergedVc, loc, haplotypes); Assert.assertEquals(actualEventMap.size(), expectedEventMap.size()); for (final Allele key : actualEventMap.keySet()) { Assert.assertTrue(expectedEventMap.containsKey(key), "Got unexpected allele " + key + " with values " + actualEventMap.get(key)); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 08fd330e0fd..59adcea434b 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -410,55 +410,32 @@ public void testFloorGVCFBlocks(final String inputFileName, final String referen } @Test - public void testGenotypeGivenAllelesMode() throws IOException { + public void testForceCalling() throws IOException { Utils.resetRandomGenerator(); final File output = createTempFile("testGenotypeGivenAllelesMode", ".vcf"); - final File expected = new File(TEST_FILES_DIR, "expected.testGenotypeGivenAllelesMode.gatk4.vcf"); - - final String outputPath = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expected.getAbsolutePath() : output.getAbsolutePath(); + final File forceCallingVcf = new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles.vcf"); final String[] args = { "-I", NA12878_20_21_WGS_bam, "-R", b37_reference_20_21, "-L", "20:10000000-10010000", - "-O", outputPath, + "-O", output.getAbsolutePath(), "-pairHMM", "AVX_LOGLESS_CACHING", "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false", - "--genotyping-mode", "GENOTYPE_GIVEN_ALLELES", - "--alleles", new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles.vcf").getAbsolutePath() + "--alleles", forceCallingVcf.getAbsolutePath() }; runCommandLine(args); - // Test for an exact match against past results - if ( ! 
UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) { - IntegrationTestSpec.assertEqualTextFiles(output, expected); + final Map> altAllelesByPosition = VariantContextTestUtils.streamVcf(output) + .collect(Collectors.toMap(vc -> vc.getStart(), vc -> vc.getAlternateAlleles())); + for (final VariantContext vc : new FeatureDataSource(forceCallingVcf)) { + final List altAllelesAtThisLocus = altAllelesByPosition.get(vc.getStart()); + vc.getAlternateAlleles().forEach(a -> Assert.assertTrue(altAllelesAtThisLocus.contains(a))); } } - @Test(expectedExceptions = CommandLineException.BadArgumentValue.class) - public void testGenotypeGivenAllelesModeNotAllowedInGVCFMode() throws IOException { - Utils.resetRandomGenerator(); - - final File output = createTempFile("testGenotypeGivenAllelesModeNotAllowedInGVCFMode", ".g.vcf"); - - final String[] args = { - "-I", NA12878_20_21_WGS_bam, - "-R", b37_reference_20_21, - "-L", "20:10000000-10010000", - "-O", output.getAbsolutePath(), - "-pairHMM", "AVX_LOGLESS_CACHING", - "--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false", - "--genotyping-mode", "GENOTYPE_GIVEN_ALLELES", - "--alleles", new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles.vcf").getAbsolutePath(), - "-ERC", "GVCF" - }; - - // Should throw, since -ERC GVCF is incompatible with GENOTYPE_GIVEN_ALLELES mode - runCommandLine(args); - } - @Test public void testBamoutProducesReasonablySizedOutput() { final Path bamOutput = createTempFile("testBamoutProducesReasonablySizedOutput", ".bam").toPath(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java index 372035d357c..41f9c6d24fb 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2IntegrationTest.java @@ -445,7 +445,7 @@ private static void checkMnpOutput(int maxMnpDistance, File outputVcf) { } @Test - public void testGivenAllelesMode() throws Exception { + public void testForceCalling() throws Exception { Utils.resetRandomGenerator(); final File unfilteredVcf = createTempFile("unfiltered", ".vcf"); diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGenotypeGivenAllelesMode.gatk4.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGenotypeGivenAllelesMode.gatk4.vcf deleted file mode 100644 index 845770843a8..00000000000 --- a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGenotypeGivenAllelesMode.gatk4.vcf +++ /dev/null @@ -1,35 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##contig= -##contig= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 -20 10000694 . G A 1029.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=3.599;DP=82;ExcessHet=3.0103;FS=1.822;MLEAC=1;MLEAF=0.500;MQ=48.66;MQRankSum=-5.114;QD=12.56;ReadPosRankSum=0.657;SOR=0.566 GT:AD:DP:GQ:PL 0/1:45,37:82:99:1037,0,1601 -20 10001436 . A AAGGCT 2326.06 . AC=2;AF=1.00;AN=2;DP=55;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=41.07;QD=25.36;SOR=3.442 GT:AD:DP:GQ:PL 1/1:0,50:50:99:2340,156,0 -20 10001661 . T C 3621.03 . 
AC=2;AF=1.00;AN=2;DP=81;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.33;QD=28.73;SOR=1.193 GT:AD:DP:GQ:PL 1/1:0,81:81:99:3635,249,0 -20 10004094 . A T 0 LowQual AC=0;AF=0.00;AN=2;DP=53;ExcessHet=3.0103;FS=0.000;MLEAC=0;MLEAF=0.00;MQ=52.75;SOR=0.527 GT:AD:DP:GQ:PL 0/0:20,0:20:45:0,45,237 -20 10004769 . TAAAACTATGC T 1010.60 . AC=1;AF=0.500;AN=2;BaseQRankSum=1.603;DP=78;ExcessHet=3.0103;FS=3.758;MLEAC=1;MLEAF=0.500;MQ=52.60;MQRankSum=-6.625;QD=15.79;ReadPosRankSum=1.879;SOR=1.306 GT:AD:DP:GQ:PL 0/1:37,27:64:99:1018,0,1471 -20 10004771 . A *,T 0 LowQual AC=1,0;AF=0.500,0.00;AN=2;BaseQRankSum=-1.270;DP=72;ExcessHet=3.0103;FS=2.397;MLEAC=1,0;MLEAF=0.500,0.00;MQ=51.93;MQRankSum=-6.325;QD=-0.00;ReadPosRankSum=2.185;SOR=1.115 GT:AD:DP:GQ:PL 0/1:32,27,0:59:99:1027,0,1345,1167,1315,2965 -20 10006819 . AAAAC T 0 LowQual AC=0;AF=0.00;AN=2;DP=75;ExcessHet=3.0103;FS=0.000;MLEAC=0;MLEAF=0.00;MQ=61.14;SOR=0.572 GT:AD:DP:GQ:PL 0/0:75,0:75:99:0,244,2147483647 -20 10006823 . C *,G 0 LowQual AC=0,0;AF=0.00,0.00;AN=2;DP=73;ExcessHet=3.0103;FS=0.000;MLEAC=0,0;MLEAF=0.00,0.00;MQ=61.18;SOR=0.069 GT:AD:DP:GQ:PL 0/0:17,0,0:17:51:0,229,2147483647,51,819,590 -20 10008738 . GGTTTGTTT GGTTTGTTTGTTT,GGTTTGTTTGTTTGTTT,G 0 LowQual AC=0,0,0;AF=0.00,0.00,0.00;AN=2;DP=50;ExcessHet=3.0103;FS=0.000;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;MQ=37.07;SOR=0.488 GT:AD:DP:GQ:PL 0/0:50,0,0,0:50:99:0,154,2147483647,154,2147483647,2147483647,154,2147483647,2147483647,2147483647 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGenotypeGivenAllelesMode.gatk4.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testGenotypeGivenAllelesMode.gatk4.vcf.idx deleted file mode 100644 index 8240045d087ffc2842144cdba4bf5ae040af20b0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 428 zcmb`D+e*Vg5Qawzih3cwNH*E?_AVqb7TO4PTf7UJY@p5BXcL0IjXt9<;4V^8ul?X> zhG7mrqi5;e5dch_nvFG6gwS}uM@?sIv+oh($jWLfxls5)2=el52@$jf{S>|$$;HT|Ni4PinlfkVL*Rh&o~@?fkQ__%Zj9kM-{Y`p=`u~Rxr z^c07{Nr)M9$yXr)DkH3F zQ9-~@mxqn(csYxUBm*o0@e_%^_a!&0LW2VD*|~$a3pjJ>xs&JA>D?FpF&=*P3F{D6 AMF0Q* diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles.vcf index 65d8b217bf9..d0e089ac547 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles.vcf @@ -32,7 +32,6 @@ 20 10004094 . A T . . . GT 0|1 20 10004769 . TAAAACTATGC T . . . GT 0|1 20 10004771 . A T . . . GT 0|1 -20 10006819 . AAAAC T . . . GT 0|1 +20 10006819 . AAAAC A . . . GT 0|1 20 10006823 . C G . . . GT 0|1 -20 10008738 . G GGTTT,GGTTTGTTT . PASS . GT 1|2 -20 10008738 . GGTTTGTTT G . PASS . GT 0|1 +20 10008738 . C CGTTT,CGTTTGTTT . PASS . GT 1|2
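
Note: the following sketch is editorial and not part of the patch. It restates, in plain Java, the force-calling logic that the hunks above now share between HaplotypeCallerEngine.isActive()/callRegion() and Mutect2Engine.isActive()/callRegion(): a locus is forced active when any record from the --alleles source overlaps it and is either unfiltered or filtered records are being force-called, and the same filter selects the given alleles handed to assembly and genotyping. The class and method names below are illustrative only, and the command-line name of the force-call-filtered option (the forceCallFiltered field in the patch) is defined outside the files shown here; only --alleles appears in the integration tests above.

import java.util.List;
import java.util.stream.Collectors;

import htsjdk.variant.variantcontext.VariantContext;

// Illustrative sketch of the shared force-calling checks introduced by this patch.
final class ForceCallingSketch {
    private ForceCallingSketch() { }

    // Mirrors the isActive() hunks: force the region active if any overlapping given allele
    // is unfiltered, or if filtered alleles are being force-called.
    static boolean isForcedActive(final List<VariantContext> givenAllelesAtLocus,
                                  final boolean forceCallFiltered) {
        return givenAllelesAtLocus.stream().anyMatch(vc -> forceCallFiltered || vc.isNotFiltered());
    }

    // Mirrors the callRegion() hunks: keep every overlapping record from the --alleles source,
    // dropping filtered records unless they are being force-called.
    static List<VariantContext> selectGivenAlleles(final List<VariantContext> overlappingRecords,
                                                   final boolean forceCallFiltered) {
        return overlappingRecords.stream()
                .filter(vc -> forceCallFiltered || vc.isNotFiltered())
                .collect(Collectors.toList());
    }
}

With this change, force calling in HaplotypeCaller is requested the same way as in Mutect2: pass --alleles with a VCF of sites to force, in place of the removed --genotyping-mode GENOTYPE_GIVEN_ALLELES argument, as the updated HaplotypeCallerIntegrationTest.testForceCalling shows.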