From d91e3b924da61dbe5099aa55f1e9c2bc50fb2c93 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Sat, 1 Jun 2024 12:35:54 +0300 Subject: [PATCH 1/5] Changed allele filtering in Mutect2 to work only on the tumor samples --- .../walkers/haplotypecaller/AlleleFilteringMutect.java | 7 ++++++- .../hellbender/tools/walkers/mutect/Mutect2Engine.java | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java index 28a497af064..28a298dc492 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java @@ -11,6 +11,7 @@ import java.io.OutputStreamWriter; import java.util.List; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -25,11 +26,14 @@ public class AlleleFilteringMutect extends AlleleFiltering { private SomaticGenotypingEngine genotypingEngine; + private Set normalSamples; public AlleleFilteringMutect(M2ArgumentCollection _m2args, OutputStreamWriter assemblyDebugStream, - SomaticGenotypingEngine _genotypingEngine){ + SomaticGenotypingEngine _genotypingEngine, + Set _normalSamples){ super(_m2args, assemblyDebugStream); genotypingEngine = _genotypingEngine; + normalSamples = _normalSamples; } /** @@ -48,6 +52,7 @@ public AlleleFilteringMutect(M2ArgumentCollection _m2args, int getAlleleLikelihoodVsInverse(final AlleleLikelihoods alleleLikelihoods, Allele allele) { final List> allMatrices = IntStream.range(0, alleleLikelihoods.numberOfSamples()) + .filter(i -> !normalSamples.contains(alleleLikelihoods.getSample(i))) .mapToObj(alleleLikelihoods::sampleMatrix) .collect(Collectors.toList()); final AlleleList alleleList = allMatrices.get(0); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index c831a4169e0..f69533241d9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -329,7 +329,7 @@ public List callRegion(final AssemblyRegion originalAssemblyRegi Set suspiciousLocations = new HashSet<>(); if (MTAC.filterAlleles) { logger.debug("Filtering alleles"); - AlleleFilteringMutect alleleFilter = new AlleleFilteringMutect(MTAC, null, genotypingEngine); + AlleleFilteringMutect alleleFilter = new AlleleFilteringMutect(MTAC, null, genotypingEngine, normalSamples); EventMap.buildEventMapsForHaplotypes(uncollapsedReadLikelihoods.alleles(), assemblyResult.getFullReferenceWithPadding(), assemblyResult.getPaddedReferenceLoc(), From e4729822b559980864c951ee6fd11d2ee5027f48 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Sat, 1 Jun 2024 14:48:00 +0300 Subject: [PATCH 2/5] Now filter by both tumor and normal but separately --- .../AlleleFilteringMutect.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java index 28a298dc492..176c235600f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java @@ -42,6 +42,7 @@ public AlleleFilteringMutect(M2ArgumentCollection _m2args, * This is very similar to what is done in the callMutations function in MutectEngine, but here the haplotypes that do * not support the allele are all haplotypes that do not contain the allele rather than only the haplotypes that support reference * etc. + * The log odds is calculated separately for the tumor and the normal samples and the smalles (the strongest) is returned * * @param alleleLikelihoods * @param allele @@ -57,9 +58,23 @@ int getAlleleLikelihoodVsInverse(final AlleleLikelihoods allel .collect(Collectors.toList()); final AlleleList alleleList = allMatrices.get(0); final LikelihoodMatrix logAllMatrix = SomaticGenotypingEngine.combinedLikelihoodMatrix(allMatrices, alleleList); - double alleleLogOdds = somaticAltLogOdds(logAllMatrix); - logger.debug(() -> String.format("GAL:: %s: %f", allele.toString(), alleleLogOdds)); + final double alleleLogOddsTumor = somaticAltLogOdds(logAllMatrix); + logger.debug(() -> String.format("GAL:: Tumor %s: %f", allele.toString(), alleleLogOddsTumor)); + double alleleLogOdds = alleleLogOddsTumor; + if (!normalSamples.isEmpty()) { + final List> allMatricesNormal = IntStream.range(0, alleleLikelihoods.numberOfSamples()) + .filter(i -> normalSamples.contains(alleleLikelihoods.getSample(i))) + .mapToObj(alleleLikelihoods::sampleMatrix) + .collect(Collectors.toList()); + final LikelihoodMatrix logAllMatrixNormal = SomaticGenotypingEngine.combinedLikelihoodMatrix(allMatricesNormal, alleleList); + double alleleLogOddsNormal = somaticAltLogOdds(logAllMatrixNormal); + logger.debug(() -> String.format("GAL:: Normal %s: %f", allele.toString(), alleleLogOddsNormal)); + if (alleleLogOddsNormal < alleleLogOddsTumor) { + alleleLogOdds = alleleLogOddsNormal; + } + } return (int)(10*alleleLogOdds); + } From 17cae0e1fe426f35584e6a78cf77de1f3bcd354b Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Mon, 3 Jun 2024 19:26:39 +0300 Subject: [PATCH 3/5] Selecting only the informative reads in filtering --- .../haplotypecaller/AlleleFiltering.java | 17 +++-- .../haplotypecaller/AlleleFilteringHC.java | 6 +- .../AlleleFilteringMutect.java | 38 +++++++++-- .../HaplotypeCallerEngine.java | 6 +- .../RampedHaplotypeCallerEngine.java | 2 +- .../tools/walkers/mutect/Mutect2Engine.java | 2 +- .../AlleleFilteringUnitTest.java | 60 +++++++++++------- .../HaplotypeCallerIntegrationTest.java | 2 +- ...ingFlowModeAdvanced.expected.flowbased.vcf | 28 ++++---- ...lowModeAdvanced.expected.flowbased.vcf.idx | Bin 438905 -> 438913 bytes ...ityAnnotationRevamp.expected.flowbased.vcf | 23 +++---- ...nnotationRevamp.expected.flowbased.vcf.idx | Bin 438910 -> 438917 bytes 12 files changed, 117 insertions(+), 67 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java index 3a350228128..8b775a5d5dc 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java @@ -1,5 +1,7 @@ package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.CollectionUtil; import htsjdk.variant.variantcontext.Allele; import org.apache.commons.lang3.tuple.ImmutablePair; @@ -11,6 +13,7 @@ import org.broadinstitute.hellbender.tools.walkers.annotator.StrandOddsRatio; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.InverseAllele; import org.broadinstitute.hellbender.utils.BaseUtils; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; import org.broadinstitute.hellbender.utils.haplotype.Event; import org.broadinstitute.hellbender.utils.haplotype.Haplotype; @@ -44,9 +47,13 @@ public abstract class AlleleFiltering { final protected static Logger logger = LogManager.getLogger(AlleleFiltering.class); final protected AssemblyBasedCallerArgumentCollection assemblyArgs; final private OutputStreamWriter assemblyDebugOutStream; - AlleleFiltering(final AssemblyBasedCallerArgumentCollection assemblyArgs, final OutputStreamWriter assemblyDebugOutStream){ + final private SAMSequenceDictionary sequenceDictionary; + AlleleFiltering(final AssemblyBasedCallerArgumentCollection assemblyArgs, + final OutputStreamWriter assemblyDebugOutStream, + final SAMFileHeader header){ this.assemblyArgs = assemblyArgs; this.assemblyDebugOutStream = assemblyDebugOutStream; + this.sequenceDictionary = header.getSequenceDictionary(); } /** @@ -177,8 +184,6 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A } logger.debug("AHM::printout end"); - - final List> alleleLikelihoods = activeAlleles.stream().map(al -> getAlleleLikelihoodMatrix(readLikelihoods, al, haplotypeAlleleMap, activeHaplotypes)).collect(Collectors.toList()); @@ -334,8 +339,10 @@ private AlleleLikelihoods getAlleleLikelihoodMatrix(final Alle final Map> haplotypeAlleleMap, final Set enabledHaplotypes ){ + final Map> alleleHaplotypeMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); + final Allele notAllele= InverseAllele.of(allele.altAllele(), true); readLikelihoods.alleles().stream().filter(enabledHaplotypes::contains) .filter(h->haplotypeAlleleMap.get(h).contains(allele)) @@ -345,6 +352,9 @@ private AlleleLikelihoods getAlleleLikelihoodMatrix(final Alle .forEach(alleleHaplotypeMap.get(notAllele)::add); final AlleleLikelihoods alleleLikelihoods = readLikelihoods.marginalize(alleleHaplotypeMap); + final SimpleInterval variantCallingRelevantFragmentOverlap = new SimpleInterval(allele).expandWithinContig(assemblyArgs.informativeReadOverlapMargin, sequenceDictionary); + alleleLikelihoods.retainEvidence(variantCallingRelevantFragmentOverlap::overlaps); + logger.debug(() -> String.format("GALM: %s %d %d", allele, alleleHaplotypeMap.get(allele.altAllele()).size(), alleleHaplotypeMap.get(notAllele).size())); return alleleLikelihoods; } @@ -358,7 +368,6 @@ private double getAlleleSOR(final AlleleLikelihoods alleleLike final double sor = StrandOddsRatio.calculateSOR(contingency_table); logger.debug(() -> String.format("GAS:: %s: %f (%d %d %d %d)", allele.toString(), sor, contingency_table[0][0], contingency_table[0][1], contingency_table[1][0], contingency_table[1][1])); return sor; - } //filters pairs of alleles by distance diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringHC.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringHC.java index 010a9e21398..7a128de4ceb 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringHC.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringHC.java @@ -1,5 +1,6 @@ package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; +import htsjdk.samtools.SAMFileHeader; import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.hellbender.tools.walkers.genotyper.*; import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculationResult; @@ -29,8 +30,9 @@ public class AlleleFilteringHC extends AlleleFiltering { private HaplotypeCallerGenotypingEngine genotypingEngine; private AlleleFrequencyCalculator afCalc; - public AlleleFilteringHC(HaplotypeCallerArgumentCollection _hcargs, OutputStreamWriter assemblyDebugStream, HaplotypeCallerGenotypingEngine _genotypingEngine){ - super(_hcargs, assemblyDebugStream); + public AlleleFilteringHC(HaplotypeCallerArgumentCollection _hcargs, OutputStreamWriter assemblyDebugStream, + HaplotypeCallerGenotypingEngine _genotypingEngine, final SAMFileHeader header){ + super(_hcargs, assemblyDebugStream, header); genotypingEngine = _genotypingEngine; GenotypeCalculationArgumentCollection config = genotypingEngine.getConfiguration().genotypeArgs; afCalc = AlleleFrequencyCalculator.makeCalculator(config); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java index 176c235600f..162ef987ee0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java @@ -1,17 +1,21 @@ package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.util.CollectionUtil; import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.graphs.InverseAllele; import org.broadinstitute.hellbender.tools.walkers.mutect.*; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; import org.broadinstitute.hellbender.utils.genotyper.AlleleList; import org.broadinstitute.hellbender.utils.genotyper.LikelihoodMatrix; +import org.broadinstitute.hellbender.utils.haplotype.Event; +import org.broadinstitute.hellbender.utils.haplotype.Haplotype; import org.broadinstitute.hellbender.utils.read.GATKRead; import java.io.OutputStreamWriter; -import java.util.List; -import java.util.Set; +import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -27,15 +31,40 @@ public class AlleleFilteringMutect extends AlleleFiltering { private SomaticGenotypingEngine genotypingEngine; private Set normalSamples; + public AlleleFilteringMutect(M2ArgumentCollection _m2args, OutputStreamWriter assemblyDebugStream, SomaticGenotypingEngine _genotypingEngine, - Set _normalSamples){ - super(_m2args, assemblyDebugStream); + Set _normalSamples, + final SAMFileHeader header){ + super(_m2args, assemblyDebugStream, header); genotypingEngine = _genotypingEngine; normalSamples = _normalSamples; } + private AlleleLikelihoods getAlleleLikelihoodMatrix(final AlleleLikelihoods readLikelihoods, + final Event allele, + final Map> haplotypeAlleleMap, + final Set enabledHaplotypes + ){ + + final Map> alleleHaplotypeMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); + + + final Allele notAllele= InverseAllele.of(allele.altAllele(), true); + readLikelihoods.alleles().stream().filter(enabledHaplotypes::contains) + .filter(h->haplotypeAlleleMap.get(h).contains(allele)) + .forEach(alleleHaplotypeMap.get(allele.altAllele())::add); + readLikelihoods.alleles().stream().filter(enabledHaplotypes::contains) + .filter(h -> !haplotypeAlleleMap.get(h).contains(allele)) + .forEach(alleleHaplotypeMap.get(notAllele)::add); + + final AlleleLikelihoods alleleLikelihoods = readLikelihoods.marginalize(alleleHaplotypeMap); + + logger.debug(() -> String.format("GALM: %s %d %d", allele, alleleHaplotypeMap.get(allele.altAllele()).size(), alleleHaplotypeMap.get(notAllele).size())); + return alleleLikelihoods; + } + /** * Calculate calculate genotype likelihood of requirement of an allele. Specifically, calculates the likelihood * of the data given that allele versus the likelihood of the data when all haplotypes containing the allele are removed @@ -89,6 +118,7 @@ int getAlleleLikelihoodVsInverse(final AlleleLikelihoods allel private double somaticAltLogOdds(final LikelihoodMatrix matrix) { final LikelihoodMatrix initialMatrix = matrix; + if (matrix.getAllele(1-matrix.indexOfReference()) instanceof InverseAllele){ throw new GATKException.ShouldNeverReachHereException("Inverse allele removed in filtering"); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index 36918f1fb91..96658503983 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -926,14 +926,16 @@ public List callRegion(final AssemblyRegion region, final Featur if (hcArgs.filterAlleles) { logger.debug("Filtering alleles"); - AlleleFilteringHC alleleFilter = new AlleleFilteringHC(hcArgs, assemblyDebugOutStream, localGenotypingEngine); + AlleleFilteringHC alleleFilter = new AlleleFilteringHC(hcArgs, assemblyDebugOutStream, localGenotypingEngine, readsHeader); //need to update haplotypes to find the alleles EventMap.buildEventMapsForHaplotypes(readLikelihoods.alleles(), assemblyResult.getFullReferenceWithPadding(), assemblyResult.getPaddedReferenceLoc(), hcArgs.assemblerArgs.debugAssembly, hcArgs.maxMnpDistance); - subsettedReadLikelihoodsFinal = alleleFilter.filterAlleles(readLikelihoods, assemblyResult.getPaddedReferenceLoc().getStart(), suspiciousLocations); + subsettedReadLikelihoodsFinal = alleleFilter.filterAlleles(readLikelihoods, + assemblyResult.getPaddedReferenceLoc().getStart(), + suspiciousLocations); } else { subsettedReadLikelihoodsFinal = readLikelihoods; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/RampedHaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/RampedHaplotypeCallerEngine.java index e47245c9f91..b1ef3fda3c5 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/RampedHaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/RampedHaplotypeCallerEngine.java @@ -480,7 +480,7 @@ private void filter(final CallRegionContext context) { context.suspiciousLocations = new HashSet<>(); if (hcArgs.filterAlleles) { logger.debug("Filtering alleles"); - AlleleFilteringHC alleleFilter = new AlleleFilteringHC(hcArgs, assemblyDebugOutStream, localGenotypingEngine); + AlleleFilteringHC alleleFilter = new AlleleFilteringHC(hcArgs, assemblyDebugOutStream, localGenotypingEngine, readsHeader); //need to update haplotypes to find the alleles EventMap.buildEventMapsForHaplotypes(context.readLikelihoods.alleles(), context.assemblyResult.getFullReferenceWithPadding(), diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index f69533241d9..5f6e6f1f557 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -329,7 +329,7 @@ public List callRegion(final AssemblyRegion originalAssemblyRegi Set suspiciousLocations = new HashSet<>(); if (MTAC.filterAlleles) { logger.debug("Filtering alleles"); - AlleleFilteringMutect alleleFilter = new AlleleFilteringMutect(MTAC, null, genotypingEngine, normalSamples); + AlleleFilteringMutect alleleFilter = new AlleleFilteringMutect(MTAC, null, genotypingEngine, normalSamples, header); EventMap.buildEventMapsForHaplotypes(uncollapsedReadLikelihoods.alleles(), assemblyResult.getFullReferenceWithPadding(), assemblyResult.getPaddedReferenceLoc(), diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java index 1ccc0f46233..dad9bddcf99 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringUnitTest.java @@ -1,5 +1,9 @@ package org.broadinstitute.hellbender.tools.walkers.haplotypecaller; +import com.google.common.collect.ImmutableList; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.TextCigarCodec; import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.hellbender.utils.SimpleInterval; @@ -16,18 +20,26 @@ public class AlleleFilteringUnitTest { + SAMFileHeader initReference() { + SAMSequenceDictionary dictionary = new SAMSequenceDictionary(); + dictionary.addSequence(new SAMSequenceRecord("1", 50000)); + SAMFileHeader header = new SAMFileHeader(dictionary); + return header; + } + @Test public void testNoNeedToFilter(){ List haplotypeList = new ArrayList<>(); final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); + Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotypeList.add(haplotype); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); @@ -58,7 +70,7 @@ public void testNoNeedToFilter(){ HaplotypeCallerGenotypingEngine genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samples, ! hcArgs.doNotRunPhysicalPhasing, false); - AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine); + AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine, initReference()); AlleleLikelihoods filtered_lks = alleleFiltering.filterAlleles(lks, 0, new HashSet<>()); Assert.assertEquals(filtered_lks.alleles(), lks.alleles()); } @@ -140,18 +152,18 @@ public void testFilterCloseMis(){ List haplotypeList = new ArrayList<>(); final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGTCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); @@ -186,7 +198,7 @@ public void testFilterCloseMis(){ HaplotypeCallerGenotypingEngine genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samples, ! hcArgs.doNotRunPhysicalPhasing, false); - AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine); + AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine, initReference()); AlleleLikelihoods filtered_lks = alleleFiltering.filterAlleles(lks, 0, new HashSet<>()); Assert.assertEquals(filtered_lks.alleles(), haplotypeList.subList(0,2)); } @@ -198,18 +210,18 @@ public void testFilterDistantHindel(){ List haplotypeList = new ArrayList<>(); final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATTG".getBytes(), false, 0, TextCigarCodec.decode("7M1I1M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 109)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10009)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); @@ -244,7 +256,7 @@ public void testFilterDistantHindel(){ HaplotypeCallerGenotypingEngine genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samples, ! hcArgs.doNotRunPhysicalPhasing, false); - AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine); + AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine, initReference()); AlleleLikelihoods filtered_lks = alleleFiltering.filterAlleles(lks, 0, new HashSet<>()); Assert.assertEquals(filtered_lks.alleles(), haplotypeList.subList(0,2)); } @@ -256,18 +268,18 @@ public void testNotFilterDistantM(){ List haplotypeList = new ArrayList<>(); final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CATGCATC".getBytes(), false, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); @@ -302,7 +314,7 @@ public void testNotFilterDistantM(){ HaplotypeCallerGenotypingEngine genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samples, ! hcArgs.doNotRunPhysicalPhasing, false); - AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine); + AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine,initReference()); AlleleLikelihoods filtered_lks = alleleFiltering.filterAlleles(lks, 100, new HashSet<>()); Assert.assertEquals(filtered_lks.alleles(), haplotypeList.subList(0,1)); } @@ -315,12 +327,12 @@ public void testNotFilterLoneWeakAllele(){ List haplotypeList = new ArrayList<>(); final byte[] fullReferenceWithPadding = "CATGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATG".getBytes(), false, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); @@ -352,13 +364,13 @@ public void testNotFilterLoneWeakAllele(){ HaplotypeCallerGenotypingEngine genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samples, ! hcArgs.doNotRunPhysicalPhasing, false); - AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine); + AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine, initReference()); AlleleLikelihoods filtered_lks = alleleFiltering.filterAlleles(lks, 100, new HashSet<>()); //hcArgs.filterLoneAlleles is false, so we keep the weak lone allele Assert.assertEquals(filtered_lks.alleles(), haplotypeList); hcArgs.filterLoneAlleles = true; - alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine); + alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine, initReference()); filtered_lks = alleleFiltering.filterAlleles(lks, 100, new HashSet<>()); //hcArgs.filterLoneAlleles is true, so we keep the remove the weak lone allele @@ -373,17 +385,17 @@ public void testFilterDistantHindelSor() { List haplotypeList = new ArrayList<>(); final byte[] fullReferenceWithPadding = "CAGGCATG".getBytes(); Haplotype haplotype = new Haplotype(fullReferenceWithPadding, true, 0, TextCigarCodec.decode("8M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 108)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10008)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATTG".getBytes(), false, 0, TextCigarCodec.decode("7M1I1M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 109)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10009)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); haplotype = new Haplotype("CAGGCATTTG".getBytes(), false, 0, TextCigarCodec.decode("7M2I1M")); - haplotype.setGenomeLocation(new SimpleInterval("chr", 100, 110)); + haplotype.setGenomeLocation(new SimpleInterval("1", 10000, 10010)); haplotype.setEventMap(EventMap.fromHaplotype(haplotype, fullReferenceWithPadding, 0)); haplotypeList.add(haplotype); @@ -427,7 +439,7 @@ public void testFilterDistantHindelSor() { HaplotypeCallerGenotypingEngine genotypingEngine = new HaplotypeCallerGenotypingEngine(hcArgs, samples, !hcArgs.doNotRunPhysicalPhasing, false); - AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine); + AlleleFiltering alleleFiltering = new AlleleFilteringHC(hcArgs, null, genotypingEngine, initReference()); AlleleLikelihoods filtered_lks = alleleFiltering.filterAlleles(lks, 0, new HashSet<>()); Assert.assertEquals(filtered_lks.alleles(), haplotypeList.subList(0, 2)); } @@ -443,7 +455,7 @@ public void testIdentifyBadAlleles(){ List sors = List.of(0.0,1.0,3.5); HaplotypeCallerGenotypingEngine ge = new HaplotypeCallerGenotypingEngine(new HaplotypeCallerArgumentCollection(), SampleList.singletonSampleList("test"), false, false); - AlleleFiltering af = new AlleleFilteringHC(null, null,ge); + AlleleFiltering af = new AlleleFilteringHC(null, null,ge, initReference()); List badAlleles = af.identifyBadAlleles(rpls, sors, events, 30, 3); Assert.assertEquals(badAlleles, List.of(b, a, c)); rpls = List.of(-100, -200, 0); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 1e059b444b5..03e0dbecad3 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -54,7 +54,7 @@ public class HaplotypeCallerIntegrationTest extends CommandLineProgramTest { // instead of actually running the tests. Can be used with "./gradlew test -Dtest.single=HaplotypeCallerIntegrationTest" // to update all of the exact-match tests at once. After you do this, you should look at the // diffs in the new expected outputs in git to confirm that they are consistent with expectations. - public static final boolean UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS = false; + public static final boolean UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS = true; public static final String TEST_FILES_DIR = toolsTestDir + "haplotypecaller/"; diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfBeforeRebaseUsingFlowModeAdvanced.expected.flowbased.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfBeforeRebaseUsingFlowModeAdvanced.expected.flowbased.vcf index 309061d6bb7..1c7af7131fc 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfBeforeRebaseUsingFlowModeAdvanced.expected.flowbased.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfBeforeRebaseUsingFlowModeAdvanced.expected.flowbased.vcf @@ -5430,8 +5430,12 @@ chr9 81162397 . C . . END=81162397 GT:DP:GQ:MIN_DP:PL 0/0:44:98:44:0,9 chr9 81162398 . A . . END=81162403 GT:DP:GQ:MIN_DP:PL 0/0:45:99:44:0,107,1800 chr9 81162404 . A . . END=81162404 GT:DP:GQ:MIN_DP:PL 0/0:45:91:45:0,91,1882 chr9 81162405 . C . . END=81162430 GT:DP:GQ:MIN_DP:PL 0/0:47:99:45:0,120,1800 -chr9 81162431 . G . . END=81162431 GT:DP:GQ:MIN_DP:PL 0/0:51:59:51:0,59,1437 -chr9 81162432 . G . . END=81162477 GT:DP:GQ:MIN_DP:PL 0/0:49:99:47:0,105,1800 +chr9 81162431 . G C, 0 . ASSEMBLED_HAPS=14;BaseQRankSum=-1.126;DP=48;ExcessHet=0.0000;FILTERED_HAPS=11;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=0.000;RAW_MQandDP=172800,48;ReadPosRankSum=-1.175;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PGT:PID:PL:PS:SB 0|0:44,3,0:47:45:0|1:81162431_G_C:0,45,85,132,1847,172:81162431:23,21,0,3 +chr9 81162432 . G . . END=81162432 GT:DP:GQ:MIN_DP:PL 0/0:46:99:46:0,120,1800 +chr9 81162433 . A AG, 0 . ASSEMBLED_HAPS=14;BaseQRankSum=-2.125;DP=48;ExcessHet=0.0000;FILTERED_HAPS=11;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=0.000;RAW_MQandDP=172800,48;ReadPosRankSum=-1.258;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PGT:PID:PL:PS:SB 0|0:43,4,0:47:16:0|1:81162431_G_C:0,16,56,125,948,165:81162431:23,20,0,4 +chr9 81162434 . G . . END=81162469 GT:DP:GQ:MIN_DP:PL 0/0:45:99:44:0,104,1800 +chr9 81162470 . A . . END=81162470 GT:DP:GQ:MIN_DP:PL 0/0:44:94:44:0,94,1853 +chr9 81162471 . G . . END=81162477 GT:DP:GQ:MIN_DP:PL 0/0:48:99:44:0,113,1776 chr9 81162478 . C . . END=81162479 GT:DP:GQ:MIN_DP:PL 0/0:47:97:47:0,97,1928 chr9 81162480 . A . . END=81162487 GT:DP:GQ:MIN_DP:PL 0/0:46:99:45:0,105,1494 chr9 81162488 . T . . END=81162488 GT:DP:GQ:MIN_DP:PL 0/0:45:91:45:0,91,1496 @@ -6026,19 +6030,13 @@ chr9 81167733 . A . . END=81167733 GT:DP:GQ:MIN_DP:PL 0/0:37:67:37:0,6 chr9 81167734 . T . . END=81167735 GT:DP:GQ:MIN_DP:PL 0/0:37:99:37:0,111,1527 chr9 81167736 . T . . END=81167736 GT:DP:GQ:MIN_DP:PL 0/0:37:90:37:0,90,1461 chr9 81167737 . A . . END=81167740 GT:DP:GQ:MIN_DP:PL 0/0:38:99:35:0,105,1439 -chr9 81167741 . G . . END=81167741 GT:DP:GQ:MIN_DP:PL 0/0:37:57:37:0,57,855 -chr9 81167742 . A . . END=81167742 GT:DP:GQ:MIN_DP:PL 0/0:30:0:30:0,0,101 -chr9 81167743 . A . . END=81167743 GT:DP:GQ:MIN_DP:PL 0/0:34:20:34:0,20,512 -chr9 81167744 . A . . END=81167744 GT:DP:GQ:MIN_DP:PL 0/0:38:45:38:0,45,845 -chr9 81167745 . A . . END=81167745 GT:DP:GQ:MIN_DP:PL 0/0:38:46:38:0,46,1239 -chr9 81167746 . A . . END=81167747 GT:DP:GQ:MIN_DP:PL 0/0:38:73:37:0,73,1452 -chr9 81167748 . A . . END=81167750 GT:DP:GQ:MIN_DP:PL 0/0:37:99:37:0,108,1620 -chr9 81167751 . A . . END=81167751 GT:DP:GQ:MIN_DP:PL 0/0:30:84:30:0,84,428 -chr9 81167752 . C . . END=81167754 GT:DP:GQ:MIN_DP:PL 0/0:37:99:37:0,100,1129 -chr9 81167755 . A . . END=81167755 GT:DP:GQ:MIN_DP:PL 0/0:37:79:37:0,79,1404 -chr9 81167756 . T . . END=81167756 GT:DP:GQ:MIN_DP:PL 0/0:36:99:36:0,108,1537 -chr9 81167757 . C . . END=81167757 GT:DP:GQ:MIN_DP:PL 0/0:36:96:36:0,96,1440 -chr9 81167758 . A . . END=81167758 GT:DP:GQ:MIN_DP:PL 0/0:34:55:34:0,55,1039 +chr9 81167741 . GAAAA G, 0.04 . ASSEMBLED_HAPS=7;BaseQRankSum=0.500;DP=32;ExcessHet=0.0000;FILTERED_HAPS=4;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=-0.611;RAW_MQandDP=110827,32;ReadPosRankSum=-2.151;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PGT:PID:PL:PS:SB 0|1:26,4,0:30:3:0|1:81167741_GAAAA_G:3,0,40,81,997,121:81167741:12,14,3,1 +chr9 81167746 . A . . END=81167746 GT:DP:GQ:MIN_DP:PL 0/0:31:54:31:0,54,1133 +chr9 81167747 . A . . END=81167747 GT:DP:GQ:MIN_DP:PL 0/0:30:52:30:0,52,1164 +chr9 81167748 . A . . END=81167750 GT:DP:GQ:MIN_DP:PL 0/0:30:84:30:0,84,1260 +chr9 81167751 . A . . END=81167751 GT:DP:GQ:MIN_DP:PL 0/0:23:65:23:0,65,316 +chr9 81167752 . C . . END=81167756 GT:DP:GQ:MIN_DP:PL 0/0:30:87:29:0,87,1223 +chr9 81167757 . CA C, 20.60 . ASSEMBLED_HAPS=7;BaseQRankSum=0.648;DP=30;ExcessHet=0.0000;FILTERED_HAPS=4;MLEAC=1,0;MLEAF=0.500,0.00;MQRankSum=-0.402;RAW_MQandDP=102731,30;ReadPosRankSum=-1.994;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PGT:PID:PL:PS:SB 0|1:25,4,0:29:28:0|1:81167741_GAAAA_G:28,0,40,103,634,143:81167741:11,14,3,1 chr9 81167759 . A . . END=81167776 GT:DP:GQ:MIN_DP:PL 0/0:39:99:34:0,102,1198 chr9 81167777 . T . . END=81167777 GT:DP:GQ:MIN_DP:PL 0/0:40:58:40:0,58,1639 chr9 81167778 . G . . END=81167795 GT:DP:GQ:MIN_DP:PL 0/0:40:99:38:0,101,1445 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfBeforeRebaseUsingFlowModeAdvanced.expected.flowbased.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfBeforeRebaseUsingFlowModeAdvanced.expected.flowbased.vcf.idx index 79273dfcf536597a692a22260cb527a1d5122803..38225ebdf023d88628f209b80d1a1f6c14b352bb 100644 GIT binary patch delta 255 zcmezQM5^(r)C3WkjQreG{mh)o#A5w|qWmoV^u&^E-GZXbvc!_qiRzua($6^W$d~k?Myo+lXgQ0Bn0Y>K&|nTdt5iMgSXxv@EeudAb9yql|INNA92e3)a9 zr=y> . . END=81162353 GT:DP:GQ:MIN_DP:PL 0/0:42:82:42:0,8 chr9 81162354 . G . . END=81162377 GT:DP:GQ:MIN_DP:PL 0/0:41:99:41:0,117,1755 chr9 81162378 . T . . END=81162378 GT:DP:GQ:MIN_DP:PL 0/0:41:89:41:0,89,1756 chr9 81162379 . G . . END=81162430 GT:DP:GQ:MIN_DP:PL 0/0:45:91:40:0,91,1732 -chr9 81162431 . G . . END=81162431 GT:DP:GQ:MIN_DP:PL 0/0:51:59:51:0,59,1437 -chr9 81162432 . G . . END=81162503 GT:DP:GQ:MIN_DP:PL 0/0:48:90:43:0,90,1494 +chr9 81162431 . G C, 0 . ASSEMBLED_HAPS=14;AS_RAW_BaseQRankSum=|-1.2,1|NaN;AS_RAW_MQ=158400.00|10800.00|0.00;AS_RAW_MQRankSum=|0.0,1|NaN;AS_RAW_ReadPosRankSum=|-1.2,1|NaN;AS_SB_TABLE=23,21|0,3|0,0;BaseQRankSum=-1.126;DP=48;ExcessHet=0.0000;FILTERED_HAPS=11;HAPCOMP=1,0;HAPDOM=0.500,0.00;HEC=55,3,2,1,1,1,1,0,0,0,0,0;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=0.000;RAW_MQandDP=172800,48;ReadPosRankSum=-1.175;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PGT:PID:PL:PS:SB 0|0:44,3,0:47:45:0|1:81162431_G_C:0,45,85,132,1847,172:81162431:23,21,0,3 +chr9 81162432 . G . . END=81162432 GT:DP:GQ:MIN_DP:PL 0/0:46:99:46:0,120,1800 +chr9 81162433 . A AG, 0 . ASSEMBLED_HAPS=14;AS_RAW_BaseQRankSum=|-2.2,1|NaN;AS_RAW_MQ=154800.00|14400.00|0.00;AS_RAW_MQRankSum=|0.0,1|NaN;AS_RAW_ReadPosRankSum=|-1.3,1|NaN;AS_SB_TABLE=23,20|0,4|0,0;BaseQRankSum=-2.125;DP=48;ExcessHet=0.0000;FILTERED_HAPS=11;HAPCOMP=1,0;HAPDOM=0.400,0.00;HEC=56,2,2,1,1,1,1,0,0,0,0;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=0.000;RAW_MQandDP=172800,48;ReadPosRankSum=-1.258;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PGT:PID:PL:PS:SB 0|0:43,4,0:47:16:0|1:81162431_G_C:0,16,56,125,948,165:81162431:23,20,0,4 +chr9 81162434 . G . . END=81162503 GT:DP:GQ:MIN_DP:PL 0/0:45:90:43:0,90,1494 chr9 81162504 . G . . END=81162504 GT:DP:GQ:MIN_DP:PL 0/0:44:88:44:0,88,1671 chr9 81162505 . G . . END=81162581 GT:DP:GQ:MIN_DP:PL 0/0:45:92:39:0,92,1276 chr9 81162582 . T . . END=81162582 GT:DP:GQ:MIN_DP:PL 0/0:40:7:40:0,7,1498 @@ -5232,17 +5234,12 @@ chr9 81167684 . T . . END=81167684 GT:DP:GQ:MIN_DP:PL 0/0:38:80:38:0,8 chr9 81167685 . C . . END=81167732 GT:DP:GQ:MIN_DP:PL 0/0:36:95:34:0,95,1458 chr9 81167733 . A . . END=81167733 GT:DP:GQ:MIN_DP:PL 0/0:37:67:37:0,67,1414 chr9 81167734 . T . . END=81167740 GT:DP:GQ:MIN_DP:PL 0/0:37:90:35:0,90,1439 -chr9 81167741 . G . . END=81167741 GT:DP:GQ:MIN_DP:PL 0/0:37:57:37:0,57,855 -chr9 81167742 . A . . END=81167742 GT:DP:GQ:MIN_DP:PL 0/0:30:0:30:0,0,101 -chr9 81167743 . A . . END=81167743 GT:DP:GQ:MIN_DP:PL 0/0:34:20:34:0,20,512 -chr9 81167744 . A . . END=81167745 GT:DP:GQ:MIN_DP:PL 0/0:38:45:38:0,45,845 -chr9 81167746 . A . . END=81167747 GT:DP:GQ:MIN_DP:PL 0/0:38:73:37:0,73,1452 -chr9 81167748 . A . . END=81167750 GT:DP:GQ:MIN_DP:PL 0/0:37:99:37:0,108,1620 -chr9 81167751 . A . . END=81167751 GT:DP:GQ:MIN_DP:PL 0/0:30:84:30:0,84,428 -chr9 81167752 . C . . END=81167754 GT:DP:GQ:MIN_DP:PL 0/0:37:99:37:0,100,1129 -chr9 81167755 . A . . END=81167755 GT:DP:GQ:MIN_DP:PL 0/0:37:79:37:0,79,1404 -chr9 81167756 . T . . END=81167757 GT:DP:GQ:MIN_DP:PL 0/0:36:96:36:0,96,1440 -chr9 81167758 . A . . END=81167758 GT:DP:GQ:MIN_DP:PL 0/0:34:55:34:0,55,1039 +chr9 81167741 . GAAAA G, 0.04 . ASSEMBLED_HAPS=7;AS_RAW_BaseQRankSum=|0.5,1|NaN;AS_RAW_MQ=90226.00|13401.00|0.00;AS_RAW_MQRankSum=|-0.7,1|NaN;AS_RAW_ReadPosRankSum=|-2.2,1|NaN;AS_SB_TABLE=12,14|3,1|0,0;BaseQRankSum=0.500;DP=32;ExcessHet=0.0000;FILTERED_HAPS=4;HAPCOMP=1,0;HAPDOM=1.00,0.00;HEC=53,5;MLEAC=0,0;MLEAF=0.00,0.00;MQRankSum=-0.611;RAW_MQandDP=110827,32;ReadPosRankSum=-2.151;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PGT:PID:PL:PS:SB 0|1:26,4,0:30:3:0|1:81167741_GAAAA_G:3,0,40,81,997,121:81167741:12,14,3,1 +chr9 81167746 . A . . END=81167747 GT:DP:GQ:MIN_DP:PL 0/0:31:52:30:0,52,1133 +chr9 81167748 . A . . END=81167750 GT:DP:GQ:MIN_DP:PL 0/0:30:84:30:0,84,1260 +chr9 81167751 . A . . END=81167751 GT:DP:GQ:MIN_DP:PL 0/0:23:65:23:0,65,316 +chr9 81167752 . C . . END=81167756 GT:DP:GQ:MIN_DP:PL 0/0:30:87:29:0,87,1223 +chr9 81167757 . CA C, 20.60 . ASSEMBLED_HAPS=7;AS_RAW_BaseQRankSum=|0.6,1|NaN;AS_RAW_MQ=85730.00|13401.00|0.00;AS_RAW_MQRankSum=|-0.5,1|NaN;AS_RAW_ReadPosRankSum=|-2.0,1|NaN;AS_SB_TABLE=11,14|3,1|0,0;BaseQRankSum=0.648;DP=30;ExcessHet=0.0000;FILTERED_HAPS=4;HAPCOMP=1,0;HAPDOM=0.800,0.00;HEC=47,7,4;MLEAC=1,0;MLEAF=0.500,0.00;MQRankSum=-0.402;RAW_MQandDP=102731,30;ReadPosRankSum=-1.994;SUSP_NOISY_ADJACENT_TP_VARIANT GT:AD:DP:GQ:PGT:PID:PL:PS:SB 0|1:25,4,0:29:28:0|1:81167741_GAAAA_G:28,0,40,103,634,143:81167741:11,14,3,1 chr9 81167759 . A . . END=81167776 GT:DP:GQ:MIN_DP:PL 0/0:39:99:34:0,102,1198 chr9 81167777 . T . . END=81167777 GT:DP:GQ:MIN_DP:PL 0/0:40:58:40:0,58,1639 chr9 81167778 . G . . END=81167795 GT:DP:GQ:MIN_DP:PL 0/0:40:99:38:0,101,1445 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfWithAssemblyComplexityAnnotationRevamp.expected.flowbased.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGvcfWithAssemblyComplexityAnnotationRevamp.expected.flowbased.vcf.idx index cfc4af15754202c473d98c7ba66823df81df8e5e..8a865b46c8eb7325f02dd05a445e751696c96929 100644 GIT binary patch delta 249 zcmezOM5^_v)C3WkjQreG{mh)o#A5w|qWmoV^u&^E-GZXbvc!_qiRyj4e4XqJARzkj z^}YVaLt76q?%cy?s%K$oVrgn*WNK(&WHh~NFQbg7rGbfok%57!rHQG9p&5g(tD|4M zo2z3;Xpn1sm}8Kqqo1=YgQ=OJk+Ghsp^3SXfw8d>P#sK(vwx^x2!oNOvC;Godl{t| zOWL{jF#<6Y5HkZY%XaR4tj1p)zLm0q-ScK4gf{pBp{2svz~VeBA@s|i5Sl**;(nMv E02``8RR910 delta 276 zcmZpDD)sM))C3XX(BjmhV*QlFvdomE)I9z4#FFfZ27SD&&Fl;yuqyOrO-JMDt*04x z?y)q}GcYwYF|jZ(F|e>OF=uddbqom&a*g+K^>YvLhz|~NiFa`gV=yu{)3Y!(HZig= zG%~a_Ha0b%zGp9^q`iTqfu4c6k(s%vrG>eHxd~jqvwx^x2!oNOu_*&1(9YzHB1?t~ zK>K^=F)$ou0=t8uoV#6mA0rSm0WmWWvuu~%$NK5>blrW79MgF>u~|55Eo23&T|E~< btG|QLf`M#capuJk`oU)i%@zjnBh0S=z+y#9 From 112d6af10d7cbfdf4c798d8bb9650fbb5dae796c Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Thu, 6 Jun 2024 22:31:41 +0300 Subject: [PATCH 4/5] Threshold updated --- .../haplotypecaller/AlleleFiltering.java | 6 ++--- .../haplotypecaller/AlleleFilteringHC.java | 1 + .../AlleleFilteringMutect.java | 23 +------------------ 3 files changed, 5 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java index 8b775a5d5dc..a7ddf7e25a7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFiltering.java @@ -43,11 +43,11 @@ */ public abstract class AlleleFiltering { - final protected static Logger logger = LogManager.getLogger(AlleleFiltering.class); final protected AssemblyBasedCallerArgumentCollection assemblyArgs; final private OutputStreamWriter assemblyDebugOutStream; final private SAMSequenceDictionary sequenceDictionary; + AlleleFiltering(final AssemblyBasedCallerArgumentCollection assemblyArgs, final OutputStreamWriter assemblyDebugOutStream, final SAMFileHeader header){ @@ -55,6 +55,7 @@ public abstract class AlleleFiltering { this.assemblyDebugOutStream = assemblyDebugOutStream; this.sequenceDictionary = header.getSequenceDictionary(); } + protected abstract double getStringentQuality(); /** * Finds alleles that are likely not contributing much to explaining the data and remove the haplotypes @@ -204,8 +205,7 @@ private AlleleLikelihoods subsetHaplotypesByAlleles(final A // the very weak quality is hardcoded final List filteringCandidatesStringent = identifyBadAlleles(collectedRPLs, collectedSORs, - activeAlleles, - 1, + activeAlleles, getStringentQuality(), Integer.MAX_VALUE); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringHC.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringHC.java index 7a128de4ceb..aaae7e84b93 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringHC.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringHC.java @@ -38,6 +38,7 @@ public AlleleFilteringHC(HaplotypeCallerArgumentCollection _hcargs, OutputStream afCalc = AlleleFrequencyCalculator.makeCalculator(config); } + protected double getStringentQuality() { return 1; } /** * Calculate genotype likelihood of requirement of an allele. Specifically, calculates the likelihood * of the data given that allele versus the likelihood of the data when all haplotypes containing the allele are removed diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java index 162ef987ee0..8813beb3bbf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AlleleFilteringMutect.java @@ -42,28 +42,7 @@ public AlleleFilteringMutect(M2ArgumentCollection _m2args, normalSamples = _normalSamples; } - private AlleleLikelihoods getAlleleLikelihoodMatrix(final AlleleLikelihoods readLikelihoods, - final Event allele, - final Map> haplotypeAlleleMap, - final Set enabledHaplotypes - ){ - - final Map> alleleHaplotypeMap = new CollectionUtil.DefaultingMap<>((k) -> new ArrayList<>(), true); - - - final Allele notAllele= InverseAllele.of(allele.altAllele(), true); - readLikelihoods.alleles().stream().filter(enabledHaplotypes::contains) - .filter(h->haplotypeAlleleMap.get(h).contains(allele)) - .forEach(alleleHaplotypeMap.get(allele.altAllele())::add); - readLikelihoods.alleles().stream().filter(enabledHaplotypes::contains) - .filter(h -> !haplotypeAlleleMap.get(h).contains(allele)) - .forEach(alleleHaplotypeMap.get(notAllele)::add); - - final AlleleLikelihoods alleleLikelihoods = readLikelihoods.marginalize(alleleHaplotypeMap); - - logger.debug(() -> String.format("GALM: %s %d %d", allele, alleleHaplotypeMap.get(allele.altAllele()).size(), alleleHaplotypeMap.get(notAllele).size())); - return alleleLikelihoods; - } + protected double getStringentQuality() { return -50; } /** * Calculate calculate genotype likelihood of requirement of an allele. Specifically, calculates the likelihood From c012ed9bacfee9dbe0c4e80634599a390b02e7f5 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Fri, 7 Jun 2024 08:50:59 +0300 Subject: [PATCH 5/5] Test fix --- .../walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 03e0dbecad3..1e059b444b5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -54,7 +54,7 @@ public class HaplotypeCallerIntegrationTest extends CommandLineProgramTest { // instead of actually running the tests. Can be used with "./gradlew test -Dtest.single=HaplotypeCallerIntegrationTest" // to update all of the exact-match tests at once. After you do this, you should look at the // diffs in the new expected outputs in git to confirm that they are consistent with expectations. - public static final boolean UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS = true; + public static final boolean UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS = false; public static final String TEST_FILES_DIR = toolsTestDir + "haplotypecaller/";