Skip to content

Commit

Permalink
review edits
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin committed Aug 20, 2019
1 parent 5a02caa commit b48b007
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,6 @@ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult
//it's possible that the upstream deletion that spanned this site was not emitted, mooting the symbolic spanning deletion allele
final boolean isSpuriousSpanningDeletion = GATKVCFConstants.isSpanningDeletion(allele) && !isVcCoveredByDeletion(vc);

//TODO: force-clling logic goes here
final boolean toOutput = (isPlausible || forceKeepAllele(allele) || isNonRefWhichIsLoneAltAllele || forcedAlleles.contains(allele) ) && !isSpuriousSpanningDeletion;

siteIsMonomorphic &= !(isPlausible && !isSpuriousSpanningDeletion);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ public abstract class AssemblyBasedCallerArgumentCollection {

public static final String MIN_BASE_QUALITY_SCORE_LONG_NAME = "min-base-quality-score";
public static final String SMITH_WATERMAN_LONG_NAME = "smith-waterman";
public static final String EMIT_REFERENCE_CONFIDENCE_LONG_NAME = "emit-ref-confidence";
public static final String FORCE_CALL_ALLELES_LONG_NAME = "alleles";
public static final String GENOTYPE_FILTERED_ALLELES_LONG_NAME = "genotype-filtered-alleles";
public static final String EMIT_REF_CONFIDENCE_LONG_NAME = "emit-ref-confidence";
public static final String EMIT_REF_CONFIDENCE_SHORT_NAME = "ERC";

public ReadThreadingAssembler createReadThreadingAssembler() {
final ReadThreadingAssembler assemblyEngine = assemblerArgs.makeReadThreadingAssembler();
Expand Down Expand Up @@ -115,7 +118,7 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
* For Mutect2, this is a BETA feature that functions similarly to the HaplotypeCaller reference confidence/GVCF mode.
*/
@Advanced
@Argument(fullName=EMIT_REFERENCE_CONFIDENCE_LONG_NAME, shortName="ERC", doc="Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true)
@Argument(fullName= EMIT_REF_CONFIDENCE_LONG_NAME, shortName= EMIT_REF_CONFIDENCE_SHORT_NAME, doc="Mode for emitting reference confidence scores (For Mutect2, this is a BETA feature)", optional = true)
public ReferenceConfidenceMode emitReferenceConfidence = ReferenceConfidenceMode.NONE;

protected abstract int getDefaultMaxMnpDistance();
Expand All @@ -128,10 +131,10 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
doc = "Two or more phased substitutions separated by this distance or less are merged into MNPs.", optional = true)
public int maxMnpDistance = getDefaultMaxMnpDistance();

@Argument(fullName="alleles", doc="The set of alleles for which to force genotyping regardless of evidence", optional=true)
@Argument(fullName= FORCE_CALL_ALLELES_LONG_NAME, doc="The set of alleles for which to force genotyping regardless of evidence", optional=true)
public FeatureInput<VariantContext> alleles;

@Advanced
@Argument(fullName = "genotype-filtered-alleles", doc = "Whether to force genotype even filtered alleles", optional = true)
@Argument(fullName = GENOTYPE_FILTERED_ALLELES_LONG_NAME, doc = "Force genotyping of filtered alleles included in the resource specified by --alleles", optional = true)
public boolean forceCallFiltered = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -265,12 +265,12 @@ private void validateAndInitializeArgs() {
Utils.validateArg(hcArgs.likelihoodArgs.BASE_QUALITY_SCORE_THRESHOLD >= QualityUtils.MIN_USABLE_Q_SCORE, "BASE_QUALITY_SCORE_THRESHOLD must be greater than or equal to " + QualityUtils.MIN_USABLE_Q_SCORE + " (QualityUtils.MIN_USABLE_Q_SCORE)");

if ( emitReferenceConfidence() && samplesList.numberOfSamples() != 1 ) {
throw new CommandLineException.BadArgumentValue("--emit-ref-confidence", "Can only be used in single sample mode currently. Use the --sample-name argument to run on a single sample out of a multi-sample BAM file.");
throw new CommandLineException.BadArgumentValue(AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, "Can only be used in single sample mode currently. Use the --sample-name argument to run on a single sample out of a multi-sample BAM file.");
}

if (hcArgs.floorBlocks && !emitReferenceConfidence()) {
throw new UserException(HaplotypeCallerArgumentCollection.OUTPUT_BLOCK_LOWER_BOUNDS + " refers to GVCF blocks," +
" so reference confidence mode (" + AssemblyBasedCallerArgumentCollection.EMIT_REFERENCE_CONFIDENCE_LONG_NAME +
" so reference confidence mode (" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME +
") must be specified.");
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.broadinstitute.hellbender.utils.variant;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.TribbleException;
import htsjdk.variant.variantcontext.*;
Expand All @@ -12,22 +11,20 @@
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFSimpleHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import java.nio.file.Path;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.tools.walkers.genotyper.*;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;

import java.io.Serializable;
import java.nio.file.Path;
import java.util.*;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.ReferenceConfidenceMode;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.testutils.SparkTestUtils;
Expand Down Expand Up @@ -200,7 +202,7 @@ public void testGVCFModeIsConcordantWithGATK3_8Results() throws Exception {
"-R", b37_reference_20_21,
"-L", "20:10000000-10100000",
"-O", output.getAbsolutePath(),
"-ERC", "GVCF",
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
"-pairHMM", "AVX_LOGLESS_CACHING"
};

Expand All @@ -224,7 +226,7 @@ public void testBrokenGVCFConfigurationsAreDisallowed(String extension) {
"-I", NA12878_20_21_WGS_bam,
"-R", b37_reference_20_21,
"-O", createTempFile("testGVCF_GZ_throw_exception", extension).getAbsolutePath(),
"-ERC", "GVCF",
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
};

runCommandLine(args);
Expand Down Expand Up @@ -259,7 +261,7 @@ public void testGVCFModeIsConcordantWithGATK3_8AlelleSpecificResults(String exte
"-O", output.getAbsolutePath(),
"-G", "StandardAnnotation",
"-G", "AS_StandardAnnotation",
"-ERC", "GVCF",
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
"-pairHMM", "AVX_LOGLESS_CACHING"
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@
import htsjdk.variant.vcf.VCFHeader;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
import org.broadinstitute.hellbender.engine.AssemblyRegionWalker;
Expand Down Expand Up @@ -220,7 +218,7 @@ public void testGVCFModeIsConsistentWithPastResults(final String inputFileName,
"-R", referenceFileName,
"-L", "20:10000000-10100000",
"-O", outputPath,
"-ERC", "GVCF",
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
"-pairHMM", "AVX_LOGLESS_CACHING",
"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"
};
Expand Down Expand Up @@ -255,7 +253,7 @@ public void testGVCFModeIsConsistentWithPastResults_AlleleSpecificAnnotations(fi
"-G", "StandardAnnotation",
"-G", "StandardHCAnnotation",
"-G", "AS_StandardAnnotation",
"-ERC", "GVCF",
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
"-pairHMM", "AVX_LOGLESS_CACHING",
"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false"
};
Expand Down Expand Up @@ -290,7 +288,7 @@ public void testGVCFModeIsConcordantWithGATK3_8Results(final String inputFileNam
"-R", referenceFileName,
"-L", "20:10000000-10100000",
"-O", output.getAbsolutePath(),
"-ERC", "GVCF",
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
"-pairHMM", "AVX_LOGLESS_CACHING",
};

Expand Down Expand Up @@ -322,7 +320,7 @@ public void testGVCFModeIsConcordantWithGATK3_8AlelleSpecificResults(final Strin
"-G", "StandardAnnotation",
"-G", "StandardHCAnnotation",
"-G", "AS_StandardAnnotation",
"-ERC", "GVCF",
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
"-pairHMM", "AVX_LOGLESS_CACHING",
};

Expand All @@ -346,7 +344,7 @@ public void testGVCFModeGenotypePosteriors() throws Exception {
"-R", referenceFileName,
"-L", "20:10000000-10100000",
"-O", output.getAbsolutePath(),
"-ERC", "GVCF",
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString(),
"--" + GenotypeCalculationArgumentCollection.SUPPORTING_CALLSET_LONG_NAME,
largeFileTestDir + "1000G.phase3.broad.withGenotypes.chr20.10100000.vcf",
"--" + GenotypeCalculationArgumentCollection.NUM_REF_SAMPLES_LONG_NAME, "2500",
Expand All @@ -371,33 +369,33 @@ else if (!vc.getAlternateAllele(0).equals(Allele.NON_REF_ALLELE)){ //there
}
}
}

/*
* Test that GQs are correct when the --floor-blocks argument is supplied
*/
@Test(dataProvider="HaplotypeCallerTestInputs")
public void testFloorGVCFBlocks(final String inputFileName, final String referenceFileName) throws Exception {
Utils.resetRandomGenerator();

final File output = createTempFile("testFloorGVCFBlocks", ".vcf");

final List<String> requestedGqBands = Arrays.asList("10","20","30","40","50","60");

final ArgumentsBuilder args = new ArgumentsBuilder().addInput(new File(inputFileName))
.addReference(new File(referenceFileName))
.addInterval(new SimpleInterval("20:10009880-10012631"))
.addBooleanArgument(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, false)
.addArgument("pairHMM", "AVX_LOGLESS_CACHING")
.addArgument("floor-blocks")
.addArgument("ERC", "GVCF")
.addOutput(output);
.addReference(new File(referenceFileName))
.addInterval(new SimpleInterval("20:10009880-10012631"))
.addBooleanArgument(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, false)
.addArgument("pairHMM", "AVX_LOGLESS_CACHING")
.addArgument("floor-blocks")
.addArgument("ERC", "GVCF")
.addOutput(output);
requestedGqBands.forEach(x -> args.addArgument("GQB",x));
runCommandLine(args);

final List<String> allGqBands = new ArrayList<>(requestedGqBands);
allGqBands.add("99");
allGqBands.add("0");

//The interval here is big, so use a FeatureDataSource to limit memory usage
try (final FeatureDataSource<VariantContext> actualVcs = new FeatureDataSource<>(output)) {
actualVcs.forEach(vc -> {
Expand All @@ -409,21 +407,28 @@ public void testFloorGVCFBlocks(final String inputFileName, final String referen
}
}

@Test
public void testForceCalling() throws IOException {
Utils.resetRandomGenerator();
// force calling bam (File) vcf (File) and intervals (String)
@DataProvider(name="ForceCallingInputs")
public Object[][] getForceCallingInputs() {
return new Object[][] {
{NA12878_20_21_WGS_bam, new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles.vcf"), "20:10000000-10010000"},
{NA12878_20_21_WGS_bam, new File(toolsTestDir, "mutect/gga_mode.vcf"), "20:9998500-10010000"}
};
}

@Test(dataProvider = "ForceCallingInputs")
public void testForceCalling(final String bamPath, final File forceCallingVcf, final String intervalString) throws IOException {
Utils.resetRandomGenerator();
final File output = createTempFile("testGenotypeGivenAllelesMode", ".vcf");

final File forceCallingVcf = new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles.vcf");
final String[] args = {
"-I", NA12878_20_21_WGS_bam,
"-R", b37_reference_20_21,
"-L", "20:10000000-10010000",
"-I", bamPath,
"-R", b37Reference,
"-L", intervalString,
"-O", output.getAbsolutePath(),
"-pairHMM", "AVX_LOGLESS_CACHING",
"--" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false",
"--alleles", forceCallingVcf.getAbsolutePath()
"--" + AssemblyBasedCallerArgumentCollection.FORCE_CALL_ALLELES_LONG_NAME, forceCallingVcf.getAbsolutePath()
};

runCommandLine(args);
Expand Down Expand Up @@ -913,7 +918,7 @@ public void testContaminationCorrection( final String contaminatedBam,
"-R", reference,
"-L", interval.toString(),
"-O", uncorrectedOutput.getAbsolutePath(),
"-ERC", (gvcfMode ? "GVCF" : "NONE"),
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, (gvcfMode ? ReferenceConfidenceMode.GVCF.toString() : ReferenceConfidenceMode.NONE.toString()),
};
Utils.resetRandomGenerator();
runCommandLine(noContaminationCorrectionArgs);
Expand All @@ -925,7 +930,7 @@ public void testContaminationCorrection( final String contaminatedBam,
"-L", interval.toString(),
"-O", correctedOutput.getAbsolutePath(),
"-contamination", Double.toString(contaminationFraction),
"-ERC", (gvcfMode ? "GVCF" : "NONE"),
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, (gvcfMode ? ReferenceConfidenceMode.GVCF.toString() : ReferenceConfidenceMode.NONE.toString()),
};
Utils.resetRandomGenerator();
runCommandLine(contaminationCorrectionArgs);
Expand All @@ -937,7 +942,7 @@ public void testContaminationCorrection( final String contaminatedBam,
"-L", interval.toString(),
"-O", correctedOutputUsingContaminationFile.getAbsolutePath(),
"-contamination-file", contaminationFile,
"-ERC", (gvcfMode ? "GVCF" : "NONE"),
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, (gvcfMode ? ReferenceConfidenceMode.GVCF.toString() : ReferenceConfidenceMode.NONE.toString()),
};
Utils.resetRandomGenerator();
runCommandLine(contaminationCorrectionFromFileArgs);
Expand Down Expand Up @@ -1015,7 +1020,7 @@ public void test100PercentContaminationNoCallsInGVCFMode() throws Exception {
"-L", "20:10000000-10010000",
"-O", output.getAbsolutePath(),
"-contamination", "1.0",
"-ERC", "GVCF"
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, ReferenceConfidenceMode.GVCF.toString()
};
runCommandLine(contaminationArgs);

Expand Down Expand Up @@ -1080,7 +1085,7 @@ public void testMaxAlternateAlleles(final String bam, final String reference, fi
"-R", reference,
"-L", intervalString,
"-O", outputNoMaxAlternateAlleles.getAbsolutePath(),
"-ERC", (gvcfMode ? "GVCF" : "NONE")
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, (gvcfMode ? ReferenceConfidenceMode.GVCF.toString() : ReferenceConfidenceMode.NONE.toString())
};
runCommandLine(argsNoMaxAlternateAlleles);

Expand All @@ -1090,7 +1095,7 @@ public void testMaxAlternateAlleles(final String bam, final String reference, fi
"-L", intervalString,
"-O", outputWithMaxAlternateAlleles.getAbsolutePath(),
"--max-alternate-alleles", Integer.toString(maxAlternateAlleles),
"-ERC", (gvcfMode ? "GVCF" : "NONE")
"--" + AssemblyBasedCallerArgumentCollection.EMIT_REF_CONFIDENCE_LONG_NAME, (gvcfMode ? ReferenceConfidenceMode.GVCF.toString() : ReferenceConfidenceMode.NONE.toString())
};
runCommandLine(argsWithMaxAlternateAlleles);

Expand Down
Loading

0 comments on commit b48b007

Please sign in to comment.