Skip to content

Commit

Permalink
added option for ValidateBasicSomaticShortMutations to output a vcf (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin authored Jul 23, 2018
1 parent 8ad39d7 commit aa1d525
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,12 @@ public static BasicValidationResult calculateBasicValidationResult(final Genotyp
int validationTumorAltCount, int validationTumorTotalCount,
int minBaseQualityCutoff, final SimpleInterval interval, final String filters) {

if (!isAbleToValidateGenotype(genotype, referenceAllele)){
if (!isAbleToValidateGenotype(genotype, referenceAllele) || validationNormalPileup == null){
return null;
}

Utils.nonNull(referenceAllele);
Utils.nonNull(genotype);
Utils.nonNull(validationNormalPileup);
Utils.nonNull(interval);
Utils.nonNull(filters);
ParamUtils.isPositiveOrZero(validationTumorAltCount, "Validation alt count must be >= 0");
Expand All @@ -88,6 +87,10 @@ public static BasicValidationResult calculateBasicValidationResult(final Genotyp
final int discoveryTumorAltCount = genotype.getAD()[1];
final int discoveryTumorTotalCount = genotype.getAD()[0] + discoveryTumorAltCount;

if (Double.isNaN(maxAltRatioSeenInNormalValidation) || discoveryTumorTotalCount == 0) {
return null;
}

final int minCountForSignal = PowerCalculationUtils.calculateMinCountForSignal(validationTumorTotalCount, maxAltRatioSeenInNormalValidation);
final double power = PowerCalculationUtils.calculatePower(validationTumorTotalCount, discoveryTumorAltCount, discoveryTumorTotalCount, minCountForSignal);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.apache.commons.lang3.mutable.MutableInt;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
Expand All @@ -18,7 +21,6 @@
import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.validation.Concordance;
import org.broadinstitute.hellbender.tools.walkers.validation.ConcordanceState;
import org.broadinstitute.hellbender.tools.walkers.validation.ConcordanceSummaryRecord;
import org.broadinstitute.hellbender.utils.GATKProtectedVariantContextUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
Expand All @@ -30,9 +32,7 @@

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

@CommandLineProgramProperties(
Expand All @@ -47,6 +47,7 @@
@ExperimentalFeature
@DocumentedFeature
public class ValidateBasicSomaticShortMutations extends VariantWalker {
public static final String ANNOTATED_VCF_LONG_NAME = "annotated-vcf";
public static final String SAMPLE_NAME_DISCOVERY_VCF_LONG_NAME = "discovery-sample-name";
public static final String SAMPLE_NAME_VALIDATION_CASE = "val-case-sample-name";
public static final String SAMPLE_NAME_VALIDATION_CONTROL = "val-control-sample-name";
Expand All @@ -65,6 +66,11 @@ public class ValidateBasicSomaticShortMutations extends VariantWalker {
doc = "The output file, which will be a validation table (tsv).")
protected String outputFile;

@Argument(fullName = ANNOTATED_VCF_LONG_NAME,
doc = "Optional output vcf containing original variants annotated with validation info.",
optional = true)
protected File annotatedVcf;

@Argument(doc = "A table of summary statistics (true positives, sensitivity, etc.)",
fullName = Concordance.SUMMARY_LONG_NAME,
optional = true)
Expand All @@ -88,11 +94,14 @@ public class ValidateBasicSomaticShortMutations extends VariantWalker {
optional = true)
public int minBqCutoff = DEFAULT_MIN_BQ_CUTOFF;

private VariantContextWriter vcfWriter;

@Override
public boolean requiresReference() {
return true;
}

// for the table
public final static String CONTIG = "CONTIG";
public final static String START = "START";
public final static String END = "END";
Expand All @@ -108,6 +117,26 @@ public boolean requiresReference() {
public final static String IS_ENOUGH_VALIDATION_COVERAGE = "sufficient_tv_alt_coverage";
public final static String DISCOVERY_VCF_FILTER = "discovery_vcf_filter";

// for the optional vcf
public final static String POWER_INFO_FIELD_KEY = "POWER";
public final static String VALIDATION_AD_INFO_FIELD_KEY = "VAL_AD";
public final static String JUDGMENT_INFO_FIELD_KEY = "JUDGMENT";

public final VCFInfoHeaderLine POWER_HEADER_LINE = new VCFInfoHeaderLine(POWER_INFO_FIELD_KEY, 1, VCFHeaderLineType.Float,
"Power to validate variant in validation bam.");

public final VCFInfoHeaderLine VALIDATION_AD_HEADER_LINE = new VCFInfoHeaderLine(VALIDATION_AD_INFO_FIELD_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer,
"Ref and alt allele count in validation bam.");

public final VCFInfoHeaderLine JUDGMENT_HEADER_LINE = new VCFInfoHeaderLine(JUDGMENT_INFO_FIELD_KEY, 1, VCFHeaderLineType.String,
"Validation judgment: validated, unvalidated, or skipped.");


public enum Judgment {
VALIDATED, UNVALIDATED, SKIPPED;
}


public static String[] headers = {CONTIG, START, END, REF, ALT, DISCOVERY_ALT_COVERAGE, DISCOVERY_REF_COVERAGE,
VALIDATION_ALT_COVERAGE, VALIDATION_REF_COVERAGE, MIN_VAL_COUNT, POWER, IS_NOT_NOISE, IS_ENOUGH_VALIDATION_COVERAGE, DISCOVERY_VCF_FILTER};

Expand All @@ -130,6 +159,19 @@ public List<ReadFilter> getDefaultReadFilters() {
@Override
public boolean requiresReads() {return true;}

@Override
public void onTraversalStart() {
if (annotatedVcf != null) {
final VCFHeader inputHeader = getHeaderForVariants();
final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeader.getMetaDataInSortedOrder());
headerLines.addAll(Arrays.asList(POWER_HEADER_LINE, VALIDATION_AD_HEADER_LINE, JUDGMENT_HEADER_LINE));
headerLines.addAll(getDefaultToolVCFHeaderLines());
final VCFHeader vcfHeader = new VCFHeader(headerLines, inputHeader.getGenotypeSamples());
vcfWriter = createVCFWriter(annotatedVcf);
vcfWriter.writeHeader(vcfHeader);
}
}

@Override
public void apply(VariantContext discoveryVariantContext, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) {

Expand All @@ -145,6 +187,9 @@ public void apply(VariantContext discoveryVariantContext, ReadsContext readsCont
// If we cannot validate this genotype, we should simply skip it.
final boolean isAbleToValidate = BasicSomaticShortMutationValidator.isAbleToValidateGenotype(genotype, referenceAllele);
if (!isAbleToValidate) {
if (annotatedVcf != null) {
vcfWriter.add(new VariantContextBuilder(discoveryVariantContext).attribute(JUDGMENT_INFO_FIELD_KEY, Judgment.SKIPPED).make());
}
return;
}

Expand Down Expand Up @@ -174,10 +219,12 @@ public void apply(VariantContext discoveryVariantContext, ReadsContext readsCont
new SimpleInterval(discoveryVariantContext.getContig(), discoveryVariantContext.getStart(), discoveryVariantContext.getEnd()),
filterString);

results.add(basicValidationResult);
if (basicValidationResult != null) {
results.add(basicValidationResult);
}

final boolean validated = basicValidationResult.isOutOfNoiseFloor();
final boolean powered = basicValidationResult.getPower() > minPower;
final boolean validated = basicValidationResult != null && basicValidationResult.isOutOfNoiseFloor();
final boolean powered = basicValidationResult != null && basicValidationResult.getPower() > minPower;

if (discoveryVariantContext.isSNP()) {
if (validated) {
Expand All @@ -192,6 +239,13 @@ public void apply(VariantContext discoveryVariantContext, ReadsContext readsCont
indelFalsePositiveCount.increment();
}
}

if (annotatedVcf != null) {
vcfWriter.add(new VariantContextBuilder(discoveryVariantContext)
.attribute(JUDGMENT_INFO_FIELD_KEY, validated ? Judgment.VALIDATED : Judgment.UNVALIDATED)
.attribute(POWER_INFO_FIELD_KEY, basicValidationResult == null ? 0 : basicValidationResult.getPower())
.attribute(VALIDATION_AD_INFO_FIELD_KEY, new int[] {validationTumorRefCount, validationTumorAltCount}).make());
}
}

@Override
Expand Down Expand Up @@ -238,4 +292,11 @@ protected void composeLine(BasicValidationResult record, DataLine dataLine) {
}
return "SUCCESS";
}

@Override
public void closeTool() {
if ( vcfWriter != null ) {
vcfWriter.close();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public void testBasic() {
// No variants should validate, since the validation bam is not the same one used for calling.
final File outputFile = IOUtils.createTempFile("basicTest", ".seg");
final File summaryFile = IOUtils.createTempFile("summary", ".txt");
final File annotatedVcf = IOUtils.createTempFile("annotated", ".vcf");
final List<String> arguments = new ArrayList<>();
arguments.add("--" + ValidateBasicSomaticShortMutations.SAMPLE_NAME_DISCOVERY_VCF_LONG_NAME);
arguments.add("synthetic.challenge.set1.tumor");
Expand All @@ -59,6 +60,8 @@ public void testBasic() {
arguments.add(outputFile.getAbsolutePath());
arguments.add("-" + Concordance.SUMMARY_LONG_NAME);
arguments.add(summaryFile.getAbsolutePath());
arguments.add("--" + ValidateBasicSomaticShortMutations.ANNOTATED_VCF_LONG_NAME);
arguments.add(annotatedVcf.getAbsolutePath());
arguments.add("--verbosity");
arguments.add("INFO");
runCommandLine(arguments);
Expand Down

0 comments on commit aa1d525

Please sign in to comment.