Skip to content

Commit

Permalink
LOD back to TLOD
Browse files Browse the repository at this point in the history
  • Loading branch information
meganshand committed Oct 17, 2018
1 parent b158b82 commit 618c23a
Show file tree
Hide file tree
Showing 13 changed files with 43 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -44,9 +43,9 @@ public void annotate(ReferenceContext ref, VariantContext vc, Genotype g, Genoty
Utils.nonNull(vc);
Utils.nonNull(likelihoods);

final double[] lods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY);
final double[] lods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
if (lods==null) {
warning.warn(String.format("One or more variant contexts is missing the 'LOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY));
warning.warn(String.format("One or more variant contexts is missing the 'TLOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.ORIGINAL_CONTIG_MISMATCH_KEY));
return;
}
final int indexOfMaxLod = MathUtils.maxElementIndex(lods);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.spark_project.guava.collect.Range;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -62,9 +61,9 @@ public void annotate(ReferenceContext ref, VariantContext vc, Genotype g, Genoty
Utils.nonNull(gb);
Utils.nonNull(vc);
Utils.nonNull(likelihoods);
final double[] lods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY);
final double[] lods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
if (lods==null) {
warning.warn(String.format("One or more variant contexts is missing the 'LOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_KEY));
warning.warn(String.format("One or more variant contexts is missing the 'TLOD' annotation, %s will not be computed for these VariantContexts", GATKVCFConstants.POTENTIAL_POLYMORPHIC_NUMT_KEY));
return;
}
final int indexOfMaxLod = MathUtils.maxElementIndex(lods);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public void annotate(final ReferenceContext ref,
return;
}

final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY, () -> null, -1);
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, -1);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
final Allele altAllele = vc.getAlternateAllele(indexOfMaxTumorLod);
final Nucleotide altBase = Nucleotide.valueOf(altAllele.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,14 @@ public void annotate(final ReferenceContext ref,
pi.put(ART_REV, PRIOR_PROBABILITY_OF_ARTIFACT);

// We use the allele with highest log odds score
final double[] lods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY, () -> null, -1);
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY, () -> null, -1);

if (lods==null) {
warning.warn("One or more variant contexts is missing the 'LOD' annotation, StrandArtifact will not be computed for these VariantContexts");
if (tumorLods==null) {
warning.warn("One or more variant contexts is missing the 'TLOD' annotation, StrandArtifact will not be computed for these VariantContexts");
return;
}
final int indexOfMaxLod = MathUtils.maxElementIndex(lods);
final Allele altAllele = vc.getAlternateAllele(indexOfMaxLod);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);
final Allele altAllele = vc.getAlternateAllele(indexOfMaxTumorLod);

final Collection<ReadLikelihoods<Allele>.BestAllele> informativeBestAlleles = likelihoods.bestAllelesBreakingTies(g.getSampleName()).stream().
filter(ba -> ba.isInformative()).collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final String DEFAULT_AF_LONG_NAME = "af-of-alleles-not-in-resource";
public static final String DEFAULT_AF_SHORT_NAME = "default-af";
public static final String EMISSION_LOD_LONG_NAME = "tumor-lod-to-emit";
public static final String EMISSION_LOD_SHORT_NAME = "emit-lod";
public static final String INITIAL_LOD_LONG_NAME = "initial-tumor-lod";
public static final String INITIAL_LOD_SHORT_NAME = "init-lod";
public static final String EMISSION_TUMOR_LOD_SHORT_NAME = "emit-lod";
public static final String INITIAL_TUMOR_LOD_LONG_NAME = "initial-tumor-lod";
public static final String INITIAL_TUMOR_LOD_SHORT_NAME = "init-lod";
public static final String INITIAL_PCR_ERROR_QUAL = "initial-pcr-qual";
public static final String MAX_POPULATION_AF_LONG_NAME = "max-population-af";
public static final String MAX_POPULATION_AF_SHORT_NAME = "max-af";
Expand Down Expand Up @@ -115,12 +115,12 @@ public double getDefaultAlleleFrequency() {
public Boolean mitochondria = false;

/**
* Only variants with LODs exceeding this threshold will be written to the VCF, regardless of filter status.
* Only variants with tumor LODs exceeding this threshold will be written to the VCF, regardless of filter status.
* Set to less than or equal to tumor_lod. Increase argument value to reduce false positives in the callset.
* Default setting of 3 is permissive and will emit some amount of negative training data that
* {@link FilterMutectCalls} should then filter.
*/
@Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to emit tumor variant to VCF.")
@Argument(fullName = EMISSION_LOD_LONG_NAME, shortName = EMISSION_TUMOR_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to emit variant to VCF.")
private double emissionLodArg = DEFAULT_EMISSION_LOD;

public double getEmissionLod() {
Expand All @@ -130,7 +130,7 @@ public double getEmissionLod() {
/**
* Only variants with estimated tumor LODs exceeding this threshold will be considered active.
*/
@Argument(fullName = INITIAL_LOD_LONG_NAME, shortName = INITIAL_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to consider pileup active.")
@Argument(fullName = INITIAL_TUMOR_LOD_LONG_NAME, shortName = INITIAL_TUMOR_LOD_SHORT_NAME, optional = true, doc = "LOD threshold to consider pileup active.")
private double initialLod = DEFAULT_INITIAL_LOD;

public double getInitialLod() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentCollection {
private static final long serialVersionUID = 9345L;
public static final String LOG_SOMATIC_PRIOR_LONG_NAME = "log-somatic-prior";
public static final String LOD_LONG_NAME = "tumor-lod";
public static final String TUMOR_LOD_LONG_NAME = "tumor-lod";
public static final String NORMAL_ARTIFACT_LOD_LONG_NAME = "normal-artifact-lod";
public static final String NORMAL_P_VALUE_THRESHOLD_LONG_NAME = "normal-p-value-threshold";
public static final String MAX_GERMLINE_POSTERIOR_LONG_NAME = "max-germline-posterior";
Expand Down Expand Up @@ -58,8 +58,8 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
/**
* Only variants with log odds ratios exceeding this threshold can pass filtering.
*/
@Argument(fullName = LOD_LONG_NAME, optional = true, doc = "LOD threshold for calling variant")
public double LOD_THRESHOLD = 5.3;
@Argument(fullName = TUMOR_LOD_LONG_NAME, optional = true, doc = "LOD threshold for calling variant")
public double TUMOR_LOD_THRESHOLD = 5.3;

/**
* This is a measure of the minimum evidence to support that a variant observed in the tumor is not also present in the normal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ private void applyContaminationFilter(final M2FiltersArgumentCollection MTFAC, f
}

private void applyTriallelicFilter(final VariantContext vc, final FilterResult filterResult) {
if (vc.hasAttribute(GATKVCFConstants.LOD_KEY)) {
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY);
final long numPassingAltAlleles = Arrays.stream(tumorLods).filter(x -> x > MTFAC.LOD_THRESHOLD).count();
if (vc.hasAttribute(GATKVCFConstants.TUMOR_LOD_KEY)) {
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final long numPassingAltAlleles = Arrays.stream(tumorLods).filter(x -> x > MTFAC.TUMOR_LOD_THRESHOLD).count();

if (numPassingAltAlleles > MTFAC.numAltAllelesThreshold) {
filterResult.addFilter(GATKVCFConstants.MULTIALLELIC_FILTER_NAME);
Expand Down Expand Up @@ -109,7 +109,7 @@ private static void applyPanelOfNormalsFilter(final M2FiltersArgumentCollection

private void applyBaseQualityFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
final int[] baseQualityByAllele = getIntArrayTumorField(vc, BaseQuality.KEY);
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY);
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);

if (baseQualityByAllele != null && baseQualityByAllele[indexOfMaxTumorLod + 1] < MTFAC.minMedianBaseQuality) {
Expand Down Expand Up @@ -141,8 +141,8 @@ private void applyReadPositionFilter(final M2FiltersArgumentCollection MTFAC, fi
}

private void applyGermlineVariantFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
if (vc.hasAttribute(GATKVCFConstants.LOD_KEY) && vc.hasAttribute(GATKVCFConstants.POPULATION_AF_VCF_ATTRIBUTE)) {
final double[] tumorLog10OddsIfSomatic = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY);
if (vc.hasAttribute(GATKVCFConstants.TUMOR_LOD_KEY) && vc.hasAttribute(GATKVCFConstants.POPULATION_AF_VCF_ATTRIBUTE)) {
final double[] tumorLog10OddsIfSomatic = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final Optional<double[]> normalLods = vc.hasAttribute(GATKVCFConstants.NORMAL_LOD_KEY) ?
Optional.of(GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.NORMAL_LOD_KEY)) : Optional.empty();
final double[] populationAlleleFrequencies = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.POPULATION_AF_VCF_ATTRIBUTE);
Expand Down Expand Up @@ -188,11 +188,11 @@ private void applyGermlineVariantFilter(final M2FiltersArgumentCollection MTFAC,
}

private void applyInsufficientEvidenceFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
if (vc.hasAttribute(GATKVCFConstants.LOD_KEY)) {
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY);
if (vc.hasAttribute(GATKVCFConstants.TUMOR_LOD_KEY)) {
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);

if (MathUtils.arrayMax(tumorLods) < MTFAC.LOD_THRESHOLD) {
filterResult.addFilter(GATKVCFConstants.LOW_LOD_FILTER_NAME);
if (MathUtils.arrayMax(tumorLods) < MTFAC.TUMOR_LOD_THRESHOLD) {
filterResult.addFilter(GATKVCFConstants.TUMOR_LOD_FILTER_NAME);
}
}
}
Expand All @@ -201,11 +201,11 @@ private void applyInsufficientEvidenceFilter(final M2FiltersArgumentCollection M
// this handles shared artifacts, such as ones due to alignment and any shared aspects of sequencing
private void applyArtifactInNormalFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
if (!( vc.hasAttribute(GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE)
&& vc.hasAttribute(GATKVCFConstants.LOD_KEY))) {
&& vc.hasAttribute(GATKVCFConstants.TUMOR_LOD_KEY))) {
return;
}

final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.LOD_KEY);
final double[] tumorLods = GATKProtectedVariantContextUtils.getAttributeAsDoubleArray(vc, GATKVCFConstants.TUMOR_LOD_KEY);
final int indexOfMaxTumorLod = MathUtils.maxElementIndex(tumorLods);

Genotype tumorGenotype = vc.getGenotype(tumorSample);
Expand Down Expand Up @@ -339,7 +339,7 @@ private void applyChimericOriginalAlignmentFilter(final M2FiltersArgumentCollect
}
private void applyLODFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult) {
if(vc.isBiallelic()) {
final Double lod = vc.getAttributeAsDouble(GATKVCFConstants.LOD_KEY, 1);
final Double lod = vc.getAttributeAsDouble(GATKVCFConstants.TUMOR_LOD_KEY, 1);
final Double depth = vc.getAttributeAsDouble(VCFConstants.DEPTH_KEY, 1);
final Double lodByDepth = lod / depth;
if (lodByDepth < MTFAC.lodByDepth) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import org.apache.commons.math3.util.FastMath;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.annotator.StrandBiasTest;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculator;
import org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerGenotypingEngine;
Expand Down Expand Up @@ -151,7 +150,7 @@ public CalledHaplotypes callMutations(
final VariantContextBuilder callVcb = new VariantContextBuilder(mergedVC)
.alleles(allAllelesToEmit)
.attributes(populationAFAnnotation)
.attribute(GATKVCFConstants.LOD_KEY, tumorAltAlleles.stream().mapToDouble(tumorLog10Odds::getAlt).toArray());
.attribute(GATKVCFConstants.TUMOR_LOD_KEY, tumorAltAlleles.stream().mapToDouble(tumorLog10Odds::getAlt).toArray());

normalLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_LOD_KEY, values.asDoubleArray(tumorAltAlleles)));
normalArtifactLog10Odds.ifPresent(values -> callVcb.attribute(GATKVCFConstants.NORMAL_ARTIFACT_LOD_ATTRIBUTE, values.asDoubleArray(tumorAltAlleles)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import htsjdk.variant.variantcontext.Allele;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -105,14 +104,14 @@ public final class GATKVCFConstants {
public static final String F2R1_KEY = "F2R1";

// Mutect2-specific INFO keys
public static final String LOD_KEY = "LOD";
public static final String TUMOR_LOD_KEY = "TLOD";
public static final String NORMAL_LOD_KEY = "NLOD";
public static final String IN_PON_VCF_ATTRIBUTE = "IN_PON";
public static final String NORMAL_ARTIFACT_LOD_ATTRIBUTE = "N_ART_LOD";
public static final String POPULATION_AF_VCF_ATTRIBUTE = "POP_AF";
public static final String GERMLINE_POSTERIORS_VCF_ATTRIBUTE = "P_GERMLINE";
public static final String POSTERIOR_PROB_OF_CONTAMINATION_ATTRIBUTE = "P_CONTAM";
public static final List<String> STANDARD_MUTECT_INFO_FIELDS = Arrays.asList(NORMAL_LOD_KEY, LOD_KEY, NORMAL_ARTIFACT_LOD_ATTRIBUTE,
public static final List<String> STANDARD_MUTECT_INFO_FIELDS = Arrays.asList(NORMAL_LOD_KEY, TUMOR_LOD_KEY, NORMAL_ARTIFACT_LOD_ATTRIBUTE,
EVENT_COUNT_IN_HAPLOTYPE_KEY, IN_PON_VCF_ATTRIBUTE, POPULATION_AF_VCF_ATTRIBUTE, GERMLINE_POSTERIORS_VCF_ATTRIBUTE, POSTERIOR_PROB_OF_CONTAMINATION_ATTRIBUTE);

// FORMAT keys
Expand Down Expand Up @@ -154,7 +153,7 @@ their names (or descriptions) depend on some threshold. Those filters are not i
public static final String ALIGNMENT_ARTIFACT_FILTER_NAME = "alignment_artifact";
public static final String PON_FILTER_NAME = "panel_of_normals"; //M2
public static final String STR_CONTRACTION_FILTER_NAME = "str_contraction"; //M2
public final static String LOW_LOD_FILTER_NAME = "low_lod"; //M2
public final static String TUMOR_LOD_FILTER_NAME = "t_lod"; //M2
public static final String MULTIALLELIC_FILTER_NAME = "multiallelic"; //M2
public static final String STRAND_ARTIFACT_FILTER_NAME = "strand_artifact"; // M2
public static final String DUPLICATED_EVIDENCE_FILTER_NAME = "duplicate_evidence";
Expand All @@ -171,7 +170,7 @@ their names (or descriptions) depend on some threshold. Those filters are not i


public static final List<String> MUTECT_FILTER_NAMES = Arrays.asList(STR_CONTRACTION_FILTER_NAME,
PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, LOW_LOD_FILTER_NAME, GERMLINE_RISK_FILTER_NAME,
PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_LOD_FILTER_NAME, GERMLINE_RISK_FILTER_NAME,
MULTIALLELIC_FILTER_NAME, STRAND_ARTIFACT_FILTER_NAME, ARTIFACT_IN_NORMAL_FILTER_NAME,
MEDIAN_BASE_QUALITY_FILTER_NAME, MEDIAN_MAPPING_QUALITY_FILTER_NAME,
MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME,
Expand Down
Loading

0 comments on commit 618c23a

Please sign in to comment.