Skip to content

Commit

Permalink
Add option to recalculate the allele fraction based on AD instead of …
Browse files Browse the repository at this point in the history
…the Bayesian estimate
  • Loading branch information
ldgauthier committed Aug 16, 2018
1 parent be42b23 commit d560cb5
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFFormatHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;

import java.util.*;

/**
* Created by gauthier on 8/15/18.
* Bug Laura to fill this in
*/
@DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Variant allele fraction for a genotype")

public final class AlleleFraction extends GenotypeAnnotation implements StandardMutectAnnotation {
@Override
public void annotate(final ReferenceContext ref,
final VariantContext vc,
final Genotype g,
final GenotypeBuilder gb,
final ReadLikelihoods<Allele> likelihoods) {
Utils.nonNull(gb, "gb is null");
Utils.nonNull(vc, "vc is null");

final GenotypesContext genotypes = vc.getGenotypes();
if ( g == null || !g.isCalled() || g.hasExtendedAttribute(getKeyNames().get(0))) { //don't overwrite AF based on Bayesian estimate if it already exists
return;
}

for ( final Genotype genotype : genotypes ) {
if ( genotype.hasAD() ) {
final int[] AD = genotype.getAD();
final double[] allAlleleFractions = MathUtils.normalizeFromRealSpace(Arrays.stream(AD).mapToDouble(x -> x*1.0).toArray());
gb.attribute(getKeyNames().get(0), Arrays.copyOfRange(allAlleleFractions, 1, allAlleleFractions.length)); //omit the first entry of the array corresponding to the reference
}
// if there is no AD value calculate it now using likelihoods
else if (likelihoods != null) {
DepthPerAlleleBySample adCalc = new DepthPerAlleleBySample();
final int[] AD = adCalc.annotateWithLikelihoods(vc, g, new LinkedHashSet<>(vc.getAlleles()), likelihoods);
final double[] allAlleleFractions = MathUtils.normalizeFromRealSpace(Arrays.stream(AD).mapToDouble(x -> x*1.0).toArray());
gb.attribute(getKeyNames().get(0), Arrays.copyOfRange(allAlleleFractions, 1, allAlleleFractions.length)); //omit the first entry of the array corresponding to the reference
}
}
}

@Override
public List<String> getKeyNames() { return Collections.singletonList(GATKVCFConstants.ALLELE_FRACTION_KEY);}

@Override
public List<VCFFormatHeaderLine> getDescriptions() {
return Collections.singletonList(GATKVCFHeaderLines.getFormatLine(getKeyNames().get(0)));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ private int[] annotateWithPileup(final VariantContext vc, List<PileupElement> pi
return counts;
}

private int[] annotateWithLikelihoods(VariantContext vc, Genotype g, Set<Allele> alleles, ReadLikelihoods<Allele> likelihoods) {
protected int[] annotateWithLikelihoods(VariantContext vc, Genotype g, Set<Allele> alleles, ReadLikelihoods<Allele> likelihoods) {

final Map<Allele, Integer> alleleCounts = new LinkedHashMap<>();
for ( final Allele allele : vc.getAlleles() ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public class M2ArgumentCollection extends AssemblyBasedCallerArgumentCollection
public static final String MAX_MNP_DISTANCE_SHORT_NAME = "mnp-dist";
public static final String IGNORE_ITR_ARTIFACTS_LONG_NAME = "ignore-itr-artifacts";
public static final String ARTIFACT_PRIOR_TABLE_NAME = "orientation-bias-artifact-priors";
public static final String GET_AF_FROM_AD_LONG_NAME = "get-af-from-ad";

public static final double DEFAULT_AF_FOR_TUMOR_ONLY_CALLING = 5e-8;
public static final double DEFAULT_AF_FOR_TUMOR_NORMAL_CALLING = 1e-6;
Expand Down Expand Up @@ -164,4 +165,8 @@ public double getDefaultAlleleFrequency() {
@Argument(fullName= IGNORE_ITR_ARTIFACTS_LONG_NAME, doc="Turn off read transformer that clips artifacts associated with end repair insertions near inverted tandem repeats.", optional = true)
public boolean dontClipITRArtifacts = false;

@Advanced
@Argument(fullName = GET_AF_FROM_AD_LONG_NAME, doc="Use allelic depth to calculate tumor allele fraction; recommended for mitochondrial applications", optional = true)
public boolean calculateAFfromAD = false;

}
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,13 @@ private void addGenotypes(final LikelihoodMatrix<Allele> tumorLog10Matrix,
final Optional<LikelihoodMatrix<Allele>> normalLog10Matrix,
final VariantContextBuilder callVcb) {
final double[] tumorAlleleCounts = getEffectiveCounts(tumorLog10Matrix);
final Genotype tumorGenotype = new GenotypeBuilder(tumorSample, tumorLog10Matrix.alleles())
.AD(Arrays.stream(tumorAlleleCounts).mapToInt(x -> (int) FastMath.round(x)).toArray())
.attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, getAltAlleleFractions(tumorAlleleCounts))
.make();
final int[] adArray = Arrays.stream(tumorAlleleCounts).mapToInt(x -> (int) FastMath.round(x)).toArray();
final int dp = (int) MathUtils.sum(adArray);
final GenotypeBuilder gb = new GenotypeBuilder(tumorSample, tumorLog10Matrix.alleles());
if (!MTAC.calculateAFfromAD) {
gb.attribute(GATKVCFConstants.ALLELE_FRACTION_KEY, getAltAlleleFractions(Arrays.stream(tumorAlleleCounts).map(x -> (double) FastMath.round(x)/dp).toArray()));
}
final Genotype tumorGenotype = gb.make();
final List<Genotype> genotypes = new ArrayList<>(Arrays.asList(tumorGenotype));

// TODO: We shouldn't always assume that the genotype in the normal is hom ref
Expand Down Expand Up @@ -269,7 +272,7 @@ public static double[] getEffectiveCounts(final LikelihoodMatrix<Allele> log10Li

private static double[] getAltAlleleFractions(final double[] alleleCounts) {
final double[] allAlleleFractions = MathUtils.normalizeFromRealSpace(alleleCounts);
return Arrays.copyOfRange(allAlleleFractions, 1, allAlleleFractions.length);
return Arrays.copyOfRange(allAlleleFractions, 1, allAlleleFractions.length); //omit the first entry of the array corresponding to the reference
}
/**
* Calculate the log10 likelihoods of the ref/alt het genotype for each alt allele, then subtracts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import scala.tools.nsc.transform.patmat.ScalaLogic;

import java.io.File;
import java.nio.file.Files;
Expand Down Expand Up @@ -49,6 +50,8 @@ public class Mutect2IntegrationTest extends CommandLineProgramTest {

private static final File NA12878_MITO_BAM = new File(toolsTestDir, "mutect/mito/NA12878.bam");
private static final File MITO_REF = new File(toolsTestDir, "mutect/mito/Homo_sapiens_assembly38.mt_only.fasta");
private static final File DEEP_MITO_BAM = new File(largeFileTestDir, "mutect/highDPMTsnippet.bam");
private static final String DEEP_MITO_SAMPLE_NAME = "mixture";

/**
* Several DREAM challenge bams with synthetic truth data. In order to keep file sizes manageable, bams are restricted
Expand Down Expand Up @@ -499,7 +502,33 @@ public void testMitochondria() throws Exception {
Assert.assertTrue(expectedKeys.stream().allMatch(variantKeys::contains));
}

@Test
@SuppressWarnings("deprecated")
public void testAFfromADoverHighDP() throws Exception {
Utils.resetRandomGenerator();
final File unfilteredVcf = createTempFile("unfiltered", ".vcf");

final List<String> args = Arrays.asList("-I", DEEP_MITO_BAM.getAbsolutePath(),
"-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, DEEP_MITO_SAMPLE_NAME,
"-R", MITO_REF.getAbsolutePath(),
"-L", "chrM:1-1018",
"-ip", "300",
"-min-pruning", "4",
"--" + M2ArgumentCollection.GET_AF_FROM_AD_LONG_NAME,
"-O", unfilteredVcf.getAbsolutePath());
runCommandLine(args);

final List<VariantContext> variants = VariantContextTestUtils.streamVcf(unfilteredVcf).collect(Collectors.toList());

for (final VariantContext vc : variants) {
Assert.assertTrue(vc.isBiallelic()); //I do some lazy parsing below that won't hold for multiple alternate alleles
Genotype g = vc.getGenotype(DEEP_MITO_SAMPLE_NAME);
Assert.assertTrue(g.hasAD());
final int[] ADs = g.getAD();
Assert.assertTrue(g.hasExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY));
Assert.assertEquals(Double.parseDouble(String.valueOf(vc.getGenotype(DEEP_MITO_SAMPLE_NAME).getExtendedAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY,"0"))), (double)ADs[1]/(ADs[0]+ADs[1]), 1e-6);
}
}

@DataProvider(name="bamoutVariations")
public Object[][] bamoutVariations() {
Expand Down
3 changes: 3 additions & 0 deletions src/test/resources/large/mutect/highDPMTsnippet.bai
Git LFS file not shown
3 changes: 3 additions & 0 deletions src/test/resources/large/mutect/highDPMTsnippet.bam
Git LFS file not shown

0 comments on commit d560cb5

Please sign in to comment.