Skip to content

Commit

Permalink
New qual doesn't count spanning deletions as support for variant qual (
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin authored May 30, 2018
1 parent c77101a commit ecc32f6
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
import org.apache.commons.math3.util.MathArrays;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAlleleCounts;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeLikelihoodCalculator;
Expand Down Expand Up @@ -66,9 +67,8 @@ public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int de
double[] alleleCounts = new double[numAlleles];
final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles); // log10(1/numAlleles)
double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);
double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY;

while (alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE) {
for (double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY; alleleCountsMaximumDifference > THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE; ) {
final double[] newAlleleCounts = effectiveAlleleCounts(vc, log10AlleleFrequencies);
alleleCountsMaximumDifference = Arrays.stream(MathArrays.ebeSubtract(alleleCounts, newAlleleCounts)).map(Math::abs).max().getAsDouble();
alleleCounts = newAlleleCounts;
Expand All @@ -83,6 +83,8 @@ public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int de
double[] log10POfZeroCountsByAllele = new double[numAlleles];
double log10PNoVariant = 0;

final boolean spanningDeletionPresent = alleles.contains(Allele.SPAN_DEL);
final Map<Integer, int[]> nonVariantIndicesByPloidy = new Int2ObjectArrayMap<>();
for (final Genotype g : vc.getGenotypes()) {
if (!g.hasLikelihoods()) {
continue;
Expand All @@ -93,7 +95,14 @@ public AFCalculationResult getLog10PNonRef(final VariantContext vc, final int de
final double[] log10GenotypePosteriors = log10NormalizedGenotypePosteriors(g, glCalc, log10AlleleFrequencies);

//the total probability
log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
if (!spanningDeletionPresent) {
log10PNoVariant += log10GenotypePosteriors[HOM_REF_GENOTYPE_INDEX];
} else {
nonVariantIndicesByPloidy.computeIfAbsent(ploidy, p -> genotypeIndicesWithOnlyRefAndSpanDel(p, alleles));
final int[] nonVariantIndices = nonVariantIndicesByPloidy.get(ploidy);
final double[] nonVariantLog10Posteriors = MathUtils.applyToArray(nonVariantIndices, n -> log10GenotypePosteriors[n]);
log10PNoVariant += MathUtils.log10SumLog10(nonVariantLog10Posteriors);
}

// per allele non-log space probabilities of zero counts for this sample
// for each allele calculate the total probability of genotypes containing at least one copy of the allele
Expand Down Expand Up @@ -168,6 +177,18 @@ private static double[] log10NormalizedGenotypePosteriors(final Genotype g, fina
return MathUtils.normalizeLog10(log10Posteriors);
}

private static int[] genotypeIndicesWithOnlyRefAndSpanDel(final int ploidy, final List<Allele> alleles) {
final GenotypeLikelihoodCalculator glCalc = GL_CALCS.getInstance(ploidy, alleles.size());
final boolean spanningDeletionPresent = alleles.contains(Allele.SPAN_DEL);
if (!spanningDeletionPresent) {
return new int[] {HOM_REF_GENOTYPE_INDEX};
} else {
final int spanDelIndex = alleles.indexOf(Allele.SPAN_DEL);
// allele counts are in the GenotypeLikelihoodCalculator format of {ref index, ref count, span del index, span del count}
return new IndexRange(0, ploidy).mapToInteger(n -> glCalc.alleleCountsToIndex(new int[]{0, ploidy - n, spanDelIndex, n}));
}
}

@Override //Note: unused
protected AFCalculationResult getResultFromFinalState(final VariantContext vc, final double[] priors, final StateTracker st) { return null; }

Expand Down
19 changes: 19 additions & 0 deletions src/main/java/org/broadinstitute/hellbender/utils/MathUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,25 @@ public static double[] applyToArray(final double[] array, final DoubleUnaryOpera
return result;
}

/**
* The following method implements Arrays.stream(array).map(func).toArray(), which is concise but performs poorly due
* to the overhead of creating a stream, especially with small arrays. Thus we wrap the wordy but fast array code
* in the following method which permits concise Java 8 code.
*
* Returns a new array -- the original array in not modified.
*
* This method has been benchmarked and performs as well as array-only code.
*/
public static double[] applyToArray(final int[] array, final IntToDoubleFunction func) {
Utils.nonNull(func);
Utils.nonNull(array);
final double[] result = new double[array.length];
for (int m = 0; m < result.length; m++) {
result[m] = func.applyAsDouble(array[m]);
}
return result;
}

/**
* The following method implements Arrays.stream(array).map(func).toArray(), which is concise but performs poorly due
* to the overhead of creating a stream, especially with small arrays. Thus we wrap the wordy but fast array code
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,82 @@ public void testManyRefSamplesDontKillGoodVariant() {
}
}

// spanning deletion alleles should be treated as non-variant, so we make a few random PLs and test that the
// qual is invariant to swapping the ref/ref PL with the ref/SPAN_DEL PL
@Test
public void testSpanningDeletionIsNotConsideredVariant() {
final int ploidy = 2;
final AlleleFrequencyCalculator afCalc = new AlleleFrequencyCalculator(1, 0.1, 0.1, ploidy);
final List<Allele> alleles = Arrays.asList(A, B, Allele.SPAN_DEL);

//{AA}, {AB}, {BB}, {AC}, {BC}, {CC}

// some pls that have high likelihood for span del allele but not for the SNP (B)
final int[] spanDelPls = new int[] {50, 100, 100, 0, 100, 100};

// some pls that weakly support the SNP
final int[] lowQualSnpPls = new int[] {10,0,40,100,70,300};


final Genotype spanDel = makeGenotype(ploidy, spanDelPls);
final Genotype lowQualSNP = makeGenotype(ploidy, lowQualSnpPls);

// first test the span del genotype alone. Its best PL containing the SNP is 100, so we expect a variant probability
// of about 10^(-100/10) -- a bit less due to the prior bias in favor of the reference
final VariantContext vcSpanDel = makeVC(alleles, Arrays.asList(spanDel));
final double log10PVariant = afCalc.getLog10PNonRef(vcSpanDel).getLog10LikelihoodOfAFGT0();
Assert.assertTrue(log10PVariant < - 10);

// now test a realistic situation of two samples, one with a low-quality SNP and one with the spanning deletion
// we want to find that the spanning deletion has little effect on the qual
// In fact, we also want to check that it *decreases* the qual, because it's essentially one more hom ref sample
// Furthermore, to be precise it should be really behave almost identically to a hom ref *haploid* sample,
// so we check that, too
final VariantContext vcLowQualSnp = makeVC(alleles, Arrays.asList(lowQualSNP));
final double lowQualSNPQualScore = afCalc.getLog10PNonRef(vcLowQualSnp).getLog10LikelihoodOfAFGT0();
final VariantContext vcBoth = makeVC(alleles, Arrays.asList(lowQualSNP, spanDel));
final double bothQualScore = afCalc.getLog10PNonRef(vcBoth).getLog10LikelihoodOfAFGT0();
Assert.assertEquals(lowQualSNPQualScore, bothQualScore, 0.1);
Assert.assertTrue(bothQualScore < lowQualSNPQualScore);

final int[] haploidRefPls = new int[] {0, 100, 100};
final Genotype haploidRef = makeGenotype(1, haploidRefPls);

final VariantContext vcLowQualSnpAndHaploidRef = makeVC(alleles, Arrays.asList(lowQualSNP, haploidRef));
final double lowQualSNPAndHaplpidRefQualScore = afCalc.getLog10PNonRef(vcLowQualSnpAndHaploidRef).getLog10LikelihoodOfAFGT0();
Assert.assertEquals(bothQualScore, lowQualSNPAndHaplpidRefQualScore, 1e-5);

// as a final test, we check that getting rid of the spanning deletion allele, in the sense that
// REF / SPAN_DEL --> haploid REF; REF / SNP --> REF / SNP
// does not affect the qual score

final int[] haploidRefPlsWithoutSpanDel = new int[] {0, 100};
final int[] snpPlsWithoutSpanDel = new int[] {10, 0, 40};
final VariantContext vcNoSpanDel = makeVC(Arrays.asList(A,B), Arrays.asList(makeGenotype(ploidy, snpPlsWithoutSpanDel),
makeGenotype(1, haploidRefPlsWithoutSpanDel)));
final double noSpanDelQualScore = afCalc.getLog10PNonRef(vcNoSpanDel).getLog10LikelihoodOfAFGT0();
Assert.assertEquals(bothQualScore, noSpanDelQualScore, 1e-6);
}

@Test
public void testPresenceOfUnlikelySpanningDeletionDoesntAffectResults() {
final int ploidy = 2;
final AlleleFrequencyCalculator afCalc = new AlleleFrequencyCalculator(1, 0.1, 0.1, ploidy);
final List<Allele> allelesWithoutSpanDel = Arrays.asList(A, B);
final List<Allele> allelesWithSpanDel = Arrays.asList(A, B, Allele.SPAN_DEL);

// make PLs that support an A/B genotype
final int[] plsWithoutSpanDel = new int[] {50, 0, 50};
final int[] plsWithSpanDel = new int[] {50, 0, 50, 100, 100, 100};
final Genotype genotypeWithoutSpanDel = makeGenotype(ploidy, plsWithoutSpanDel);
final Genotype genotypeWithSpanDel = makeGenotype(ploidy, plsWithSpanDel);
final VariantContext vcWithoutSpanDel = makeVC(allelesWithoutSpanDel, Arrays.asList(genotypeWithoutSpanDel));
final VariantContext vcWithSpanDel = makeVC(allelesWithSpanDel, Arrays.asList(genotypeWithSpanDel));
final double log10PVariantWithoutSpanDel = afCalc.getLog10PNonRef(vcWithoutSpanDel).getLog10LikelihoodOfAFGT0();
final double log10PVariantWithSpanDel = afCalc.getLog10PNonRef(vcWithSpanDel).getLog10LikelihoodOfAFGT0();
Assert.assertEquals(log10PVariantWithoutSpanDel, log10PVariantWithSpanDel, 0.0001);
}

// make PLs that correspond to an obvious call i.e. one PL is relatively big and the rest are zero
// alleleCounts is the GenotypeAlleleCounts format for the obvious genotype, with repeats but in no particular order
private static int[] PLsForObviousCall(final int ploidy, final int numAlleles, final int[] alleleCounts, final int PL) {
Expand Down

0 comments on commit ecc32f6

Please sign in to comment.