Skip to content

Commit

Permalink
fixed bug for M2 GGA alleles with zero coverage (broadinstitute#5303)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin authored and EdwardDixon committed Nov 9, 2018
1 parent 73e9efc commit 3a7a3ea
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ private void addGenotypes(final LikelihoodMatrix<Allele> tumorLog10Matrix,
final int dp = (int) MathUtils.sum(adArray);
final GenotypeBuilder gb = new GenotypeBuilder(tumorSample, tumorLog10Matrix.alleles());
final double[] flatPriorPseudocounts = new IndexRange(0, tumorLog10Matrix.numberOfAlleles()).mapToDouble(n -> 1);
final double[] alleleFractionsPosterior = SomaticLikelihoodsEngine.alleleFractionsPosterior(
getAsRealMatrix(tumorLog10Matrix), flatPriorPseudocounts);
final double[] alleleFractionsPosterior = tumorLog10Matrix.numberOfReads() == 0 ? flatPriorPseudocounts :
SomaticLikelihoodsEngine.alleleFractionsPosterior(getAsRealMatrix(tumorLog10Matrix), flatPriorPseudocounts);
if (!MTAC.calculateAFfromAD) {
// Use mean of the allele fraction posterior distribution
double[] tumorAlleleFractionsMean = MathUtils.normalizeFromRealSpace(alleleFractionsPosterior);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ public void testGivenAllelesMode() throws Exception {
final List<String> args = Arrays.asList("-I", NA12878_20_21_WGS_bam,
"-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, "NA12878",
"-R", b37_reference_20_21,
"-L", "20:10000000-10010000",
"-L", "20:9998500-10010000",
"-O", unfilteredVcf.getAbsolutePath(),
"--genotyping-mode", "GENOTYPE_GIVEN_ALLELES",
"--alleles", givenAllelesVcf.getAbsolutePath());
Expand All @@ -332,6 +332,25 @@ public void testGivenAllelesMode() throws Exception {
}
}

// make sure that GGA mode with given alleles that normally wouldn't be called due to complete lack of coverage
// doesn't run into any edge case bug involving empty likelihoods matrices
@Test
public void testGivenAllelesZeroCoverage() throws Exception {
Utils.resetRandomGenerator();
final File bam = new File(DREAM_BAMS_DIR, "tumor_3.bam");
final String sample = "IS3.snv.indel.sv";
final File unfilteredVcf = createTempFile("unfiltered", ".vcf");
final File givenAllelesVcf = new File(toolsTestDir, "mutect/gga_mode_2.vcf");
final List<String> args = Arrays.asList("-I", bam.getAbsolutePath(),
"-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, sample,
"-R", b37_reference_20_21,
"-L", "20:1119000-1120000",
"-O", unfilteredVcf.getAbsolutePath(),
"--genotyping-mode", "GENOTYPE_GIVEN_ALLELES",
"--alleles", givenAllelesVcf.getAbsolutePath());
runCommandLine(args);
}

@Test
public void testContaminationFilter() throws Exception {
Utils.resetRandomGenerator();
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
##fileformat=VCFv4.2
##FILTER=<ID=LowQual,Description="Low quality">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RAW_MQ,Number=1,Type=Float,Description="Raw data for RMS Mapping Quality">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
##contig=<ID=20,length=63025520>
##contig=<ID=21,length=48129895>
#CHROM POS ID REF ALT QUAL FILTER INFO
20 1119886 . A T . . .
20 1119950 . A T . . .
Binary file not shown.

0 comments on commit 3a7a3ea

Please sign in to comment.