Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed bug for M2 GGA alleles with zero coverage #5303

Merged
merged 1 commit into from
Oct 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ private void addGenotypes(final LikelihoodMatrix<Allele> tumorLog10Matrix,
final int dp = (int) MathUtils.sum(adArray);
final GenotypeBuilder gb = new GenotypeBuilder(tumorSample, tumorLog10Matrix.alleles());
final double[] flatPriorPseudocounts = new IndexRange(0, tumorLog10Matrix.numberOfAlleles()).mapToDouble(n -> 1);
final double[] alleleFractionsPosterior = SomaticLikelihoodsEngine.alleleFractionsPosterior(
getAsRealMatrix(tumorLog10Matrix), flatPriorPseudocounts);
final double[] alleleFractionsPosterior = tumorLog10Matrix.numberOfReads() == 0 ? flatPriorPseudocounts :
SomaticLikelihoodsEngine.alleleFractionsPosterior(getAsRealMatrix(tumorLog10Matrix), flatPriorPseudocounts);
if (!MTAC.calculateAFfromAD) {
// Use mean of the allele fraction posterior distribution
double[] tumorAlleleFractionsMean = MathUtils.normalizeFromRealSpace(alleleFractionsPosterior);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ public void testGivenAllelesMode() throws Exception {
final List<String> args = Arrays.asList("-I", NA12878_20_21_WGS_bam,
"-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, "NA12878",
"-R", b37_reference_20_21,
"-L", "20:10000000-10010000",
"-L", "20:9998500-10010000",
"-O", unfilteredVcf.getAbsolutePath(),
"--genotyping-mode", "GENOTYPE_GIVEN_ALLELES",
"--alleles", givenAllelesVcf.getAbsolutePath());
Expand All @@ -332,6 +332,25 @@ public void testGivenAllelesMode() throws Exception {
}
}

// make sure that GGA mode with given alleles that normally wouldn't be called due to complete lack of coverage
// doesn't run into any edge case bug involving empty likelihoods matrices
@Test
public void testGivenAllelesZeroCoverage() throws Exception {
Utils.resetRandomGenerator();
final File bam = new File(DREAM_BAMS_DIR, "tumor_3.bam");
final String sample = "IS3.snv.indel.sv";
final File unfilteredVcf = createTempFile("unfiltered", ".vcf");
final File givenAllelesVcf = new File(toolsTestDir, "mutect/gga_mode_2.vcf");
final List<String> args = Arrays.asList("-I", bam.getAbsolutePath(),
"-" + M2ArgumentCollection.TUMOR_SAMPLE_SHORT_NAME, sample,
"-R", b37_reference_20_21,
"-L", "20:1119000-1120000",
"-O", unfilteredVcf.getAbsolutePath(),
"--genotyping-mode", "GENOTYPE_GIVEN_ALLELES",
"--alleles", givenAllelesVcf.getAbsolutePath());
runCommandLine(args);
}

@Test
public void testContaminationFilter() throws Exception {
Utils.resetRandomGenerator();
Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
##fileformat=VCFv4.2
##FILTER=<ID=LowQual,Description="Low quality">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RAW_MQ,Number=1,Type=Float,Description="Raw data for RMS Mapping Quality">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
##contig=<ID=20,length=63025520>
##contig=<ID=21,length=48129895>
#CHROM POS ID REF ALT QUAL FILTER INFO
20 1119886 . A T . . .
20 1119950 . A T . . .
Binary file not shown.