Skip to content

Commit

Permalink
bugfix for variants on contig edges
Browse files Browse the repository at this point in the history
  • Loading branch information
lucidtronix committed May 14, 2018
1 parent 61c6891 commit 4309a50
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ def reference_string_to_tensor(reference):
dna_data[i, defines.DNA_SYMBOLS[b]] = 1.0
elif b in defines.AMBIGUITY_CODES:
dna_data[i] = defines.AMBIGUITY_CODES[b]
elif b == '\x00':
break
else:
raise ValueError('Error! Unknown code:', b)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,19 @@ public void testSmallBatchInference()throws IOException {
spec.executeTest("testInference", this);
}

@Test(groups = {"python"})
public void testOnContigEdge() throws IOException{
final String edgeVcf = toolsTestDir + "walkers/VQSR/variantNearContigEdge.vcf";
final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
argsBuilder.addArgument(StandardArgumentDefinitions.VARIANT_LONG_NAME, edgeVcf)
.addArgument(StandardArgumentDefinitions.REFERENCE_LONG_NAME, hg19MiniReference)
.addArgument("architecture", architecture1D)
.addArgument(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false");

argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, largeFileTestDir + "VQSR/expected/chrM.vcf");
runCommandLine(argsBuilder);
}

/**
* Run the 2D Model on a small test VCF.
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
##fileformat=VCFv4.2
##FILTER=<ID=LowQual,Description="Low quality">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RAW_MQ,Number=1,Type=Float,Description="Raw data for RMS Mapping Quality">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias">
##contig=<ID=1,length=16000>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT test
1 15990 . C T 157767.77 . AC=2;AF=1.00;AN=2;BaseQRankSum=5.566;ClippingRankSum=0.053;DP=4492;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=57.64;MQRankSum=-0.712;QD=30.32;ReadPosRankSum=-1.183;SOR=0.761 GT:AD:DP:GQ:PL 1/1:21,4410:4431:99:157796,12877,0

0 comments on commit 4309a50

Please sign in to comment.