Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing a bug in parsing of T0 tag #8185

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -358,14 +358,17 @@ private void parseSingleHmer(final double[] probs, final byte[] tp, final int fl
}
}

//convert qualities from the single hmer to a column in a flow matrix
// Convert qualities from the T0 tag into probabilities of a 1->0 error.
// The T0 tag encodes the probability of a 1->0 error. In this case there is no
// nucleotide on which to place the error probability, so it is placed on the
// neighboring bases, and we choose the **lower** error probability of the two
// neighbors (that is how T0 encoding works). The error is assigned only to the
// 1-mer entry, assuming that 2->0 errors are negligibly rare.
private void parseZeroQuals(final double[] probs, final int flowIdx, final int qualOfs){
if ((qualOfs == 0) | (qualOfs==probs.length)){ // do not report zero error probability on the edge of the read
return;
}
if ((probs[qualOfs-1])==(probs[qualOfs])){
flowMatrix[1][flowIdx] = Math.max(flowMatrix[1][flowIdx], Math.max(probs[qualOfs-1],probs[qualOfs]));
}
flowMatrix[1][flowIdx] = Math.max(flowMatrix[1][flowIdx], Math.min(probs[qualOfs-1],probs[qualOfs]));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you provide a bit of context as to what the expectation is/was here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}

public String getFlowOrder() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@

public class FlowBasedReadUnitTest extends GATKBaseTest {

// If true, update the expected outputs in tests that assert an exact match vs. prior output,
// instead of actually running the tests. Can be used with "./gradlew test -Dtest.single=FlowBasedReadUnitTest"
// to update all of the exact-match tests in this class at once. After you do this, you should look at the
// diffs in the new expected outputs in git to confirm that they are consistent with expectations.
public static final boolean UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS = false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good to add.


@Test
void testBAMFormatParsing() throws Exception{
final String testResourceDir = publicTestDir + "org/broadinstitute/hellbender/utils/read/flow/reads/";
Expand All @@ -38,23 +44,35 @@ void testBAMFormatParsing() throws Exception{
for ( sr = reader.iterator(), curRead = 0 ; sr.hasNext(); curRead++) {
final FlowBasedRead fbr = new FlowBasedRead(sr.next(),flowOrder, 12, fbargs);
fbr.applyAlignment();
Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength());

try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".key.txt")) {
fbr.writeKey(fos);
}


String expectedFile = outputDir + "sample." + curRead + ".key.txt";
IntegrationTestSpec.assertEqualTextFiles(new File(tempOutputDir + "/" + curRead + ".key.txt"), new File(expectedFile));
try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".matrix.txt")){
fbr.writeMatrix(fos);
if (!UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength());
try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".key.txt")) {
fbr.writeKey(fos);
}
IntegrationTestSpec.assertEqualTextFiles(new File(tempOutputDir + "/" + curRead + ".key.txt"), new File(expectedFile));
} else {
try (FileWriter fos = new FileWriter(expectedFile)) {
fbr.writeKey(fos);
}
}
expectedFile = outputDir + "sample." + curRead + ".matrix.txt";
IntegrationTestSpec.assertEqualTextFiles(new File(tempOutputDir + "/" + curRead + ".matrix.txt"), new File(expectedFile));

if (!UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".matrix.txt")) {
fbr.writeMatrix(fos);
}
IntegrationTestSpec.assertEqualTextFiles(new File(tempOutputDir + "/" + curRead + ".matrix.txt"), new File(expectedFile));
} else {
try (FileWriter fos = new FileWriter(expectedFile)) {
fbr.writeMatrix(fos);
}
}
}
}


@Test
void testBAMFormatParsingWithT0() throws Exception{
final String testResourceDir = publicTestDir + "org/broadinstitute/hellbender/utils/read/flow/reads/";
Expand All @@ -73,20 +91,33 @@ void testBAMFormatParsingWithT0() throws Exception{
for ( sr = reader.iterator(), curRead = 0 ; sr.hasNext(); curRead++) {
final FlowBasedRead fbr = new FlowBasedRead(sr.next(),flowOrder, 12, fbargs);
fbr.applyAlignment();
Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength());
String expectedFile = outputDir + "sample.t0." + curRead + ".key.txt";

try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".key.txt")) {
fbr.writeKey(fos);
if ( !UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) {
Assert.assertEquals(fbr.totalKeyBases(), fbr.seqLength());
try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".key.txt")) {
fbr.writeKey(fos);
}
IntegrationTestSpec.assertEqualTextFiles(new File(tempOutputDir + "/" + curRead + ".key.txt"), new File(expectedFile));
} else {
try (FileWriter fos = new FileWriter( expectedFile )) {
fbr.writeKey(fos);
}
}

expectedFile = outputDir + "sample.t0." + curRead + ".matrix.txt";

if (!UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) {
try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".matrix.txt")) {
fbr.writeMatrix(fos);
}
IntegrationTestSpec.assertEqualTextFiles(new File(tempOutputDir + "/" + curRead + ".matrix.txt"), new File(expectedFile));
} else {
try (FileWriter fos = new FileWriter(expectedFile)) {
fbr.writeMatrix(fos);
}

String expectedFile = outputDir + "sample.t0." + curRead + ".key.txt";
IntegrationTestSpec.assertEqualTextFiles(new File(tempOutputDir + "/" + curRead + ".key.txt"), new File(expectedFile));
try (FileWriter fos = new FileWriter(tempOutputDir + "/" + curRead + ".matrix.txt")){
fbr.writeMatrix(fos);
}
expectedFile = outputDir + "sample.t0." + curRead + ".matrix.txt";
IntegrationTestSpec.assertEqualTextFiles(new File(tempOutputDir + "/" + curRead + ".matrix.txt"), new File(expectedFile));
}
}

Expand Down Expand Up @@ -165,5 +196,9 @@ public void testArtificialFlowBasedReadConstruction() {
new FlowBasedRead(read, FlowBasedRead.DEFAULT_FLOW_ORDER, FlowBasedRead.MAX_CLASS, new FlowBasedArgumentCollection());
}

/**
 * Fails when {@code UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS} is {@code true}, so the
 * update toggle cannot be left switched on unnoticed.
 */
@Test
public void assertThatExpectedOutputUpdateToggleIsDisabled() {
    final boolean updateModeEnabled = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS;
    Assert.assertFalse(updateModeEnabled, "The toggle to update expected outputs should not be left enabled");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -4829,8 +4829,8 @@ C,R,F,B,Bi,Q,ti
321,12,1,G,234,40,0,. 0.0010

C,R,F,B,Bi,Q,ti
322,0,0,.,.,.,.,. 0.9880
322,1,0,.,.,.,.,. 0.0010
322,0,0,.,.,.,.,. 0.8890
322,1,0,.,.,.,.,. 0.1000
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These seem like fairly small effects...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. And from the beginning the effect is small. But the fix does seem to improve SNPs a tiny bit

322,2,0,.,.,.,.,. 0.0010
322,3,0,.,.,.,.,. 0.0010
322,4,0,.,.,.,.,. 0.0010
Expand All @@ -4844,8 +4844,8 @@ C,R,F,B,Bi,Q,ti
322,12,0,.,.,.,.,. 0.0010

C,R,F,B,Bi,Q,ti
323,0,0,.,.,.,.,. 0.9880
323,1,0,.,.,.,.,. 0.0010
323,0,0,.,.,.,.,. 0.8890
323,1,0,.,.,.,.,. 0.1000
323,2,0,.,.,.,.,. 0.0010
323,3,0,.,.,.,.,. 0.0010
323,4,0,.,.,.,.,. 0.0010
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1994,8 +1994,8 @@ C,R,F,B,Bi,Q,ti
132,12,1,A,88,40,0,. 0.0010

C,R,F,B,Bi,Q,ti
133,0,0,.,.,.,.,. 0.9880
133,1,0,.,.,.,.,. 0.0010
133,0,0,.,.,.,.,. 0.9764
133,1,0,.,.,.,.,. 0.0126
133,2,0,.,.,.,.,. 0.0010
133,3,0,.,.,.,.,. 0.0010
133,4,0,.,.,.,.,. 0.0010
Expand Down Expand Up @@ -4889,7 +4889,7 @@ C,R,F,B,Bi,Q,ti
325,12,1,T,227,40,0,. 0.0010

C,R,F,B,Bi,Q,ti
326,0,0,.,.,.,.,. 0.9880
326,0,0,.,.,.,.,. 0.9877
326,1,0,.,.,.,.,. 0.0010
326,2,0,.,.,.,.,. 0.0010
326,3,0,.,.,.,.,. 0.0010
Expand All @@ -4904,7 +4904,7 @@ C,R,F,B,Bi,Q,ti
326,12,0,.,.,.,.,. 0.0010

C,R,F,B,Bi,Q,ti
327,0,0,.,.,.,.,. 0.9880
327,0,0,.,.,.,.,. 0.9877
327,1,0,.,.,.,.,. 0.0010
327,2,0,.,.,.,.,. 0.0010
327,3,0,.,.,.,.,. 0.0010
Expand Down Expand Up @@ -4964,8 +4964,8 @@ C,R,F,B,Bi,Q,ti
330,12,1,G,229,40,0,. 0.0010

C,R,F,B,Bi,Q,ti
331,0,0,.,.,.,.,. 0.9880
331,1,0,.,.,.,.,. 0.0010
331,0,0,.,.,.,.,. 0.9827
331,1,0,.,.,.,.,. 0.0063
331,2,0,.,.,.,.,. 0.0010
331,3,0,.,.,.,.,. 0.0010
331,4,0,.,.,.,.,. 0.0010
Expand All @@ -4979,8 +4979,8 @@ C,R,F,B,Bi,Q,ti
331,12,0,.,.,.,.,. 0.0010

C,R,F,B,Bi,Q,ti
332,0,0,.,.,.,.,. 0.9880
332,1,0,.,.,.,.,. 0.0010
332,0,0,.,.,.,.,. 0.9827
332,1,0,.,.,.,.,. 0.0063
332,2,0,.,.,.,.,. 0.0010
332,3,0,.,.,.,.,. 0.0010
332,4,0,.,.,.,.,. 0.0010
Expand Down Expand Up @@ -5009,8 +5009,8 @@ C,R,F,B,Bi,Q,ti
333,12,3,T,230,20,0,. 0.0010

C,R,F,B,Bi,Q,ti
334,0,0,.,.,.,.,. 0.9880
334,1,0,.,.,.,.,. 0.0010
334,0,0,.,.,.,.,. 0.9827
334,1,0,.,.,.,.,. 0.0063
334,2,0,.,.,.,.,. 0.0010
334,3,0,.,.,.,.,. 0.0010
334,4,0,.,.,.,.,. 0.0010
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ C,R,F,B,Bi,Q,ti
10,12,2,A,5,40,0,r 0.0010

C,R,F,B,Bi,Q,ti
11,0,0,.,.,.,.,r 0.9880
11,1,0,.,.,.,.,r 0.0010
11,0,0,.,.,.,.,r 0.9732
11,1,0,.,.,.,.,r 0.0158
11,2,0,.,.,.,.,r 0.0010
11,3,0,.,.,.,.,r 0.0010
11,4,0,.,.,.,.,r 0.0010
Expand All @@ -179,8 +179,8 @@ C,R,F,B,Bi,Q,ti
11,12,0,.,.,.,.,r 0.0010

C,R,F,B,Bi,Q,ti
12,0,0,.,.,.,.,r 0.9880
12,1,0,.,.,.,.,r 0.0010
12,0,0,.,.,.,.,r 0.9732
12,1,0,.,.,.,.,r 0.0158
12,2,0,.,.,.,.,r 0.0010
12,3,0,.,.,.,.,r 0.0010
12,4,0,.,.,.,.,r 0.0010
Expand Down Expand Up @@ -209,8 +209,8 @@ C,R,F,B,Bi,Q,ti
13,12,2,C,7,5,0,r 0.0010

C,R,F,B,Bi,Q,ti
14,0,0,.,.,.,.,r 0.9880
14,1,0,.,.,.,.,r 0.0010
14,0,0,.,.,.,.,r 0.9096
14,1,0,.,.,.,.,r 0.0794
14,2,0,.,.,.,.,r 0.0010
14,3,0,.,.,.,.,r 0.0010
14,4,0,.,.,.,.,r 0.0010
Expand Down Expand Up @@ -359,8 +359,8 @@ C,R,F,B,Bi,Q,ti
23,12,1,T,16,16,0,r 0.0010

C,R,F,B,Bi,Q,ti
24,0,0,.,.,.,.,r 0.9880
24,1,0,.,.,.,.,r 0.0010
24,0,0,.,.,.,.,r 0.9492
24,1,0,.,.,.,.,r 0.0398
24,2,0,.,.,.,.,r 0.0010
24,3,0,.,.,.,.,r 0.0010
24,4,0,.,.,.,.,r 0.0010
Expand All @@ -374,8 +374,8 @@ C,R,F,B,Bi,Q,ti
24,12,0,.,.,.,.,r 0.0010

C,R,F,B,Bi,Q,ti
25,0,0,.,.,.,.,r 0.9880
25,1,0,.,.,.,.,r 0.0010
25,0,0,.,.,.,.,r 0.9492
25,1,0,.,.,.,.,r 0.0398
25,2,0,.,.,.,.,r 0.0010
25,3,0,.,.,.,.,r 0.0010
25,4,0,.,.,.,.,r 0.0010
Expand Down Expand Up @@ -539,7 +539,7 @@ C,R,F,B,Bi,Q,ti
35,12,1,T,25,25,0,r 0.0010

C,R,F,B,Bi,Q,ti
36,0,0,.,.,.,.,r 0.9880
36,0,0,.,.,.,.,r 0.9877
36,1,0,.,.,.,.,r 0.0010
36,2,0,.,.,.,.,r 0.0010
36,3,0,.,.,.,.,r 0.0010
Expand All @@ -554,7 +554,7 @@ C,R,F,B,Bi,Q,ti
36,12,0,.,.,.,.,r 0.0010

C,R,F,B,Bi,Q,ti
37,0,0,.,.,.,.,r 0.9880
37,0,0,.,.,.,.,r 0.9877
37,1,0,.,.,.,.,r 0.0010
37,2,0,.,.,.,.,r 0.0010
37,3,0,.,.,.,.,r 0.0010
Expand Down Expand Up @@ -614,7 +614,7 @@ C,R,F,B,Bi,Q,ti
40,12,1,G,28,27,0,r 0.0010

C,R,F,B,Bi,Q,ti
41,0,0,.,.,.,.,r 0.9880
41,0,0,.,.,.,.,r 0.9877
41,1,0,.,.,.,.,r 0.0010
41,2,0,.,.,.,.,r 0.0010
41,3,0,.,.,.,.,r 0.0010
Expand All @@ -629,7 +629,7 @@ C,R,F,B,Bi,Q,ti
41,12,0,.,.,.,.,r 0.0010

C,R,F,B,Bi,Q,ti
42,0,0,.,.,.,.,r 0.9880
42,0,0,.,.,.,.,r 0.9877
42,1,0,.,.,.,.,r 0.0010
42,2,0,.,.,.,.,r 0.0010
42,3,0,.,.,.,.,r 0.0010
Expand Down Expand Up @@ -659,7 +659,7 @@ C,R,F,B,Bi,Q,ti
43,12,3,T,29,10,0,r 0.0010

C,R,F,B,Bi,Q,ti
44,0,0,.,.,.,.,r 0.9880
44,0,0,.,.,.,.,r 0.9870
44,1,0,.,.,.,.,r 0.0010
44,2,0,.,.,.,.,r 0.0010
44,3,0,.,.,.,.,r 0.0010
Expand All @@ -674,7 +674,7 @@ C,R,F,B,Bi,Q,ti
44,12,0,.,.,.,.,r 0.0010

C,R,F,B,Bi,Q,ti
45,0,0,.,.,.,.,r 0.9880
45,0,0,.,.,.,.,r 0.9870
45,1,0,.,.,.,.,r 0.0010
45,2,0,.,.,.,.,r 0.0010
45,3,0,.,.,.,.,r 0.0010
Expand Down Expand Up @@ -1694,7 +1694,7 @@ C,R,F,B,Bi,Q,ti
112,12,1,G,80,28,0,r 0.0010

C,R,F,B,Bi,Q,ti
113,0,0,.,.,.,.,r 0.9880
113,0,0,.,.,.,.,r 0.9874
113,1,0,.,.,.,.,r 0.0010
113,2,0,.,.,.,.,r 0.0010
113,3,0,.,.,.,.,r 0.0010
Expand All @@ -1709,7 +1709,7 @@ C,R,F,B,Bi,Q,ti
113,12,0,.,.,.,.,r 0.0010

C,R,F,B,Bi,Q,ti
114,0,0,.,.,.,.,r 0.9880
114,0,0,.,.,.,.,r 0.9874
114,1,0,.,.,.,.,r 0.0010
114,2,0,.,.,.,.,r 0.0010
114,3,0,.,.,.,.,r 0.0010
Expand Down Expand Up @@ -2279,8 +2279,8 @@ C,R,F,B,Bi,Q,ti
151,12,3,T,104,40,0,r 0.0010

C,R,F,B,Bi,Q,ti
152,0,0,.,.,.,.,r 0.9880
152,1,0,.,.,.,.,r 0.0010
152,0,0,.,.,.,.,r 0.9389
152,1,0,.,.,.,.,r 0.0501
152,2,0,.,.,.,.,r 0.0010
152,3,0,.,.,.,.,r 0.0010
152,4,0,.,.,.,.,r 0.0010
Expand All @@ -2294,8 +2294,8 @@ C,R,F,B,Bi,Q,ti
152,12,0,.,.,.,.,r 0.0010

C,R,F,B,Bi,Q,ti
153,0,0,.,.,.,.,r 0.9880
153,1,0,.,.,.,.,r 0.0010
153,0,0,.,.,.,.,r 0.9389
153,1,0,.,.,.,.,r 0.0501
153,2,0,.,.,.,.,r 0.0010
153,3,0,.,.,.,.,r 0.0010
153,4,0,.,.,.,.,r 0.0010
Expand Down Expand Up @@ -3074,7 +3074,7 @@ C,R,F,B,Bi,Q,ti
204,12,1,G,146,21,0,r 0.0010

C,R,F,B,Bi,Q,ti
205,0,0,.,.,.,.,r 0.9880
205,0,0,.,.,.,.,r 0.9877
205,1,0,.,.,.,.,r 0.0010
205,2,0,.,.,.,.,r 0.0010
205,3,0,.,.,.,.,r 0.0010
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4484,7 +4484,7 @@ C,R,F,B,Bi,Q,ti
298,12,1,T,209,40,0,. 0.0010

C,R,F,B,Bi,Q,ti
299,0,0,.,.,.,.,. 0.9880
299,0,0,.,.,.,.,. 0.9877
299,1,0,.,.,.,.,. 0.0010
299,2,0,.,.,.,.,. 0.0010
299,3,0,.,.,.,.,. 0.0010
Expand All @@ -4499,7 +4499,7 @@ C,R,F,B,Bi,Q,ti
299,12,0,.,.,.,.,. 0.0010

C,R,F,B,Bi,Q,ti
300,0,0,.,.,.,.,. 0.9880
300,0,0,.,.,.,.,. 0.9877
300,1,0,.,.,.,.,. 0.0010
300,2,0,.,.,.,.,. 0.0010
300,3,0,.,.,.,.,. 0.0010
Expand Down
Loading