From 9c007fa139aedcd71d7c56f60d4b77cddb594324 Mon Sep 17 00:00:00 2001 From: James Date: Thu, 6 Sep 2018 16:29:21 -0400 Subject: [PATCH 1/4] added a short cast --- .../sparkrecords/TransientFieldPhysicalLocation.java | 4 ++-- .../pipelines/MarkDuplicatesSparkIntegrationTest.java | 8 ++++++++ .../AbstractMarkDuplicatesCommandLineProgramTest.java | 7 +++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java index 8b64578c785..cb0e174b1d7 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java @@ -37,13 +37,13 @@ public TransientFieldPhysicalLocation(int partitionIndex, String name) { public int getX() { return this.x; } @Override - public void setX(final int x) { this.x = x; } + public void setX(final int x) { this.x = (short)x; } @Override public int getY() { return this.y; } @Override - public void setY(final int y) { this.y = y; } + public void setY(final int y) { this.y = (short)y; } @Override public short getLibraryId() { return this.libraryId; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java index dc0a996f0cb..e329c8f4474 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java @@ -189,6 +189,14 @@ public void testSupplementaryReadUnmappedMate() { } } + @Test + public void test() { + final ArgumentsBuilder args = new ArgumentsBuilder(); + args.addOutput(createTempFile("output","bam")); + args.addInput(new File("/Users/emeryj/hellbender/gatk/compareDuplicates/picarddiffs.sam")); + runCommandLine(args); + } + @Test public void testHashCollisionHandling() { // This test asserts that the handling of two read pairs with the same start positions but on different in such a way diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java index 29adf54a717..e8d933e7750 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java @@ -177,6 +177,9 @@ public Object[][] readNameData(){ {"RUNID:7:1203:2886:35756", "RUNID:7:1203:2886:36756"}, {"RUNID:7:1203:2886:37756", "RUNID:7:1203:2886:36756"}, {"RUNID:7:1203:2886:38756", "RUNID:7:1203:2886:36756"}, + + //Added a test for tiebreaking accounting for the short casting done in picard + {"HK3T5CCXX160204:3:1112:11586:37067", "HK3T5CCXX160204:3:1112:11586:32144"} }; } @@ -199,11 +202,11 @@ public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicates(final Str int compare = position1.tile - position2.tile; if (compare == 0) { - compare = position1.x - position2.x; + compare = (short)position1.x - (short)position2.x; } if (compare == 0) { - compare = position1.y - position2.y; + compare = (short)position1.y - (short)position2.y; } final boolean isDuplicate = compare < 0; From 1081acf7fbdc901bcc659ab8bf8b86f9444dfb2b Mon Sep 17 00:00:00 2001 From: James Date: Fri, 7 Sep 2018 11:54:43 -0400 Subject: [PATCH 2/4] finalizing the PR --- .../sparkrecords/TransientFieldPhysicalLocation.java | 4 ++-- .../spark/pipelines/MarkDuplicatesSparkIntegrationTest.java | 4 ++-- .../AbstractMarkDuplicatesCommandLineProgramTest.java | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java index cb0e174b1d7..7a02c7167f9 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java @@ -12,8 +12,8 @@ public abstract class TransientFieldPhysicalLocation extends PairedEnds implemen // Information used to detect optical dupes protected short readGroupIndex = -1; protected transient short tile = -1; - protected transient int x = -1; - protected transient int y = -1; + protected transient short x = -1; + protected transient short y = -1; protected transient short libraryId = -1; public TransientFieldPhysicalLocation(int partitionIndex, String name) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java index e329c8f4474..a18fb20b4e5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java @@ -275,11 +275,11 @@ public void testOpticalDuplicatesTiebrokenByPhysicalLocationNotStartPosition(fin int compare = position1.tile - position2.tile; if (compare == 0) { - compare = position1.x - position2.x; + compare = (short)position1.x - (short)position2.x; } if (compare == 0) { - compare = position1.y - position2.y; + compare = (short)position1.y - (short)position2.y; } final boolean isDuplicate = compare < 0; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java index e8d933e7750..a03fbf4b83c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java @@ -254,7 +254,7 @@ public void testOpticalDuplicatesTheSameReadGroup() { public void testOpticalDuplicatesAndPCRDuplicatesOrientationDifference() { final AbstractMarkDuplicatesTester tester = getTester(); tester.setExpectedOpticalDuplicate(0); - tester.addMatePair("RUNID:7:1203:2886:82292", 19, 19, 485253, 486253, false, false, true, true, "101M", "101M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // duplicate + tester.addMatePair("RUNID:7:1203:2886:16838", 19, 19, 485253, 486253, false, false, true, true, "101M", "101M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "1"); // duplicate tester.addMatePair("RUNID:7:1203:2886:16834", 19, 19, 486253, 485253, false, false, false, false, "101M", "101M", false, true, false, false, false, DEFAULT_BASE_QUALITY, "1"); // Even though these reads are in a duplicate group together, we don't want to mark them as Optical Duplicates because their orientation is flipped (which doesn't matter for PCR duplicates) tester.runTest(); From 0bfed0008ace38d7e95ec99de3133cd74d66c99c Mon Sep 17 00:00:00 2001 From: James Date: Fri, 7 Sep 2018 12:44:35 -0400 Subject: [PATCH 3/4] responding to the first round of comments --- .../sparkrecords/TransientFieldPhysicalLocation.java | 2 ++ .../pipelines/MarkDuplicatesSparkIntegrationTest.java | 8 -------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java index 7a02c7167f9..9f22c1f0b23 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/markduplicates/sparkrecords/TransientFieldPhysicalLocation.java @@ -36,12 +36,14 @@ public TransientFieldPhysicalLocation(int partitionIndex, String name) { @Override public int getX() { return this.x; } + // NOTE picard in practice compresses the pixel values to signed shorts for space purposes despite the api using an integer @Override public void setX(final int x) { this.x = (short)x; } @Override public int getY() { return this.y; } + // NOTE picard in practice compresses the pixel values to signed shorts for space purposes despite the api using an integer @Override public void setY(final int y) { this.y = (short)y; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java index a18fb20b4e5..c4f0427e72c 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java @@ -189,14 +189,6 @@ public void testSupplementaryReadUnmappedMate() { } } - @Test - public void test() { - final ArgumentsBuilder args = new ArgumentsBuilder(); - args.addOutput(createTempFile("output","bam")); - args.addInput(new File("/Users/emeryj/hellbender/gatk/compareDuplicates/picarddiffs.sam")); - runCommandLine(args); - } - @Test public void testHashCollisionHandling() { // This test asserts that the handling of two read pairs with the same start positions but on different in such a way From 861f5e790676ba5b44aa8ad224c141696c3f5062 Mon Sep 17 00:00:00 2001 From: James Date: Fri, 7 Sep 2018 12:55:01 -0400 Subject: [PATCH 4/4] rather than resovle an issue for a class slated for removal, I just pushed the shared test down --- .../MarkDuplicatesSparkIntegrationTest.java | 61 +++++++++++++++++++ ...tMarkDuplicatesCommandLineProgramTest.java | 61 ------------------- .../MarkDuplicatesGATKIntegrationTest.java | 2 - 3 files changed, 61 insertions(+), 63 deletions(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java index c4f0427e72c..c8a5ff2b0e6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java @@ -281,4 +281,65 @@ public void testOpticalDuplicatesTiebrokenByPhysicalLocationNotStartPosition(fin tester.addMatePair(readName2, 1,2, 51, false, false, isDuplicate, isDuplicate, "6S42M28S", "8S68M", true, false, false, false, false, DEFAULT_BASE_QUALITY); tester.runTest(); } + + @DataProvider + public Object[][] readNameData(){ + return new Object[][]{ + {"RUNID:7:1203:2886:82292", "RUNID:7:1205:3886:16834"}, + + {"RUNID:7:1203:2886:16756", "RUNID:7:1205:3886:16756"}, + {"RUNID:7:1204:2886:16756", "RUNID:7:1205:3886:16756"}, + {"RUNID:7:1205:2886:16756", "RUNID:7:1205:3886:16756"}, + {"RUNID:7:1206:2886:16756", "RUNID:7:1205:3886:16756"}, + {"RUNID:7:1207:2886:16756", "RUNID:7:1205:3886:16756"}, + + {"RUNID:7:1203:2886:16756", "RUNID:7:1203:4886:26756"}, + {"RUNID:7:1203:3886:16756", "RUNID:7:1203:4886:26756"}, + {"RUNID:7:1203:4886:16756", "RUNID:7:1203:4886:26756"}, + {"RUNID:7:1203:5886:16756", "RUNID:7:1203:4886:26756"}, + {"RUNID:7:1203:6886:16756", "RUNID:7:1203:4886:26756"}, + + {"RUNID:7:1203:2886:34756", "RUNID:7:1203:2886:36756"}, + {"RUNID:7:1203:2886:35756", "RUNID:7:1203:2886:36756"}, + {"RUNID:7:1203:2886:37756", "RUNID:7:1203:2886:36756"}, + {"RUNID:7:1203:2886:38756", "RUNID:7:1203:2886:36756"}, + + //Added a test for tiebreaking accounting for the short casting done in picard + {"HK3T5CCXX160204:3:1112:11586:37067", "HK3T5CCXX160204:3:1112:11586:32144"} + }; + } + + @Test(dataProvider = "readNameData") + public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicates(final String readName1, final String readName2) { + // This tests the readname based tiebreaking code in mark duplicates. Since it's ambiguous which read should be marked + // as duplicate or not if scores match we break ties by evaluating the readname for consistencies sake. + + final ReadNameParser parser = new ReadNameParser(); + + final PhysicalLocationInt position1 = new PhysicalLocationInt(); + final PhysicalLocationInt position2 = new PhysicalLocationInt(); + + parser.addLocationInformation(readName1, position1); + parser.addLocationInformation(readName2, position2); + + final AbstractMarkDuplicatesTester tester = getTester(); + tester.getSamRecordSetBuilder().setReadLength(101); + tester.setExpectedOpticalDuplicate(0); + + int compare = position1.tile - position2.tile; + if (compare == 0) { + compare = (short)position1.x - (short)position2.x; + } + + if (compare == 0) { + compare = (short)position1.y - (short)position2.y; + } + + final boolean isDuplicate = compare < 0; + + tester.addMatePair(readName1, 1,485253, 485253, false, false, !isDuplicate, !isDuplicate, "42M59S", "59S42M", false, true, false, false, false, DEFAULT_BASE_QUALITY); + tester.addMatePair(readName2, 1,485253, 485253, false, false, isDuplicate, isDuplicate, "59S42M", "42M59S", true, false, false, false, false, DEFAULT_BASE_QUALITY); + + tester.runTest(); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java index a03fbf4b83c..3b961f99e56 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java @@ -156,67 +156,6 @@ public void testOpticalDuplicateFinding() { tester.runTest(); } - @DataProvider - public Object[][] readNameData(){ - return new Object[][]{ - {"RUNID:7:1203:2886:82292", "RUNID:7:1205:3886:16834"}, - - {"RUNID:7:1203:2886:16756", "RUNID:7:1205:3886:16756"}, - {"RUNID:7:1204:2886:16756", "RUNID:7:1205:3886:16756"}, - {"RUNID:7:1205:2886:16756", "RUNID:7:1205:3886:16756"}, - {"RUNID:7:1206:2886:16756", "RUNID:7:1205:3886:16756"}, - {"RUNID:7:1207:2886:16756", "RUNID:7:1205:3886:16756"}, - - {"RUNID:7:1203:2886:16756", "RUNID:7:1203:4886:26756"}, - {"RUNID:7:1203:3886:16756", "RUNID:7:1203:4886:26756"}, - {"RUNID:7:1203:4886:16756", "RUNID:7:1203:4886:26756"}, - {"RUNID:7:1203:5886:16756", "RUNID:7:1203:4886:26756"}, - {"RUNID:7:1203:6886:16756", "RUNID:7:1203:4886:26756"}, - - {"RUNID:7:1203:2886:34756", "RUNID:7:1203:2886:36756"}, - {"RUNID:7:1203:2886:35756", "RUNID:7:1203:2886:36756"}, - {"RUNID:7:1203:2886:37756", "RUNID:7:1203:2886:36756"}, - {"RUNID:7:1203:2886:38756", "RUNID:7:1203:2886:36756"}, - - //Added a test for tiebreaking accounting for the short casting done in picard - {"HK3T5CCXX160204:3:1112:11586:37067", "HK3T5CCXX160204:3:1112:11586:32144"} - }; - } - - @Test(dataProvider = "readNameData") - public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicates(final String readName1, final String readName2) { - // This tests the readname based tiebreaking code in mark duplicates. Since it's ambiguous which read should be marked - // as duplicate or not if scores match we break ties by evaluating the readname for consistencies sake. - - final ReadNameParser parser = new ReadNameParser(); - - final PhysicalLocationInt position1 = new PhysicalLocationInt(); - final PhysicalLocationInt position2 = new PhysicalLocationInt(); - - parser.addLocationInformation(readName1, position1); - parser.addLocationInformation(readName2, position2); - - final AbstractMarkDuplicatesTester tester = getTester(); - tester.getSamRecordSetBuilder().setReadLength(101); - tester.setExpectedOpticalDuplicate(0); - - int compare = position1.tile - position2.tile; - if (compare == 0) { - compare = (short)position1.x - (short)position2.x; - } - - if (compare == 0) { - compare = (short)position1.y - (short)position2.y; - } - - final boolean isDuplicate = compare < 0; - - tester.addMatePair(readName1, 1,485253, 485253, false, false, !isDuplicate, !isDuplicate, "42M59S", "59S42M", false, true, false, false, false, DEFAULT_BASE_QUALITY); - tester.addMatePair(readName2, 1,485253, 485253, false, false, isDuplicate, isDuplicate, "59S42M", "42M59S", true, false, false, false, false, DEFAULT_BASE_QUALITY); - - tester.runTest(); - } - @Test public void testOpticalDuplicatesDifferentReadGroups() { final AbstractMarkDuplicatesTester tester = getTester(); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/MarkDuplicatesGATKIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/MarkDuplicatesGATKIntegrationTest.java index 115087cf791..06e8209a5f1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/MarkDuplicatesGATKIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/markduplicates/MarkDuplicatesGATKIntegrationTest.java @@ -14,8 +14,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import picard.sam.util.PhysicalLocationInt; -import picard.sam.util.ReadNameParser; import java.io.File; import java.util.*;