diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/GATKSparkTool.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/GATKSparkTool.java
index 09e0c8830e2..6b1302c7f8e 100644
--- a/src/main/java/org/broadinstitute/hellbender/engine/spark/GATKSparkTool.java
+++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/GATKSparkTool.java
@@ -1,8 +1,9 @@
 package org.broadinstitute.hellbender.engine.spark;
 
-import htsjdk.samtools.SAMFileHeader;
-import htsjdk.samtools.SAMSequenceDictionary;
+import com.google.common.annotations.VisibleForTesting;
+import htsjdk.samtools.*;
 import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
+import htsjdk.samtools.util.CloseableIterator;
 import htsjdk.samtools.util.GZIIndex;
 import htsjdk.samtools.util.IOUtil;
 import htsjdk.variant.vcf.VCFHeaderLine;
@@ -25,6 +26,7 @@
 import org.broadinstitute.hellbender.engine.filters.WellformedReadFilter;
 import org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSink;
 import org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSource;
+import org.broadinstitute.hellbender.exceptions.GATKException;
 import org.broadinstitute.hellbender.exceptions.UserException;
 import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
 import org.broadinstitute.hellbender.utils.SequenceDictionaryUtils;
@@ -121,7 +123,7 @@ public abstract class GATKSparkTool extends SparkCommandLineProgram {
     private ReadsSparkSource readsSource;
     private SAMFileHeader readsHeader;
-    private String readInput;
+    private LinkedHashMap<String, SAMFileHeader> readInputs;
     private ReferenceMultiSparkSource referenceSource;
     private SAMSequenceDictionary referenceDictionary;
     private List<SimpleInterval> userIntervals;
@@ -158,6 +160,20 @@ public boolean requiresReads() {
         return false;
     }
 
+    /**
+     * Does this tool support multiple inputs? Tools that do should override this method with the desired {@link ReadInputMergingPolicy}.
+     *
+     * @return doNotMerge by default
+     */
+    public ReadInputMergingPolicy getReadInputMergingPolicy() {
+        return ReadInputMergingPolicy.doNotMerge;
+    }
+
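+    /**
+     * Policy for handling multiple read inputs. With doNotMerge (the default) more than one
+     * --input is rejected; with concatMerge the reads from every input are unioned into a
+     * single RDD. A tool opts in by overriding {@link #getReadInputMergingPolicy()}, e.g.:
+     *
+     * <pre>
+     *     &#64;Override
+     *     public ReadInputMergingPolicy getReadInputMergingPolicy() {
+     *         return ReadInputMergingPolicy.concatMerge;
+     *     }
+     * </pre>
+     */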
+    public static enum ReadInputMergingPolicy {
+        doNotMerge,
+        concatMerge
+    }
+
     /**
      * Does this tool require intervals? Tools that do should override to return true.
      *
@@ -274,15 +290,29 @@ public JavaRDD<GATKRead> getUnfilteredReads() {
             }
             traversalParameters = new TraversalParameters(getIntervals(), traverseUnmapped);
         } else {
-            traversalParameters = null; // no intervals were specified so return all reads (mapped and unmapped)
+            traversalParameters = null;
         }
 
-        // TODO: This if statement is a temporary hack until #959 gets resolved.
-        if (readInput.endsWith(".adam")) {
+        JavaRDD<GATKRead> output = null;
+        ReadsSparkSource source = readsSource;
+        for (String input : readInputs.keySet()) {
+            if (output == null) {
+                output = getGatkReadJavaRDD(traversalParameters, source, input);
+            } else {
+                output = output.union(getGatkReadJavaRDD(traversalParameters, source, input));
+            }
+        }
+        return output;
+    }
+
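+    /**
+     * Loads a single reads input as a JavaRDD of GATKRead, dispatching on the input's
+     * extension (an .adam directory vs. a BAM/CRAM source).
+     */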
+    protected JavaRDD<GATKRead> getGatkReadJavaRDD(TraversalParameters traversalParameters, ReadsSparkSource source, String input) {
+        JavaRDD<GATKRead> output;
+        // TODO: This if statement is a temporary hack until #959 gets resolved.
+        if (input.endsWith(".adam")) {
             try {
-                return readsSource.getADAMReads(readInput, traversalParameters, getHeaderForReads());
+                output = source.getADAMReads(input, traversalParameters, getHeaderForReads());
             } catch (IOException e) {
-                throw new UserException("Failed to read ADAM file " + readInput, e);
+                throw new UserException("Failed to read ADAM file " + input, e);
             }
         } else {
@@ -290,8 +320,9 @@
                 throw new UserException.MissingReference("A reference file is required when using CRAM files.");
             }
             final String refPath = hasReference() ? referenceArguments.getReferenceFileName() : null;
-            return readsSource.getParallelReads(readInput, refPath, traversalParameters, bamPartitionSplitSize);
+            output = source.getParallelReads(input, refPath, traversalParameters, bamPartitionSplitSize);
         }
+        return output;
     }
 
     /**
@@ -334,7 +365,8 @@ public int getRecommendedNumReducers() {
         if (numReducers != 0) {
             return numReducers;
         }
-        return 1 + (int) (BucketUtils.dirSize(getReadSourceName()) / getTargetPartitionSize());
+        int size = readInputs.keySet().stream().mapToInt(k -> (int) BucketUtils.dirSize(k)).sum();
+        return 1 + (size / getTargetPartitionSize());
     }
 
     /**
@@ -445,8 +477,18 @@ public Collection<Annotation> makeVariantAnnotations() {
     /**
      * Returns the name of the source of reads data. It can be a file name or URL.
      */
-    protected String getReadSourceName(){
-        return readInput;
+    protected List<String> getReadSourceName(){
+        if (readInputs.size() > 1) {
+            throw new GATKException("Multiple ReadsDataSources specified but a single source requested by the tool");
+        }
+        return new ArrayList<>(readInputs.keySet());
+    }
+
+    /**
+     * Returns a map of read input to header.
+     */
+    protected LinkedHashMap<String, SAMFileHeader> getReadSourceHeaderMap(){
+        return readInputs;
     }
 
     /**
@@ -489,15 +531,37 @@ private void initializeReads(final JavaSparkContext sparkContext) {
             return;
         }
 
-        if ( readArguments.getReadFilesNames().size() != 1 ) {
-            throw new UserException("Sorry, we only support a single reads input for spark tools for now.");
+        if (getReadInputMergingPolicy() == ReadInputMergingPolicy.doNotMerge && readArguments.getReadFilesNames().size() != 1 ) {
+            throw new UserException("Sorry, we only support a single reads input for this Spark tool.");
         }
 
-        readInput = readArguments.getReadFilesNames().get(0);
+        readInputs = new LinkedHashMap<>();
         readsSource = new ReadsSparkSource(sparkContext, readArguments.getReadValidationStringency());
-        readsHeader = readsSource.getHeader(
-            readInput,
-            hasReference() ? referenceArguments.getReferenceFileName() : null);
+        for (String input : readArguments.getReadFilesNames()) {
+            readInputs.put(input, readsSource.getHeader(
+                    input, hasReference() ? referenceArguments.getReferenceFileName() : null));
+        }
+        readsHeader = createHeaderMerger().getMergedHeader();
+    }
+
+    /**
+     * Create a header merger from the individual SAM/BAM headers in our readers
+     *
+     * @return a header merger containing all individual headers in this data source
+     */
+    private SamFileHeaderMerger createHeaderMerger() {
+        return new SamFileHeaderMerger(identifySortOrder(readInputs.values()), readInputs.values(), true);
+    }
+
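+    /**
+     * Picks the sort order for the merged header: the common order when every input header
+     * agrees, otherwise unsorted.
+     */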
+    @VisibleForTesting
+    static SAMFileHeader.SortOrder identifySortOrder(final Collection<SAMFileHeader> headers){
+        final Set<SAMFileHeader.SortOrder> sortOrders = headers.stream().map(SAMFileHeader::getSortOrder).collect(Collectors.toSet());
+        final SAMFileHeader.SortOrder order;
+        if (sortOrders.size() == 1) {
+            order = sortOrders.iterator().next();
+        } else {
+            order = SAMFileHeader.SortOrder.unsorted;
+        }
+        return order;
+    }
 
     /**
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/CollectBaseDistributionByCycleSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/CollectBaseDistributionByCycleSpark.java
index be712555b0a..f29586514f6 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/CollectBaseDistributionByCycleSpark.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/CollectBaseDistributionByCycleSpark.java
@@ -226,7 +226,7 @@ public void addToMetricsFile(final MetricsFile<BaseDistributionByCycleMetrics, Integer> metricsFile) {
     protected void runTool(final JavaSparkContext ctx) {
         final JavaRDD<GATKRead> reads = getReads();
         final MetricsFile<BaseDistributionByCycleMetrics, Integer> metricsFile = calculateBaseDistributionByCycle(reads);
-        saveResults(metricsFile, getHeaderForReads(), getReadSourceName());
+        saveResults(metricsFile, getHeaderForReads(), getReadSourceName().get(0));
     }
 
     /**
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/CollectMultipleMetricsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/CollectMultipleMetricsSpark.java
index c5ebaa0c85b..55f914cf0c6 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/CollectMultipleMetricsSpark.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/CollectMultipleMetricsSpark.java
@@ -193,7 +193,7 @@ protected void runTool( final JavaSparkContext ctx ) {
                 unFilteredReads.filter(r -> readFilter.test(r)),
                 getHeaderForReads()
         );
-        metricsCollector.saveMetrics(getReadSourceName());
+        metricsCollector.saveMetrics(getReadSourceName().get(0));
     }
 }
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/MeanQualityByCycleSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/MeanQualityByCycleSpark.java
index e26bca3a1d7..9e2112ccb1e 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/MeanQualityByCycleSpark.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/MeanQualityByCycleSpark.java
@@ -197,7 +197,7 @@ HistogramGeneratorPair merge(final HistogramGeneratorPair other){
     protected void runTool(final JavaSparkContext ctx) {
         final JavaRDD<GATKRead> reads = getReads();
         final MetricsFile metricsFile = calculateMeanQualityByCycle(reads);
-        saveResults(metricsFile, getHeaderForReads(), getReadSourceName());
+        saveResults(metricsFile, getHeaderForReads(), getReadSourceName().get(0));
     }
 
     /**
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/MetricsCollectorSparkTool.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/MetricsCollectorSparkTool.java
index 8eab776bb48..1737c1e8ad4 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/MetricsCollectorSparkTool.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/MetricsCollectorSparkTool.java
@@ -70,7 +70,7 @@ protected void runTool( JavaSparkContext ctx ) {
                 getHeaderForReads().getSortOrder(),
                 getExpectedSortOrder(),
                 false,
-                getReadSourceName()
+                getReadSourceName().get(0)
         );
 
         // Execute the collector lifecycle
@@ -84,7 +84,7 @@ protected void runTool( JavaSparkContext ctx ) {
         initialize(collectorArgs, getHeaderForReads(), getDefaultHeaders());
         final JavaRDD<GATKRead> filteredReads = getReads();
         collectMetrics(filteredReads, getHeaderForReads());
-        saveMetrics(getReadSourceName());
+        saveMetrics(getReadSourceName().get(0));
     }
 }
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/QualityScoreDistributionSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/QualityScoreDistributionSpark.java
index 9b84e61c8a7..cf8977f98cd 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/QualityScoreDistributionSpark.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pipelines/metrics/QualityScoreDistributionSpark.java
@@ -134,7 +134,7 @@ protected void runTool(final JavaSparkContext ctx) {
                 (counts1, counts2) -> counts1.merge(counts2));
 
         final MetricsFile metrics = makeMetrics(result);
-        saveResults(metrics, getHeaderForReads(), getReadSourceName());
+        saveResults(metrics, getHeaderForReads(), getReadSourceName().get(0));
     }
 
     //Convert the count object into a metrics object so save in a report
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java
index 98051ad0e96..56b60ffeef0 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/transforms/markduplicates/MarkDuplicatesSpark.java
@@ -17,7 +17,9 @@
 import org.broadinstitute.hellbender.engine.filters.ReadFilter;
 import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
 import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
+import org.broadinstitute.hellbender.engine.spark.datasources.ReadsSparkSource;
 import org.broadinstitute.hellbender.exceptions.GATKException;
+import org.broadinstitute.hellbender.exceptions.UserException;
 import org.broadinstitute.hellbender.utils.Utils;
 import org.broadinstitute.hellbender.utils.read.GATKRead;
 import org.broadinstitute.hellbender.utils.read.ReadUtils;
@@ -69,6 +71,11 @@ public List<ReadFilter> getDefaultReadFilters() {
 
     // Reads with this marker will be treated and marked as optical duplicates
     public static int OPTICAL_DUPLICATE_MARKER = -2;
 
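+    // Opt in to multiple --input support: GATKSparkTool unions the reads from every input into a single RDD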
+    @Override
+    public ReadInputMergingPolicy getReadInputMergingPolicy() {
+        return ReadInputMergingPolicy.concatMerge;
+    }
+
     /**
      * Main method for marking duplicates, takes a JavaRDD of GATKRead and an associated SAMFileHeader with corresponding
      * sorting information and returns a new JavaRDD<GATKRead> in which all read templates have been marked as duplicates
@@ -215,6 +222,14 @@ public int getPartition(Object key) {
 
     @Override
     protected void runTool(final JavaSparkContext ctx) {
+        // When multiple inputs are used, check that every input header is query-grouped/queryname sorted
+        Map<String, SAMFileHeader> headerMap = getReadSourceHeaderMap();
+        if (headerMap.size() > 1) {
+            headerMap.entrySet().stream().forEach(h -> {
+                if (!ReadUtils.isReadNameGroupedBam(h.getValue())) {
+                    throw new UserException("Multiple inputs to MarkDuplicatesSpark detected but input " + h.getKey() + " was sorted in " + h.getValue().getSortOrder() + " order");
+                }
+            });
+        }
+
         JavaRDD<GATKRead> reads = getReads();
         final OpticalDuplicateFinder finder = opticalDuplicatesArgumentCollection.READ_NAME_REGEX != null ?
                 new OpticalDuplicateFinder(opticalDuplicatesArgumentCollection.READ_NAME_REGEX, opticalDuplicatesArgumentCollection.OPTICAL_DUPLICATE_PIXEL_DISTANCE, null) : null;
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java
index 312855b194e..7d47536bc4e 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/pipelines/MarkDuplicatesSparkIntegrationTest.java
@@ -8,6 +8,7 @@
 import org.broadinstitute.hellbender.cmdline.argumentcollections.MarkDuplicatesSparkArgumentCollection;
 import org.broadinstitute.hellbender.engine.ReadsDataSource;
 import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
+import org.broadinstitute.hellbender.exceptions.UserException;
 import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
 import org.broadinstitute.hellbender.tools.spark.transforms.markduplicates.MarkDuplicatesSpark;
 import org.broadinstitute.hellbender.tools.walkers.markduplicates.AbstractMarkDuplicatesCommandLineProgramTest;
@@ -61,28 +62,35 @@ public Object[][] md(){
         //Note: in each of those cases, we'd really want to pass null as the last parameter (not 0L) but IntelliJ
         // does not like it and skips the test (rendering issue) - so we pass 0L and account for it at test time
         // (see comment in testMarkDuplicatesSparkIntegrationTestLocal)
-                {new File(TEST_DATA_DIR,"example.chr1.1-1K.unmarkedDups.noDups.bam"), 20, 0,
+                {new File[]{new File(TEST_DATA_DIR,"example.chr1.1-1K.unmarkedDups.noDups.bam")}, 20, 0,
                  ImmutableMap.of("Solexa-16419", ImmutableList.of(0L, 3L, 0L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16416", ImmutableList.of(0L, 1L, 0L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16404", ImmutableList.of(0L, 3L, 0L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16406", ImmutableList.of(0L, 1L, 0L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16412", ImmutableList.of(0L, 1L, 0L, 0L, 0L, 0L, 0.0, 0L))},
-                {new File(TEST_DATA_DIR,"example.chr1.1-1K.unmarkedDups.bam"), 90, 6,
+                {new File[]{new File(TEST_DATA_DIR,"example.chr1.1-1K.unmarkedDups.bam")}, 90, 6,
                  ImmutableMap.of("Solexa-16419", ImmutableList.of(4L, 4L, 4L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16416", ImmutableList.of(2L, 2L, 2L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16404", ImmutableList.of(3L, 9L, 3L, 0L, 2L, 0L, 0.190476, 17L),
                                  "Solexa-16406", ImmutableList.of(1L, 10L, 1L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16412", ImmutableList.of(3L, 6L, 3L, 0L, 1L, 0L, 0.133333, 15L))},
-                {new File(TEST_DATA_DIR,"example.chr1.1-1K.markedDups.bam"), 90, 6,
+                {new File[]{new File(TEST_DATA_DIR,"example.chr1.1-1K.markedDups.bam")}, 90, 6,
                  ImmutableMap.of("Solexa-16419", ImmutableList.of(4L, 4L, 4L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16416", ImmutableList.of(2L, 2L, 2L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16404", ImmutableList.of(3L, 9L, 3L, 0L, 2L, 0L, 0.190476, 17L),
                                  "Solexa-16406", ImmutableList.of(1L, 10L, 1L, 0L, 0L, 0L, 0.0, 0L),
                                  "Solexa-16412", ImmutableList.of(3L, 6L, 3L, 0L, 1L, 0L, 0.133333, 15L))},
-                {new File(TEST_DATA_DIR, "optical_dupes.bam"), 4, 2,
+                {new File[]{new File(TEST_DATA_DIR, "optical_dupes.bam")}, 4, 2,
                  ImmutableMap.of("mylib", ImmutableList.of(0L, 2L, 0L, 0L, 1L, 1L, 0.5, 0L))},
-                {new File(TEST_DATA_DIR, "optical_dupes_casava.bam"), 4, 2,
+                {new File[]{new File(TEST_DATA_DIR, "optical_dupes_casava.bam")}, 4, 2,
                  ImmutableMap.of("mylib", ImmutableList.of(0L, 2L, 0L, 0L, 1L, 1L, 0.5, 0L))},
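+                // Multi-input case: two queryname-sorted bams unioned in one run; the expected
+                // metrics cover the libraries of both inputs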
+                {new File[]{new File(TEST_DATA_DIR, "optical_dupes.queryname.bam"), new File(TEST_DATA_DIR, "example.chr1.1-1K.markedDups.queryname.bam")}, 94, 8,
+                 ImmutableMap.builder().put("mylib", ImmutableList.of(0L, 2L, 0L, 0L, 1L, 1L, 0.5, 0L))
+                         .put("Solexa-16419", ImmutableList.of(4L, 4L, 4L, 0L, 0L, 0L, 0.0, 0L))
+                         .put("Solexa-16416", ImmutableList.of(2L, 2L, 2L, 0L, 0L, 0L, 0.0, 0L))
+                         .put("Solexa-16404", ImmutableList.of(3L, 9L, 3L, 0L, 2L, 0L, 0.190476, 17L))
+                         .put("Solexa-16406", ImmutableList.of(1L, 10L, 1L, 0L, 0L, 0L, 0.0, 0L))
+                         .put("Solexa-16412", ImmutableList.of(3L, 6L, 3L, 0L, 1L, 0L, 0.133333, 15L)).build()},
         };
     }
@@ -99,11 +107,13 @@ public void testMappedPairAndMappedFragmentAndMatePairSecondUnmapped() {
 
     @Test( dataProvider = "md")
     public void testMarkDuplicatesSparkIntegrationTestLocal(
-            final File input, final long totalExpected, final long dupsExpected,
+            final File[] inputFiles, final long totalExpected, final long dupsExpected,
             Map<String, List<?>> metricsExpected) throws IOException {
 
         ArgumentsBuilder args = new ArgumentsBuilder();
-        args.addArgument(StandardArgumentDefinitions.INPUT_LONG_NAME, input.getPath());
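+        // Each input file is passed as its own --input argument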
+        for (File input : inputFiles) {
+            args.addArgument(StandardArgumentDefinitions.INPUT_LONG_NAME, input.getPath());
+        }
         args.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME);
 
         File outputFile = createTempFile("markdups", ".bam");
@@ -184,12 +194,13 @@ protected List<GATKDuplicationMetrics> getGatkDuplicationMetrics(MetricsFile metricsFile) {
 
     @Test( dataProvider = "md")
     public void testMarkDuplicatesSparkMarkingOpticalDuplicatesWithTagging(
-            final File input, final long totalExpected, final long dupsExpected,
+            final File[] inputFiles, final long totalExpected, final long dupsExpected,
             Map<String, List<?>> metricsExpected) throws IOException {
 
         ArgumentsBuilder args = new ArgumentsBuilder();
-        args.add("--" + StandardArgumentDefinitions.INPUT_LONG_NAME);
-        args.add(input.getPath());
+        for (File input : inputFiles) {
+            args.addArgument(StandardArgumentDefinitions.INPUT_LONG_NAME, input.getPath());
+        }
         args.add("--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME);
 
         File outputFile = createTempFile("markdups", ".bam");
@@ -248,12 +259,13 @@ public void testMarkDuplicatesSparkMarkingOpticalDuplicatesWithTagging(
 
     @Test( dataProvider = "md") // Testing the DUPLICATE_TAGGING_POLICY_LONG_NAME = ALL option.
     public void testMarkDuplicatesSparkMarkingAllDuplicatesWithTagging(
-            final File input, final long totalExpected, final long dupsExpected,
+            final File[] inputFiles, final long totalExpected, final long dupsExpected,
             Map<String, List<?>> metricsExpected) throws IOException {
 
         ArgumentsBuilder args = new ArgumentsBuilder();
-        args.add("--" + StandardArgumentDefinitions.INPUT_LONG_NAME);
-        args.add(input.getPath());
+        for (File input : inputFiles) {
+            args.addArgument(StandardArgumentDefinitions.INPUT_LONG_NAME, input.getPath());
+        }
         args.add("--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME);
 
         File outputFile = createTempFile("markdups", ".bam");
@@ -319,12 +331,13 @@ public void testMarkDuplicatesSparkMarkingAllDuplicatesWithTagging(
 
     @Test( dataProvider = "md")
     public void testMarkDuplicatesSparkDeletingDuplicateReads(
-            final File input, final long totalExpected, final long dupsExpected,
+            final File[] inputFiles, final long totalExpected, final long dupsExpected,
             Map<String, List<?>> metricsExpected) throws IOException {
 
         ArgumentsBuilder args = new ArgumentsBuilder();
-        args.add("--"+ StandardArgumentDefinitions.INPUT_LONG_NAME);
-        args.add(input.getPath());
+        for (File input : inputFiles) {
+            args.addArgument(StandardArgumentDefinitions.INPUT_LONG_NAME, input.getPath());
+        }
         args.add("--"+StandardArgumentDefinitions.OUTPUT_LONG_NAME);
 
         File outputFile = createTempFile("markdups", ".bam");
@@ -373,12 +386,13 @@ public void testMarkDuplicatesSparkDeletingDuplicateReads(
 
     @Test( dataProvider = "md")
     public void testMarkDuplicatesSparkDeletingOpticalDuplicateReads(
-            final File input, final long totalExpected, final long dupsExpected,
+            final File[] inputFiles, final long totalExpected, final long dupsExpected,
             Map<String, List<?>> metricsExpected) throws IOException {
 
         ArgumentsBuilder args = new ArgumentsBuilder();
-        args.add("--" + StandardArgumentDefinitions.INPUT_LONG_NAME);
-        args.add(input.getPath());
+        for (File input : inputFiles) {
+            args.addArgument(StandardArgumentDefinitions.INPUT_LONG_NAME, input.getPath());
+        }
         args.add("--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME);
 
         File outputFile = createTempFile("markdups", ".bam");
@@ -448,4 +462,16 @@ public void testHashCollisionHandling() {
             Assert.assertEquals(actualReads.size(), 4, "Wrong number of reads output");
         }
     }
+
+    @Test (expectedExceptions = UserException.class)
+    public void testAssertCorrectSortOrderMultipleBams() {
+        // This test asserts that two bams which are not querygroup/queryname sorted will cause an exception
+        // when both are supplied as inputs to MarkDuplicatesSpark (currently we require all bams in a
+        // multi-input run to be querygroup/queryname sorted).
+        final File output = createTempFile("supplementaryReadUnmappedMate", "bam");
+        final ArgumentsBuilder args = new ArgumentsBuilder();
+        args.addOutput(output);
+        args.addInput(new File(TEST_DATA_DIR, "optical_dupes.bam"));
+        args.addInput(new File(TEST_DATA_DIR, "example.chr1.1-1K.unmarkedDups.noDups.bam"));
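+        // The header check at the start of MarkDuplicatesSpark.runTool() should reject these inputs
+        // with a UserException before any duplicate marking happens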
+        runCommandLine(args);
+    }
 }
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/example.chr1.1-1K.markedDups.queryname.bam b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/example.chr1.1-1K.markedDups.queryname.bam
new file mode 100644
index 00000000000..2ff5cd838e4
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/example.chr1.1-1K.markedDups.queryname.bam differ
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/optical_dupes.queryname.bam b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/optical_dupes.queryname.bam
new file mode 100644
index 00000000000..137afb09f19
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/MarkDuplicatesGATK/optical_dupes.queryname.bam differ