Skip to content

Commit

Permalink
adding --sort-order option to SortSamSpark
Browse files Browse the repository at this point in the history
adding a --sort-order option to SortSamSpark to let users specify the what order to sort in
enabling disabled tests
fixing the tests which weren't actually asserting anything

closes #1260
  • Loading branch information
lbergelson committed Mar 20, 2018
1 parent dac6311 commit 74bcb52
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.Utils;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;
import org.broadinstitute.hellbender.engine.filters.ReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary;
Expand All @@ -27,17 +29,29 @@
public final class SortSamSpark extends GATKSparkTool {
private static final long serialVersionUID = 1L;

public static final String SORT_ORDER_LONG_NAME = "sort-order";
@Override
public boolean requiresReads() { return true; }

@Argument(doc="the output file path", shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, optional = false)
protected String outputFile;
private String outputFile;

@Argument(doc="the order to sort the file into", fullName = SORT_ORDER_LONG_NAME, optional = true)
private SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;

@Override
public List<ReadFilter> getDefaultReadFilters() {
return Collections.singletonList(ReadFilterLibrary.ALLOW_ALL_READS);
}

@Override
protected void onStartup() {
if( sortOrder.getComparatorInstance() == null){
throw new UserException.BadInput("Cannot sort a file in " + sortOrder + " order. That ordering doesnt define a valid comparator. "
+ "Please choose a valid sort order");
}
}

@Override
protected void runTool(final JavaSparkContext ctx) {
JavaRDD<GATKRead> reads = getReads();
Expand All @@ -55,7 +69,7 @@ protected void runTool(final JavaSparkContext ctx) {
} else {
sortedReads = reads; // sorting is done by writeReads below
}
readsHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
readsHeader.setSortOrder(sortOrder);
writeReads(ctx, outputFile, sortedReads);
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package org.broadinstitute.hellbender.tools.spark.pipelines;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.ValidationStringency;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.test.ArgumentsBuilder;
import org.broadinstitute.hellbender.utils.test.SamAssertionUtils;
import org.testng.annotations.DataProvider;
Expand All @@ -20,9 +23,8 @@ public Object[][] sortBAMData() {
{"count_reads.cram", "count_reads_sorted.cram", "count_reads.fasta", ".cram", "coordinate"},
{"count_reads.bam", "count_reads_sorted.bam", "count_reads.fasta", ".cram", "coordinate"},

//SortBamSpark is missing SORT_ORDER parameter https://github.com/broadinstitute/gatk/issues/1260
// {"count_reads.bam", "count_reads.bam", null, ".bam", "queryname"},
// {"count_reads.cram", "count_reads.cram", "count_reads.fasta", ".cram", "queryname"},
{"count_reads.bam", "count_reads.bam", null, ".bam", "queryname"},
{"count_reads.cram", "count_reads.cram", "count_reads.fasta", ".cram", "queryname"},
};
}

Expand All @@ -38,21 +40,17 @@ public void testSortBAMs(
final File actualOutputFile = createTempFile("sort_sam", outputExtension);
File referenceFile = null == referenceFileName ? null : new File(getTestDataDir(), referenceFileName);
ArgumentsBuilder args = new ArgumentsBuilder();
args.add("--input"); args.add(inputFile.getCanonicalPath());
args.add("--output"); args.add(actualOutputFile.getCanonicalPath());
args.addInput(inputFile);
args.addOutput(actualOutputFile);
if (null != referenceFile) {
args.add("--R");
args.add(referenceFile.getAbsolutePath());
args.addReference(referenceFile);
}
args.add("--num-reducers"); args.add("1");
args.addArgument(GATKSparkTool.NUM_REDUCERS_LONG_NAME, "1");
args.addArgument(SortSamSpark.SORT_ORDER_LONG_NAME, sortOrderName);

//https://github.com/broadinstitute/gatk/issues/1260
// args.add("--SORT_ORDER");
// args.add(sortOrderName);
this.runCommandLine(args);

this.runCommandLine(args.getArgsArray());

SamAssertionUtils.samsEqualStringent(actualOutputFile, expectedOutputFile, ValidationStringency.DEFAULT_STRINGENCY, referenceFile);
SamAssertionUtils.assertSamsEqual(actualOutputFile, expectedOutputFile, ValidationStringency.DEFAULT_STRINGENCY, referenceFile);
}

@Test(groups = "spark")
Expand All @@ -61,13 +59,32 @@ public void test() throws Exception {
final File sortedBam = new File(getTestDataDir(), "count_reads_sorted.bam");
final File outputBam = createTempFile("sort_bam_spark", ".bam");
ArgumentsBuilder args = new ArgumentsBuilder();
args.add("--"+ StandardArgumentDefinitions.INPUT_LONG_NAME); args.add(unsortedBam.getCanonicalPath());
args.add("--"+StandardArgumentDefinitions.OUTPUT_LONG_NAME); args.add(outputBam.getCanonicalPath());
args.add("--num-reducers"); args.add("1");
args.addInput(unsortedBam);
args.addOutput(outputBam);
args.addArgument(GATKSparkTool.NUM_REDUCERS_LONG_NAME, "1");

this.runCommandLine(args.getArgsArray());
this.runCommandLine(args);

SamAssertionUtils.assertSamsEqual(outputBam, sortedBam);
}


@DataProvider
public Object[][] getInvalidSortOrders(){
return new Object[][]{
{SAMFileHeader.SortOrder.unknown},
{SAMFileHeader.SortOrder.unsorted}
};
}

@Test(expectedExceptions = UserException.BadInput.class, dataProvider = "getInvalidSortOrders")
public void testBadSortOrders(SAMFileHeader.SortOrder badOrder){
final File unsortedBam = new File(getTestDataDir(), "count_reads.bam");
ArgumentsBuilder args = new ArgumentsBuilder();
args.addInput(unsortedBam);
args.addOutput(createTempFile("sort_bam_spark", ".bam"));
args.addArgument(SortSamSpark.SORT_ORDER_LONG_NAME, badOrder.toString());

this.runCommandLine(args);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
@SQ SN:chr6 LN:101
@SQ SN:chr7 LN:404
@SQ SN:chr8 LN:202
@RG ID:0 SM:Hi,Mom!
@RG ID:0 SM:Hi,Mom! PL:ILLUMINA
@PG ID:1 PN:Hey! VN:2.0
both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
Expand Down

0 comments on commit 74bcb52

Please sign in to comment.