diff --git a/src/main/java/picard/analysis/CollectMultipleMetrics.java b/src/main/java/picard/analysis/CollectMultipleMetrics.java
index a38adea493..099ae50f1d 100644
--- a/src/main/java/picard/analysis/CollectMultipleMetrics.java
+++ b/src/main/java/picard/analysis/CollectMultipleMetrics.java
@@ -26,17 +26,33 @@
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.StringUtil;
+import org.apache.commons.lang3.StringUtils;
import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.CommandLineException;
+import org.broadinstitute.barclay.argparser.CommandLineParser;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.analysis.artifacts.CollectSequencingArtifactMetrics;
import picard.analysis.directed.RnaSeqMetricsCollector;
import picard.cmdline.CommandLineProgram;
-import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
+
import java.io.File;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
/**
* Class that is designed to instantiate and execute multiple metrics programs that extend
@@ -59,8 +75,8 @@ public class CollectMultipleMetrics extends CommandLineProgram {
* Includes a method for determining whether or not a Program explicitly needs a reference sequence (i.e. cannot be null)
*/
- static final String USAGE_SUMMARY ="Collect multiple classes of metrics. ";
- static final String USAGE_DETAILS ="This 'meta-metrics' tool runs one or more of the metrics collection modules at the same" +
+ static final String USAGE_SUMMARY = "Collect multiple classes of metrics.";
+ static final String USAGE_DETAILS = "This 'meta-metrics' tool runs one or more of the metrics collection modules at the same" +
" time to cut down on the time spent reading in data from input files. Available modules include " +
"CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, QualityScoreDistribution, MeanQualityByCycle, " +
"CollectBaseDistributionByCycle, CollectGcBiasMetrics, RnaSeqMetrics, CollectSequencingArtifactMetrics" +
@@ -88,22 +104,25 @@ public class CollectMultipleMetrics extends CommandLineProgram {
" R=reference_sequence.fasta \\ " +
" PROGRAM=null \\ " +
" PROGRAM=QualityScoreDistribution \\ " +
- " PROGRAM=MeanQualityByCycle "+
+ " PROGRAM=MeanQualityByCycle " +
"" +
"
";
- public static interface ProgramInterface {
- /** By default, this method calls the
+ public interface ProgramInterface {
+
+ /**
+ * By default, this method calls the
* {@link #makeInstance(String, String, File, File, Set, File, File, File, Set)} method
- * without 'includeUnpaired' parameter. */
- default SinglePassSamProgram makeInstance(final String outbase,
- final String outext,
- final File input,
- final File reference,
- final Set metricAccumulationLevel,
- final File dbSnp, final File intervals,
- final File refflat, Set ignoreSequence,
- final boolean includeUnpaired) {
+ * without 'includeUnpaired' parameter.
+ */
+ default SinglePassSamProgram makeInstance(final String outbase,
+ final String outext,
+ final File input,
+ final File reference,
+ final Set metricAccumulationLevel,
+ final File dbSnp, final File intervals,
+ final File refflat, Set ignoreSequence,
+ final boolean includeUnpaired) {
return makeInstance(outbase, outext, input,
reference,
@@ -122,7 +141,7 @@ SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence);
+ final Set ignoreSequence);
default boolean needsReferenceSequence() {
return false;
@@ -137,9 +156,8 @@ default boolean supportsMetricAccumulationLevel() {
}
}
- public static enum Program implements ProgramInterface {
+ public enum Program implements ProgramInterface {
CollectAlignmentSummaryMetrics {
-
@Override
public boolean supportsMetricAccumulationLevel() {
return true;
@@ -154,7 +172,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
final CollectAlignmentSummaryMetrics program = new CollectAlignmentSummaryMetrics();
program.OUTPUT = new File(outbase + ".alignment_summary_metrics" + outext);
@@ -170,7 +188,6 @@ public SinglePassSamProgram makeInstance(final String outbase,
},
CollectInsertSizeMetrics {
-
@Override
public boolean supportsMetricAccumulationLevel() {
return true;
@@ -185,7 +202,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
final CollectInsertSizeMetrics program = new CollectInsertSizeMetrics();
program.OUTPUT = new File(outbase + ".insert_size_metrics" + outext);
program.Histogram_FILE = new File(outbase + ".insert_size_histogram.pdf");
@@ -201,7 +218,6 @@ public SinglePassSamProgram makeInstance(final String outbase,
},
QualityScoreDistribution {
-
@Override
public SinglePassSamProgram makeInstance(final String outbase, final String outext, final File input,
final File reference,
@@ -209,7 +225,7 @@ public SinglePassSamProgram makeInstance(final String outbase, final String oute
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
final QualityScoreDistribution program = new QualityScoreDistribution();
program.OUTPUT = new File(outbase + ".quality_distribution_metrics" + outext);
program.CHART_OUTPUT = new File(outbase + ".quality_distribution.pdf");
@@ -224,7 +240,6 @@ public SinglePassSamProgram makeInstance(final String outbase, final String oute
},
MeanQualityByCycle {
-
@Override
public SinglePassSamProgram makeInstance(final String outbase,
final String outext,
@@ -234,7 +249,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
final MeanQualityByCycle program = new MeanQualityByCycle();
program.OUTPUT = new File(outbase + ".quality_by_cycle_metrics" + outext);
program.CHART_OUTPUT = new File(outbase + ".quality_by_cycle.pdf");
@@ -249,7 +264,6 @@ public SinglePassSamProgram makeInstance(final String outbase,
},
CollectBaseDistributionByCycle {
-
@Override
public SinglePassSamProgram makeInstance(final String outbase,
final String outext,
@@ -259,7 +273,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
final CollectBaseDistributionByCycle program = new CollectBaseDistributionByCycle();
program.OUTPUT = new File(outbase + ".base_distribution_by_cycle_metrics" + outext);
program.CHART_OUTPUT = new File(outbase + ".base_distribution_by_cycle.pdf");
@@ -274,7 +288,6 @@ public SinglePassSamProgram makeInstance(final String outbase,
},
CollectGcBiasMetrics {
-
@Override
public boolean needsReferenceSequence() {
return true;
@@ -294,7 +307,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
final CollectGcBiasMetrics program = new CollectGcBiasMetrics();
program.OUTPUT = new File(outbase + ".gc_bias.detail_metrics" + outext);
program.SUMMARY_OUTPUT = new File(outbase + ".gc_bias.summary_metrics" + outext);
@@ -315,7 +328,6 @@ public SinglePassSamProgram makeInstance(final String outbase,
},
RnaSeqMetrics {
-
@Override
public boolean needsRefflatFile() {
return true;
@@ -335,9 +347,9 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
final CollectRnaSeqMetrics program = new CollectRnaSeqMetrics();
- program.OUTPUT = new File(outbase + ".rna_metrics" + outext);
+ program.OUTPUT = new File(outbase + ".rna_metrics" + outext);
program.CHART_OUTPUT = new File(outbase + ".rna_coverage.pdf");
// Generally programs should not be accessing these directly but it might make things smoother
// to just set them anyway. These are set here to make sure that in case of a the derived class
@@ -354,7 +366,6 @@ public SinglePassSamProgram makeInstance(final String outbase,
},
CollectSequencingArtifactMetrics {
-
@Override
public boolean needsReferenceSequence() {
return true;
@@ -369,7 +380,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
return makeInstance(outbase, outext, input,
reference,
@@ -380,6 +391,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
ignoreSequence,
false);
}
+
@Override
public SinglePassSamProgram makeInstance(final String outbase,
final String outext,
@@ -389,7 +401,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence,
+ final Set ignoreSequence,
final boolean includeUnpaired) {
final CollectSequencingArtifactMetrics program = new CollectSequencingArtifactMetrics();
program.OUTPUT = new File(outbase);
@@ -408,7 +420,6 @@ public SinglePassSamProgram makeInstance(final String outbase,
},
CollectQualityYieldMetrics {
-
@Override
public SinglePassSamProgram makeInstance(final String outbase,
final String outext,
@@ -418,7 +429,7 @@ public SinglePassSamProgram makeInstance(final String outbase,
final File dbSnp,
final File intervals,
final File refflat,
- final Set ignoreSequence) {
+ final Set ignoreSequence) {
final CollectQualityYieldMetrics program = new CollectQualityYieldMetrics();
program.OUTPUT = new File(outbase + ".quality_yield_metrics" + outext);
// Generally programs should not be accessing these directly but it might make things smoother
@@ -450,13 +461,13 @@ public SinglePassSamProgram makeInstance(final String outbase,
// We'll use this to init the command-line arg and for validation later.
private final Set accumLevelDefault = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
- @Argument(shortName="LEVEL",
- doc="The level(s) at which to accumulate metrics.")
+ @Argument(shortName = "LEVEL",
+ doc = "The level(s) at which to accumulate metrics.")
public Set METRIC_ACCUMULATION_LEVEL = new HashSet<>(accumLevelDefault);
@Argument(shortName = "EXT",
- doc="Append the given file extension to all metric file names (ex. OUTPUT.insert_size_metrics.EXT). None if null",
- optional=true)
+ doc = "Append the given file extension to all metric file names (ex. OUTPUT.insert_size_metrics.EXT). None if null",
+ optional = true)
public String FILE_EXTENSION = null;
@Argument(doc = "Set of metrics programs to apply during the pass through the SAM file.")
@@ -478,12 +489,12 @@ public SinglePassSamProgram makeInstance(final String outbase,
optional = true)
public File DB_SNP;
- @Argument(doc="Gene annotations in refFlat form. " +
+ @Argument(doc = "Gene annotations in refFlat form. " +
"Format described here: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat",
optional = true)
public File REF_FLAT;
- @Argument(doc="If a read maps to a sequence specified with this option, " +
+ @Argument(doc = "If a read maps to a sequence specified with this option, " +
"all the bases in the read are counted as ignored bases.",
optional = true)
public Set IGNORE_SEQUENCE = new HashSet<>();
@@ -491,9 +502,21 @@ public SinglePassSamProgram makeInstance(final String outbase,
@Argument(shortName = "UNPAIRED",
doc = "Include unpaired reads in CollectSequencingArtifactMetrics. " +
"If set to true then all paired reads will be included as well - " +
- "MINIMUM_INSERT_SIZE and MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics.")
+ "MINIMUM_INSERT_SIZE and MAXIMUM_INSERT_SIZE will be ignored in CollectSequencingArtifactMetrics.")
public boolean INCLUDE_UNPAIRED = false;
+ @Argument(doc="extra arguments to the various tools can be specified using the following format:" +
+ ":: where is one of the programs specified in PROGRAM, " +
+ "and are the argument and value that you'd like to specify as you would on the command line. " +
+ "For example, to change the HISTOGRAM_WIDTH in CollectInsertSizeMetrics to 200, use:\n " +
+ "\"EXTRA_ARGUMENT=CollectInsertSizeMetrics::HISTOGRAM_WIDTH=200\"\n " +
+ "or, in the new parser:" +
+ "--EXTRA_ARGUMENT \"CollectInsertSizeMetrics::--HISTOGRAM_WIDTH 200\"\n " +
+ "(Quotes are required to avoid the shell from separating this into two arguments.) " +
+ "Note that the following arguments cannot be modified on a per-program level: INPUT, REFERENCE_SEQUENCE, ASSUME_SORTED, and STOP_AFTER. " +
+ "Providing them in an EXTRA_ARGUMENT will _not_ result in an error, but they will be silently ignored. " , optional = true)
+ public List EXTRA_ARGUMENT = null;
+
/**
* Contents of PROGRAM set is transferred to this set during command-line validation, so that an outside
* developer can invoke this class programmatically and provide alternative Programs to run by calling
@@ -533,6 +556,8 @@ public int doWork() {
OUTPUT = OUTPUT.substring(0, OUTPUT.length() - 1);
}
+ final Map> additionalArguments = processAdditionalArguments(EXTRA_ARGUMENT);
+
final List programs = new ArrayList<>();
for (final ProgramInterface program : programsToRun) {
if (program.needsReferenceSequence() && REFERENCE_SEQUENCE == null) {
@@ -557,11 +582,53 @@ public int doWork() {
REF_FLAT,
IGNORE_SEQUENCE,
INCLUDE_UNPAIRED);
+
+ if (additionalArguments.containsKey(program)) {
+ final CommandLineParser commandLineParser = getCommandLineParser(instance);
+ final boolean success = commandLineParser.parseArguments(System.err, additionalArguments.get(program).toArray(new String[0]));
+ if (!success) {
+ throw new CommandLineException("Failed to parse arguments ["+ String.join(",", additionalArguments.get(program))+ "] for " + program);
+ }
+ additionalArguments.remove(program);
+ }
+
instance.setDefaultHeaders(getDefaultHeaders());
programs.add(instance);
}
+ if (!additionalArguments.isEmpty()) {
+ throw new CommandLineException("EXTRA_ARGUMENT values were provided, but corresponding PROGRAM wasn't requested:" +
+ additionalArguments.entrySet().stream().map(e -> e.getKey().toString() + "::" + e.getValue().toString()).collect(Collectors.joining()));
+ }
SinglePassSamProgram.makeItSo(INPUT, REFERENCE_SEQUENCE, ASSUME_SORTED, STOP_AFTER, programs);
return 0;
}
+
+ private static Map> processAdditionalArguments(final List arguments) {
+ final Map> map = new HashMap<>();
+ final Pattern pattern = Pattern.compile("(?.*)::(?.+?)( +(?.+))?");
+ for (String str : arguments) {
+ final Matcher matcher = pattern.matcher(str);
+ if (!matcher.matches()) {
+ throw new CommandLineException("couldn't understand EXTRA_ARGUMENT " + str +
+ " it doesn't conform to the form '::'.");
+ }
+ final String programName = matcher.group("program");
+ final ProgramInterface program;
+ try {
+ program = Program.valueOf(programName);
+ } catch (IllegalArgumentException e) {
+ throw new CommandLineException("Couldn't find program with value " + programName, e);
+ }
+ final String argumentAndValue = matcher.group("argumentAndValue");
+
+ map.computeIfAbsent(program, (k) -> new ArrayList<>()).add(argumentAndValue);
+
+ final String optionalValue = matcher.group("optionalValue");
+ if (!StringUtils.isEmpty(optionalValue)) {
+ map.get(program).add(optionalValue);
+ }
+ }
+ return map;
+ }
}
diff --git a/src/main/java/picard/cmdline/CommandLineProgram.java b/src/main/java/picard/cmdline/CommandLineProgram.java
index 1caf8ebd2b..93f349721c 100644
--- a/src/main/java/picard/cmdline/CommandLineProgram.java
+++ b/src/main/java/picard/cmdline/CommandLineProgram.java
@@ -377,14 +377,20 @@ public String getStandardUsagePreamble() {
*/
public CommandLineParser getCommandLineParser() {
if (commandLineParser == null) {
- commandLineParser = useLegacyParser(getClass()) ?
- new LegacyCommandLineArgumentParser(this) :
- new CommandLineArgumentParser(this,
- Collections.EMPTY_LIST,
- new HashSet<>(Collections.singleton(CommandLineParserOptions.APPEND_TO_COLLECTIONS)));
+ commandLineParser = getCommandLineParser(this);
}
return commandLineParser;
}
+ /**
+ * @return Return a newly minted command line parser for the provided object.
+ */
+ static public CommandLineParser getCommandLineParser(Object o) {
+ return useLegacyParser(o.getClass()) ?
+ new LegacyCommandLineArgumentParser(o) :
+ new CommandLineArgumentParser(o,
+ Collections.EMPTY_LIST,
+ new HashSet<>(Collections.singleton(CommandLineParserOptions.APPEND_TO_COLLECTIONS)));
+ }
/**
* Return true if the Picard command line parser should be used in place of the Barclay command line parser,
diff --git a/src/main/java/picard/cmdline/CommandLineSyntaxTranslater.java b/src/main/java/picard/cmdline/CommandLineSyntaxTranslater.java
index 8403195ac8..d7d2ace180 100644
--- a/src/main/java/picard/cmdline/CommandLineSyntaxTranslater.java
+++ b/src/main/java/picard/cmdline/CommandLineSyntaxTranslater.java
@@ -1,6 +1,7 @@
package picard.cmdline;
-import java.util.*;
+import java.util.Arrays;
+import java.util.List;
import java.util.stream.Collectors;
/**
@@ -32,6 +33,12 @@ public static String[] convertPicardStyleToPosixStyle(final String argv[]) {
if (splitArgPair.length == 1) { // assume positional arg
return Arrays.stream(new String[]{ originalArgPair });
} else if (splitArgPair.length == 2) {
+ //deal with EXTRA_ARGUMENT in CollectMultipleMetrics
+ if (splitArgPair[0].equals("EXTRA_ARGUMENT")) {
+ splitArgPair[1] = splitArgPair[1]
+ .replace("::", "::" + BARCLAY_LONG_OPTION_PREFIX)
+ .replace(LEGACY_VALUE_SEPARATOR, " ");
+ }
// it doesn't matter whether we use the short short name token ("-") or the long name token
// ("--"), so just treat everything as if it were a short name, since the CLP will accept either
return Arrays.stream(new String[]{BARCLAY_SHORT_OPTION_PREFIX + splitArgPair[0], splitArgPair[1]});
diff --git a/src/test/java/picard/analysis/CollectMultipleMetricsTest.java b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java
index 32588c3380..2e29853002 100644
--- a/src/test/java/picard/analysis/CollectMultipleMetricsTest.java
+++ b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java
@@ -1,5 +1,6 @@
package picard.analysis;
+import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
@@ -10,21 +11,28 @@
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.BufferedLineReader;
import org.apache.commons.lang3.StringUtils;
+import org.broadinstitute.barclay.argparser.CommandLineException;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramTest;
import picard.sam.SortSam;
-import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.*;
+
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashSet;
+import java.util.List;
import java.util.Random;
import java.util.Set;
+import static picard.analysis.GcBiasMetricsCollector.PerUnitGcBiasMetricsCollector.ACCUMULATION_LEVEL_ALL_READS;
+
/**
* Tests the two default "programs" that have tests in CollectMultipleMetrics
*
@@ -51,11 +59,23 @@ public void testAlignmentSummaryViaMultipleMetrics() throws IOException {
"METRIC_ACCUMULATION_LEVEL=" + MetricAccumulationLevel.ALL_READS.name(),
"PROGRAM=null",
"PROGRAM=" + CollectMultipleMetrics.Program.CollectAlignmentSummaryMetrics.name(),
- "PROGRAM=" + CollectMultipleMetrics.Program.CollectInsertSizeMetrics.name()
+ "PROGRAM=" + CollectMultipleMetrics.Program.CollectInsertSizeMetrics.name(),
+ "EXTRA_ARGUMENT=CollectInsertSizeMetrics::HISTOGRAM_WIDTH= 58",
+ "EXTRA_ARGUMENT=CollectInsertSizeMetrics::METRIC_ACCUMULATION_LEVEL=LIBRARY",
+ "EXTRA_ARGUMENT=CollectInsertSizeMetrics::METRIC_ACCUMULATION_LEVEL=READ_GROUP",
+
};
Assert.assertEquals(runPicardCommandLine(args), 0);
- final MetricsFile> output = new MetricsFile>();
+ final MetricsFile> outputISM = new MetricsFile<>();
+ outputISM.read(new FileReader(outfile + ".insert_size_metrics"));
+ Assert.assertEquals(outputISM.getMetrics().size(), 3);
+
+ for (final InsertSizeMetrics metrics : outputISM.getMetrics()) {
+ Assert.assertEquals(metrics.MEAN_INSERT_SIZE, 40D);
+ }
+
+ final MetricsFile> output = new MetricsFile<>();
output.read(new FileReader(outfile + ".alignment_summary_metrics"));
for (final AlignmentSummaryMetrics metrics : output.getMetrics()) {
@@ -101,6 +121,48 @@ public void testAlignmentSummaryViaMultipleMetrics() throws IOException {
}
}
+ @DataProvider
+ Object[][] extraArgumentValue() {
+ List