diff --git a/scripts/cnv_wdl/cnv_common_tasks.wdl b/scripts/cnv_wdl/cnv_common_tasks.wdl index 7b8a69f791e..1910ec22e62 100644 --- a/scripts/cnv_wdl/cnv_common_tasks.wdl +++ b/scripts/cnv_wdl/cnv_common_tasks.wdl @@ -301,29 +301,39 @@ task ScatterIntervals { String base_filename = basename(interval_list, ".interval_list") + String dollar = "$" #WDL workaround, see https://github.com/broadinstitute/cromwell/issues/1819 + command <<< set -e + # IntervalListTools will fail if the output directory does not exist, so we create it mkdir ${output_dir_} export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} - - { - >&2 echo "Attempting to run IntervalListTools..." + + # IntervalListTools behaves differently when scattering to a single shard or to multiple shards, so we do some handling in bash + + # IntervalListTools puts remainder intervals in the last shard, so integer division gives the number of shards + # (unless NUM_INTERVALS < num_intervals_per_scatter and NUM_SCATTERS = 0, in which case we still want a single shard) + NUM_INTERVALS=${dollar}(grep -v '@' ${interval_list} | wc -l) + NUM_SCATTERS=${dollar}(echo ${dollar}((NUM_INTERVALS / ${num_intervals_per_scatter}))) + + if [ $NUM_SCATTERS -le 1 ]; then + # if only a single shard is required, then we can just rename the original interval list + >&2 echo "Not running IntervalListTools because only a single shard is required. Copying original interval list..." + cp ${interval_list} ${output_dir_}/${base_filename}.scattered.0001.interval_list + else gatk --java-options "-Xmx${command_mem_mb}m" IntervalListTools \ --INPUT ${interval_list} \ --SUBDIVISION_MODE INTERVAL_COUNT \ --SCATTER_CONTENT ${num_intervals_per_scatter} \ - --OUTPUT ${output_dir_} && + --OUTPUT ${output_dir_} + # output files are named output_dir_/temp_0001_of_N/scattered.interval_list, etc. (N = number of scatters); - # we rename them as output_dir_/base_filename.scattered.0000.interval_list, etc. - ls ${output_dir_}/*/scattered.interval_list | \ + # we rename them as output_dir_/base_filename.scattered.0001.interval_list, etc. + ls -v ${output_dir_}/*/scattered.interval_list | \ cat -n | \ while read n filename; do mv $filename ${output_dir_}/${base_filename}.scattered.$(printf "%04d" $n).interval_list; done rm -rf ${output_dir_}/temp_*_of_* - } || { - # if only a single shard is required, then we can just rename the original interval list - >&2 echo "IntervalListTools failed because only a single shard is required. Copying original interval list..."
- cp ${interval_list} ${output_dir_}/${base_filename}.scattered.1.interval_list - } + fi >>> runtime { diff --git a/scripts/cnv_wdl/germline/cnv_germline_case_workflow.wdl b/scripts/cnv_wdl/germline/cnv_germline_case_workflow.wdl index 4828649d625..955855220ea 100644 --- a/scripts/cnv_wdl/germline/cnv_germline_case_workflow.wdl +++ b/scripts/cnv_wdl/germline/cnv_germline_case_workflow.wdl @@ -273,7 +273,6 @@ task DetermineGermlineContigPloidyCaseMode { command <<< set -e - mkdir ${output_dir_} export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} export MKL_NUM_THREADS=${default=8 cpu} export OMP_NUM_THREADS=${default=8 cpu} @@ -369,7 +368,6 @@ task GermlineCNVCallerCaseMode { command <<< set -e - mkdir ${output_dir_} export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} export MKL_NUM_THREADS=${default=8 cpu} export OMP_NUM_THREADS=${default=8 cpu} diff --git a/scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl b/scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl index 450eaa27ae5..a2688283fbe 100644 --- a/scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl +++ b/scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl @@ -374,7 +374,6 @@ task DetermineGermlineContigPloidyCohortMode { command <<< set -e - mkdir ${output_dir_} export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} export MKL_NUM_THREADS=${default=8 cpu} export OMP_NUM_THREADS=${default=8 cpu} @@ -482,7 +481,6 @@ task GermlineCNVCallerCohortMode { command <<< set -e - mkdir ${output_dir_} export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} export MKL_NUM_THREADS=${default=8 cpu} export OMP_NUM_THREADS=${default=8 cpu} diff --git a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl index 63485caaca3..037107135a8 100644 --- a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl +++ b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl @@ -597,7 +597,6 @@ task ModelSegments { command <<< set -e - mkdir ${output_dir_} export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} gatk --java-options "-Xmx${command_mem_mb}m" ModelSegments \ @@ -727,7 +726,6 @@ task PlotDenoisedCopyRatios { command <<< set -e - mkdir ${output_dir_} export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} gatk --java-options "-Xmx${command_mem_mb}m" PlotDenoisedCopyRatios \ @@ -787,7 +785,6 @@ task PlotModeledSegments { command <<< set -e - mkdir ${output_dir_} export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} gatk --java-options "-Xmx${command_mem_mb}m" PlotModeledSegments \ diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java index fde21af0d71..dc733f665ad 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/AnnotateIntervals.java @@ -7,7 +7,6 @@ import org.apache.commons.collections4.IteratorUtils; import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; @@ -157,7 +156,7 @@ public boolean requiresIntervals() { @Override public void 
onTraversalStart() { - CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(intervalArgumentCollection); + validateArguments(); logger.info("Loading intervals for annotation..."); sequenceDictionary = getBestAvailableSequenceDictionary(); @@ -191,6 +190,11 @@ public void onTraversalStart() { logger.info("Annotating intervals..."); } + private void validateArguments() { + CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(intervalArgumentCollection); + CopyNumberArgumentValidationUtils.validateOutputFiles(outputAnnotatedIntervalsFile); + } + private static void checkForOverlaps(final FeatureManager featureManager, final FeatureInput featureTrackPath, final SAMSequenceDictionary sequenceDictionary) { @@ -229,8 +233,12 @@ public void traverse() { public Object onTraversalSuccess() { reference.close(); features.close(); - logger.info(String.format("Writing annotated intervals to %s...", outputAnnotatedIntervalsFile)); + + logger.info(String.format("Writing annotated intervals to %s...", outputAnnotatedIntervalsFile.getAbsolutePath())); annotatedIntervals.write(outputAnnotatedIntervalsFile); + + logger.info("AnnotateIntervals complete."); + return super.onTraversalSuccess(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java index ebb6f677ecc..fc27ea1a71c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegments.java @@ -3,12 +3,12 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.FilenameUtils; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; +import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils; import org.broadinstitute.hellbender.tools.copynumber.caller.SimpleCopyRatioCaller; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledCopyRatioSegmentCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CalledLegacySegmentCollection; @@ -56,6 +56,16 @@ * a row specifying the column headers contained in {@link CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn}, * and the corresponding entry rows. * + *
  • + * CBS-formatted .igv.seg file (compatible with IGV). + * This is a tab-separated values (TSV) file with CBS-format column headers + * (see + * http://software.broadinstitute.org/cancer/software/genepattern/file-formats-guide#CBS) + * and the corresponding entry rows that can be plotted using IGV (see + * + * https://software.broadinstitute.org/software/igv/SEG). + * The mean log2 copy ratio is given in the SEGMENT_MEAN column. + *
  • * * *

    Usage example
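(The command body of this Usage example section falls outside the hunk context shown here; for reference, a typical invocation looks roughly like the following, with illustrative filenames.)

    gatk CallCopyRatioSegments \
         -I tumor.cr.seg \
         -O tumor.called.seg

With an output of tumor.called.seg, the IGV-compatible legacy file described in the added documentation would be written alongside it as tumor.called.igv.seg.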

    @@ -131,10 +141,11 @@ public final class CallCopyRatioSegments extends CommandLineProgram { ) private double callingCopyRatioZScoreThreshold = 2.; + private File outputCalledLegacySegmentsFile; + @Override protected Object doWork() { - Utils.validateArg(neutralSegmentCopyRatioLowerBound < neutralSegmentCopyRatioUpperBound, - "Copy-neutral lower bound must be less than upper bound."); + validateArguments(); final CopyRatioSegmentCollection copyRatioSegments = new CopyRatioSegmentCollection(inputCopyRatioSegmentsFile); final CalledCopyRatioSegmentCollection calledCopyRatioSegments = @@ -142,18 +153,36 @@ protected Object doWork() { neutralSegmentCopyRatioLowerBound, neutralSegmentCopyRatioUpperBound, outlierNeutralSegmentCopyRatioZScoreThreshold, callingCopyRatioZScoreThreshold) .makeCalls(); + + logger.info(String.format("Writing called segments to %s...", outputCalledCopyRatioSegmentsFile.getAbsolutePath())); calledCopyRatioSegments.write(outputCalledCopyRatioSegmentsFile); // Write an IGV compatible collection final CalledLegacySegmentCollection legacySegmentCollection = createCalledLegacySegmentCollection(calledCopyRatioSegments); - legacySegmentCollection.write(createCalledLegacyOutputFilename(outputCalledCopyRatioSegmentsFile)); + logger.info(String.format("Writing called segments in IGV-compatible format to %s...", outputCalledLegacySegmentsFile.getAbsolutePath())); + legacySegmentCollection.write(outputCalledLegacySegmentsFile); - return "SUCCESS"; + logger.info("CallCopyRatioSegments complete."); + + return null; + } + + private void validateArguments() { + Utils.validateArg(neutralSegmentCopyRatioLowerBound < neutralSegmentCopyRatioUpperBound, + String.format("Copy-neutral lower bound (%s) must be less than upper bound (%s).", + NEUTRAL_SEGMENT_COPY_RATIO_LOWER_BOUND_LONG_NAME, + NEUTRAL_SEGMENT_COPY_RATIO_UPPER_BOUND_LONG_NAME)); + + CopyNumberArgumentValidationUtils.validateInputs(inputCopyRatioSegmentsFile); + outputCalledLegacySegmentsFile = createCalledLegacySegmentsFile(outputCalledCopyRatioSegmentsFile); + CopyNumberArgumentValidationUtils.validateOutputFiles( + outputCalledCopyRatioSegmentsFile, + outputCalledLegacySegmentsFile); } @VisibleForTesting - public static File createCalledLegacyOutputFilename(final File calledCopyRatioBaseFilename) { - return new File(FilenameUtils.removeExtension(calledCopyRatioBaseFilename.getAbsolutePath()) + LEGACY_SEGMENTS_FILE_SUFFIX); + static File createCalledLegacySegmentsFile(final File calledCopyRatioSegmentsFile) { + return new File(FilenameUtils.removeExtension(calledCopyRatioSegmentsFile.getAbsolutePath()) + LEGACY_SEGMENTS_FILE_SUFFIX); } private static CalledLegacySegmentCollection createCalledLegacySegmentCollection(final CalledCopyRatioSegmentCollection segments) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java index 7fc5688c1b0..fa4f579af49 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCounts.java @@ -2,8 +2,6 @@ import com.google.common.collect.ImmutableList; import htsjdk.samtools.SAMSequenceDictionary; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import 
org.broadinstitute.barclay.help.DocumentedFeature; @@ -80,11 +78,7 @@ ) @DocumentedFeature public final class CollectAllelicCounts extends LocusWalker { - private static final Logger logger = LogManager.getLogger(CollectAllelicCounts.class); - - static final String MINIMUM_BASE_QUALITY_LONG_NAME = "minimum-base-quality"; - - static final int DEFAULT_MINIMUM_MAPPING_QUALITY = 30; + private static final int DEFAULT_MINIMUM_MAPPING_QUALITY = 30; static final int DEFAULT_MINIMUM_BASE_QUALITY = 20; static final List DEFAULT_ADDITIONAL_READ_FILTERS = ImmutableList.of( @@ -93,6 +87,8 @@ public final class CollectAllelicCounts extends LocusWalker { ReadFilterLibrary.NOT_DUPLICATE, new MappingQualityReadFilter(DEFAULT_MINIMUM_MAPPING_QUALITY)); + public static final String MINIMUM_BASE_QUALITY_LONG_NAME = "minimum-base-quality"; + @Argument( doc = "Output file for allelic counts.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, @@ -134,8 +130,12 @@ public List getDefaultReadFilters() { @Override public void onTraversalStart() { + validateArguments(); + final SampleLocatableMetadata metadata = MetadataUtils.fromHeader(getHeaderForReads(), Metadata.Type.SAMPLE_LOCATABLE); final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary(); + //this check is currently redundant, since the master dictionary is taken from the reads; + //however, if any other dictionary is added in the future, such a check should be performed if (!CopyNumberArgumentValidationUtils.isSameDictionary(metadata.getSequenceDictionary(), sequenceDictionary)) { logger.warn("Sequence dictionary in BAM does not match the master sequence dictionary."); } @@ -143,11 +143,18 @@ public void onTraversalStart() { logger.info("Collecting allelic counts..."); } + private void validateArguments() { + CopyNumberArgumentValidationUtils.validateOutputFiles(outputAllelicCountsFile); + } + @Override public Object onTraversalSuccess() { + logger.info(String.format("Writing allelic counts to %s...", outputAllelicCountsFile.getAbsolutePath())); allelicCountCollector.getAllelicCounts().write(outputAllelicCountsFile); - logger.info("Allelic counts written to " + outputAllelicCountsFile); - return("SUCCESS"); + + logger.info("CollectAllelicCounts complete."); + + return null; } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java index a5f382f28b8..a232796b4bf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectAllelicCountsSpark.java @@ -1,7 +1,6 @@ package org.broadinstitute.hellbender.tools.copynumber; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import htsjdk.samtools.SAMSequenceDictionary; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; @@ -9,10 +8,11 @@ import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; -import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; +import org.broadinstitute.hellbender.cmdline.programgroups.CoverageAnalysisProgramGroup; import org.broadinstitute.hellbender.engine.filters.ReadFilter; import 
org.broadinstitute.hellbender.engine.spark.LocusWalkerContext; import org.broadinstitute.hellbender.engine.spark.LocusWalkerSpark; +import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils; import org.broadinstitute.hellbender.tools.copynumber.datacollection.AllelicCountCollector; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.Metadata; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.MetadataUtils; @@ -29,16 +29,13 @@ * See {@link CollectAllelicCounts}. This behaves the same, except that it supports spark. */ @CommandLineProgramProperties( - summary = "Collects ref/alt counts at sites", - oneLineSummary = "Collects ref/alt counts at sites", - programGroup = CopyNumberProgramGroup.class + summary = "Collects reference and alternate allele counts at specified sites", + oneLineSummary = "Collects reference and alternate allele counts at specified sites", + programGroup = CoverageAnalysisProgramGroup.class ) public class CollectAllelicCountsSpark extends LocusWalkerSpark { - private static final long serialVersionUID = 1L; - private static final Logger logger = LogManager.getLogger(CollectAllelicCounts.class); - @Argument( doc = "Output file for allelic counts.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, @@ -72,13 +69,29 @@ public List getDefaultReadFilters() { @Override protected void processAlignments(JavaRDD rdd, JavaSparkContext ctx) { + validateArguments(); + final SampleLocatableMetadata metadata = MetadataUtils.fromHeader(getHeaderForReads(), Metadata.Type.SAMPLE_LOCATABLE); + final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary(); + //this check is currently redundant, since the master dictionary is taken from the reads; + //however, if any other dictionary is added in the future, such a check should be performed + if (!CopyNumberArgumentValidationUtils.isSameDictionary(metadata.getSequenceDictionary(), sequenceDictionary)) { + logger.warn("Sequence dictionary in BAM does not match the master sequence dictionary."); + } final Broadcast sampleMetadataBroadcast = ctx.broadcast(metadata); final AllelicCountCollector finalAllelicCountCollector = rdd.mapPartitions(distributedCount(sampleMetadataBroadcast, minimumBaseQuality)) .reduce((a1, a2) -> combineAllelicCountCollectors(a1, a2, sampleMetadataBroadcast.getValue())); + + logger.info(String.format("Writing allelic counts to %s...", outputAllelicCountsFile.getAbsolutePath())); finalAllelicCountCollector.getAllelicCounts().write(outputAllelicCountsFile); + + logger.info("CollectAllelicCountsSpark complete."); + } + + private void validateArguments() { + CopyNumberArgumentValidationUtils.validateOutputFiles(outputAllelicCountsFile); } private static FlatMapFunction, AllelicCountCollector> distributedCount(final Broadcast sampleMetadataBroadcast, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectReadCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectReadCounts.java index 9aee28e7c70..1da31174d10 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectReadCounts.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CollectReadCounts.java @@ -6,8 +6,6 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.Locatable; import htsjdk.samtools.util.OverlapDetector; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import 
org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; @@ -34,7 +32,9 @@ import org.broadinstitute.hellbender.utils.read.GATKRead; import java.io.File; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; import java.util.stream.Collectors; /** @@ -93,14 +93,12 @@ ) @DocumentedFeature public final class CollectReadCounts extends ReadWalker { - private static final Logger logger = LogManager.getLogger(CollectReadCounts.class); - - private static final int DEFAULT_MINIMUM_MAPPING_QUALITY = 30; - - enum Format { + public enum Format { TSV, HDF5 } + private static final int DEFAULT_MINIMUM_MAPPING_QUALITY = 30; + public static final String FORMAT_LONG_NAME = "format"; @Argument( @@ -150,6 +148,8 @@ public List getDefaultReadFilters() { @Override public void onTraversalStart() { + validateArguments(); + metadata = MetadataUtils.fromHeader(getHeaderForReads(), Metadata.Type.SAMPLE_LOCATABLE); final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary(); //this check is currently redundant, since the master dictionary is taken from the reads; @@ -158,14 +158,17 @@ public void onTraversalStart() { logger.warn("Sequence dictionary in BAM does not match the master sequence dictionary."); } - CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(intervalArgumentCollection); - intervals = intervalArgumentCollection.getIntervals(sequenceDictionary); intervalMultiset = HashMultiset.create(intervals.size()); logger.info("Collecting read counts..."); } + private void validateArguments() { + CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(intervalArgumentCollection); + CopyNumberArgumentValidationUtils.validateOutputFiles(outputCountsFile); + } + @Override public void apply(GATKRead read, ReferenceContext referenceContext, FeatureContext featureContext) { if (currentContig == null || !read.getContig().equals(currentContig)) { @@ -188,7 +191,7 @@ public void apply(GATKRead read, ReferenceContext referenceContext, FeatureConte @Override public Object onTraversalSuccess() { - logger.info("Writing read counts to " + outputCountsFile); + logger.info(String.format("Writing read counts to %s...", outputCountsFile.getAbsolutePath())); final SimpleCountCollection readCounts = new SimpleCountCollection( metadata, ImmutableList.copyOf(intervals.stream() //making this an ImmutableList avoids a defensive copy in SimpleCountCollection @@ -201,7 +204,9 @@ public Object onTraversalSuccess() { readCounts.write(outputCountsFile); } - return "SUCCESS"; + logger.info("CollectReadCounts complete."); + + return null; } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java index a959ac9abe0..b67e05e87f7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/CreateReadCountPanelOfNormals.java @@ -7,7 +7,6 @@ import org.apache.spark.api.java.JavaSparkContext; import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import 
org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hdf5.HDF5Library; @@ -25,7 +24,6 @@ import org.broadinstitute.hellbender.tools.copynumber.utils.HDF5Utils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.io.IOUtils; import java.io.File; import java.util.ArrayList; @@ -121,15 +119,6 @@ public final class CreateReadCountPanelOfNormals extends SparkCommandLineProgram { private static final long serialVersionUID = 1L; - //parameter names - public static final String MINIMUM_INTERVAL_MEDIAN_PERCENTILE_LONG_NAME = "minimum-interval-median-percentile"; - public static final String MAXIMUM_ZEROS_IN_SAMPLE_PERCENTAGE_LONG_NAME = "maximum-zeros-in-sample-percentage"; - public static final String MAXIMUM_ZEROS_IN_INTERVAL_PERCENTAGE_LONG_NAME = "maximum-zeros-in-interval-percentage"; - public static final String EXTREME_SAMPLE_MEDIAN_PERCENTILE_LONG_NAME = "extreme-sample-median-percentile"; - public static final String IMPUTE_ZEROS_LONG_NAME = "do-impute-zeros"; - public static final String EXTREME_OUTLIER_TRUNCATION_PERCENTILE_LONG_NAME = "extreme-outlier-truncation-percentile"; - public static final String MAXIMUM_CHUNK_SIZE = "maximum-chunk-size"; - //default values for filtering private static final double DEFAULT_MINIMUM_INTERVAL_MEDIAN_PERCENTILE = 10.0; private static final double DEFAULT_MAXIMUM_ZEROS_IN_SAMPLE_PERCENTAGE = 5.0; @@ -142,6 +131,15 @@ public final class CreateReadCountPanelOfNormals extends SparkCommandLineProgram private static final int DEFAULT_CHUNK_DIVISOR = 16; private static final int DEFAULT_MAXIMUM_CHUNK_SIZE = HDF5Utils.MAX_NUMBER_OF_VALUES_PER_HDF5_MATRIX / DEFAULT_CHUNK_DIVISOR; + //parameter names + public static final String MINIMUM_INTERVAL_MEDIAN_PERCENTILE_LONG_NAME = "minimum-interval-median-percentile"; + public static final String MAXIMUM_ZEROS_IN_SAMPLE_PERCENTAGE_LONG_NAME = "maximum-zeros-in-sample-percentage"; + public static final String MAXIMUM_ZEROS_IN_INTERVAL_PERCENTAGE_LONG_NAME = "maximum-zeros-in-interval-percentage"; + public static final String EXTREME_SAMPLE_MEDIAN_PERCENTILE_LONG_NAME = "extreme-sample-median-percentile"; + public static final String IMPUTE_ZEROS_LONG_NAME = "do-impute-zeros"; + public static final String EXTREME_OUTLIER_TRUNCATION_PERCENTILE_LONG_NAME = "extreme-outlier-truncation-percentile"; + public static final String MAXIMUM_CHUNK_SIZE = "maximum-chunk-size"; + @Argument( doc = "Input TSV or HDF5 files containing integer read counts in genomic intervals for all samples in the panel of normals (output of CollectReadCounts). 
" + "Intervals must be identical and in the same order for all samples.", @@ -258,7 +256,6 @@ protected void runPipeline(final JavaSparkContext ctx) { "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems."); } - //validate parameters and parse optional parameters validateArguments(); //get sample filenames @@ -296,18 +293,22 @@ protected void runPipeline(final JavaSparkContext ctx) { extremeSampleMedianPercentile, doImputeZeros, extremeOutlierTruncationPercentile, numEigensamplesRequested, maximumChunkSize, ctx); - logger.info("Panel of normals successfully created."); + logger.info("CreateReadCountPanelOfNormals complete."); } private void validateArguments() { - Utils.validateArg(inputReadCountFiles.size() == new HashSet<>(inputReadCountFiles).size(), - "List of input read-counts files cannot contain duplicates."); - inputReadCountFiles.forEach(IOUtils::canReadFile); if (numEigensamplesRequested > inputReadCountFiles.size()) { logger.warn(String.format("Number of eigensamples (%d) is greater than the number of input samples (%d); " + "the number of samples retained after filtering will be used instead.", numEigensamplesRequested, inputReadCountFiles.size())); } + + Utils.validateArg(inputReadCountFiles.size() == new HashSet<>(inputReadCountFiles).size(), + "List of input read-counts files cannot contain duplicates."); + + inputReadCountFiles.forEach(CopyNumberArgumentValidationUtils::validateInputs); + CopyNumberArgumentValidationUtils.validateInputs(inputAnnotatedIntervalsFile); + CopyNumberArgumentValidationUtils.validateOutputFiles(outputPanelOfNormalsFile); } private static RealMatrix constructReadCountMatrix(final Logger logger, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java index 9ba88bff5ae..c966af639e9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DenoiseReadCounts.java @@ -2,7 +2,6 @@ import org.apache.commons.math3.linear.RealMatrix; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hdf5.HDF5File; @@ -18,7 +17,6 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleCountCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations; -import org.broadinstitute.hellbender.utils.io.IOUtils; import java.io.File; @@ -177,12 +175,12 @@ protected Object doWork() { "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems."); } - IOUtils.canReadFile(inputReadCountFile); + validateArguments(); + logger.info(String.format("Reading read-counts file (%s)...", inputReadCountFile)); final SimpleCountCollection readCounts = SimpleCountCollection.read(inputReadCountFile); if (inputPanelOfNormalsFile != null) { //denoise using panel of normals - IOUtils.canReadFile(inputPanelOfNormalsFile); try (final HDF5File hdf5PanelOfNormalsFile = new HDF5File(inputPanelOfNormalsFile)) { //HDF5File implements AutoCloseable final SVDReadCountPanelOfNormals panelOfNormals = 
HDF5SVDReadCountPanelOfNormals.read(hdf5PanelOfNormalsFile); @@ -201,8 +199,7 @@ protected Object doWork() { } final SVDDenoisedCopyRatioResult denoisedCopyRatioResult = panelOfNormals.denoise(readCounts, numEigensamples); - logger.info("Writing standardized and denoised copy ratios..."); - denoisedCopyRatioResult.write(outputStandardizedCopyRatiosFile, outputDenoisedCopyRatiosFile); + writeResult(denoisedCopyRatioResult); } } else { //standardize and perform optional GC-bias correction //get GC content (null if not provided) @@ -226,11 +223,29 @@ protected Object doWork() { readCounts.getIntervals(), standardizedCopyRatioValues, standardizedCopyRatioValues); - standardizedResult.write(outputStandardizedCopyRatiosFile, outputDenoisedCopyRatiosFile); + + writeResult(standardizedResult); } - logger.info("Read counts successfully denoised."); + logger.info("DenoiseReadCounts complete."); + + return null; + } + + private void validateArguments() { + CopyNumberArgumentValidationUtils.validateInputs( + inputReadCountFile, + inputPanelOfNormalsFile, + inputAnnotatedIntervalsFile); + CopyNumberArgumentValidationUtils.validateOutputFiles( + outputStandardizedCopyRatiosFile, + outputDenoisedCopyRatiosFile); - return "SUCCESS"; + private void writeResult(final SVDDenoisedCopyRatioResult result) { + logger.info(String.format("Writing standardized and denoised copy ratios to %s and %s...", + outputStandardizedCopyRatiosFile.getAbsolutePath(), + outputDenoisedCopyRatiosFile.getAbsolutePath())); + result.write(outputStandardizedCopyRatiosFile, outputDenoisedCopyRatiosFile); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DetermineGermlineContigPloidy.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DetermineGermlineContigPloidy.java index ca73501c6db..5100ff26075 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DetermineGermlineContigPloidy.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/DetermineGermlineContigPloidy.java @@ -1,6 +1,9 @@ package org.broadinstitute.hellbender.tools.copynumber; -import org.broadinstitute.barclay.argparser.*; +import org.broadinstitute.barclay.argparser.Advanced; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; @@ -18,7 +21,6 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.CoveragePerContig; import org.broadinstitute.hellbender.tools.copynumber.formats.records.SimpleCount; -import org.broadinstitute.hellbender.tools.copynumber.utils.CopyNumberUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.io.IOUtils; @@ -196,8 +198,8 @@ public enum RunMode { COHORT, CASE } - private static final String COHORT_DETERMINE_PLOIDY_AND_DEPTH_PYTHON_SCRIPT = "cohort_determine_ploidy_and_depth.py"; - private static final String CASE_DETERMINE_PLOIDY_AND_DEPTH_PYTHON_SCRIPT = "case_determine_ploidy_and_depth.py"; + public static final String COHORT_DETERMINE_PLOIDY_AND_DEPTH_PYTHON_SCRIPT = "cohort_determine_ploidy_and_depth.py"; +
public static final String CASE_DETERMINE_PLOIDY_AND_DEPTH_PYTHON_SCRIPT = "case_determine_ploidy_and_depth.py"; //name of the interval file output by the python code in the model directory public static final String INPUT_MODEL_INTERVAL_FILE = "interval_list.tsv"; @@ -231,7 +233,7 @@ public enum RunMode { fullName = CopyNumberStandardArgument.MODEL_LONG_NAME, optional = true ) - private String inputModelDir; + private File inputModelDir; @Argument( doc = "Prefix for output filenames.", @@ -240,12 +242,11 @@ public enum RunMode { private String outputPrefix; @Argument( - doc = "Output directory for sample contig-ploidy calls and the contig-ploidy model parameters for " + - "future use.", + doc = "Output directory. This will be created if it does not exist.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) - private String outputDir; + private File outputDir; @ArgumentCollection protected IntervalArgumentCollection intervalArgumentCollection @@ -273,7 +274,9 @@ protected void onStartup() { @Override protected Object doWork() { - setModeAndValidateArguments(); + validateArguments(); + + setModeAndResolveIntervals(); //read in count files and output intervals and samples x coverage-per-contig table to temporary files specifiedIntervalsFile = IOUtils.createTempFile("intervals", ".tsv"); @@ -289,24 +292,30 @@ protected Object doWork() { throw new UserException("Python return code was non-zero."); } - logger.info("Germline contig ploidy determination complete."); + logger.info("DetermineGermlineContigPloidy complete."); - return "SUCCESS"; + return null; } - private void setModeAndValidateArguments() { + private void validateArguments() { germlineContigPloidyModelArgumentCollection.validate(); germlineContigPloidyHybridADVIArgumentCollection.validate(); - Utils.nonNull(outputPrefix); - inputReadCountFiles.forEach(IOUtils::canReadFile); + Utils.validateArg(inputReadCountFiles.size() == new HashSet<>(inputReadCountFiles).size(), "List of input read-count files cannot contain duplicates."); + inputReadCountFiles.forEach(CopyNumberArgumentValidationUtils::validateInputs); + CopyNumberArgumentValidationUtils.validateInputs( + inputContigPloidyPriorsFile, + inputModelDir); + Utils.nonEmpty(outputPrefix); + CopyNumberArgumentValidationUtils.validateAndPrepareOutputDirectories(outputDir); + } + + private void setModeAndResolveIntervals() { if (inputModelDir != null) { runMode = RunMode.CASE; logger.info("A contig-ploidy model was provided, running in case mode..."); - Utils.validateArg(new File(inputModelDir).exists(), - String.format("Input ploidy-model directory %s does not exist.", inputModelDir)); if (inputContigPloidyPriorsFile != null) { throw new UserException.BadInput("Invalid combination of inputs: Running in case mode, " + "but contig-ploidy priors were provided."); @@ -329,7 +338,6 @@ private void setModeAndValidateArguments() { if (inputContigPloidyPriorsFile == null){ throw new UserException.BadInput("Contig-ploidy priors must be provided in cohort mode."); } - IOUtils.canReadFile(inputContigPloidyPriorsFile); //get sequence dictionary and intervals from the first read-count file to use to validate remaining files //(this first file is read again below, which is slightly inefficient but is probably not worth the extra code) @@ -382,25 +390,23 @@ private void writeSamplesByCoveragePerContig(final File samplesByCoveragePerCont private boolean executeDeterminePloidyAndDepthPythonScript(final File 
samplesByCoveragePerContigFile, final File intervalsFile) { final PythonScriptExecutor executor = new PythonScriptExecutor(true); - final String outputDirArg = Utils.nonEmpty(outputDir).endsWith(File.separator) - ? outputDir - : outputDir + File.separator; //add trailing slash if necessary + final String outputDirArg = CopyNumberArgumentValidationUtils.addTrailingSlashIfNecessary(outputDir.getAbsolutePath()); //note that the samples x coverage-by-contig table is referred to as "metadata" by gcnvkernel final List arguments = new ArrayList<>(Arrays.asList( - "--sample_coverage_metadata=" + CopyNumberUtils.getCanonicalPath(samplesByCoveragePerContigFile), - "--output_calls_path=" + CopyNumberUtils.getCanonicalPath(outputDirArg + outputPrefix + CALLS_PATH_SUFFIX))); + "--sample_coverage_metadata=" + CopyNumberArgumentValidationUtils.getCanonicalPath(samplesByCoveragePerContigFile), + "--output_calls_path=" + CopyNumberArgumentValidationUtils.getCanonicalPath(outputDirArg + outputPrefix + CALLS_PATH_SUFFIX))); arguments.addAll(germlineContigPloidyModelArgumentCollection.generatePythonArguments(runMode)); arguments.addAll(germlineContigPloidyHybridADVIArgumentCollection.generatePythonArguments()); final String script; if (runMode == RunMode.COHORT) { script = COHORT_DETERMINE_PLOIDY_AND_DEPTH_PYTHON_SCRIPT; - arguments.add("--interval_list=" + CopyNumberUtils.getCanonicalPath(intervalsFile)); - arguments.add("--contig_ploidy_prior_table=" + CopyNumberUtils.getCanonicalPath(inputContigPloidyPriorsFile)); - arguments.add("--output_model_path=" + CopyNumberUtils.getCanonicalPath(outputDirArg + outputPrefix + MODEL_PATH_SUFFIX)); + arguments.add("--interval_list=" + CopyNumberArgumentValidationUtils.getCanonicalPath(intervalsFile)); + arguments.add("--contig_ploidy_prior_table=" + CopyNumberArgumentValidationUtils.getCanonicalPath(inputContigPloidyPriorsFile)); + arguments.add("--output_model_path=" + CopyNumberArgumentValidationUtils.getCanonicalPath(outputDirArg + outputPrefix + MODEL_PATH_SUFFIX)); } else { script = CASE_DETERMINE_PLOIDY_AND_DEPTH_PYTHON_SCRIPT; - arguments.add("--input_model_path=" + CopyNumberUtils.getCanonicalPath(inputModelDir)); + arguments.add("--input_model_path=" + CopyNumberArgumentValidationUtils.getCanonicalPath(inputModelDir)); } return executor.executeScript( new Resource(script, GermlineCNVCaller.class), diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/FilterIntervals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/FilterIntervals.java index 96bbc576507..0eac4d0f98a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/FilterIntervals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/FilterIntervals.java @@ -9,7 +9,6 @@ import org.apache.logging.log4j.Logger; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.ArgumentCollection; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; @@ -157,6 +156,8 @@ public final class FilterIntervals extends CommandLineProgram { @Argument( doc = "Minimum allowed value for GC-content annotation (inclusive).", fullName = MINIMUM_GC_CONTENT_LONG_NAME, + minValue = 0., + maxValue = 1., optional = true ) private double minimumGCContent = 0.1; @@ -164,6 +165,8 @@ public final class FilterIntervals extends 
CommandLineProgram { @Argument( doc = "Maximum allowed value for GC-content annotation (inclusive).", fullName = MAXIMUM_GC_CONTENT_LONG_NAME, + minValue = 0., + maxValue = 1., optional = true ) private double maximumGCContent = 0.9; @@ -171,6 +174,8 @@ public final class FilterIntervals extends CommandLineProgram { @Argument( doc = "Minimum allowed value for mappability annotation (inclusive).", fullName = MINIMUM_MAPPABILITY_LONG_NAME, + minValue = 0., + maxValue = 1., optional = true ) private double minimumMappability = 0.9; @@ -178,6 +183,8 @@ public final class FilterIntervals extends CommandLineProgram { @Argument( doc = "Maximum allowed value for mappability annotation (inclusive).", fullName = MAXIMUM_MAPPABILITY_LONG_NAME, + minValue = 0., + maxValue = 1., optional = true ) private double maximumMappability = 1.; @@ -185,6 +192,8 @@ public final class FilterIntervals extends CommandLineProgram { @Argument( doc = "Minimum allowed value for segmental-duplication-content annotation (inclusive).", fullName = MINIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME, + minValue = 0., + maxValue = 1., optional = true ) private double minimumSegmentalDuplicationContent = 0.; @@ -192,6 +201,8 @@ public final class FilterIntervals extends CommandLineProgram { @Argument( doc = "Maximum allowed value for segmental-duplication-content annotation (inclusive).", fullName = MAXIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME, + minValue = 0., + maxValue = 1., optional = true ) private double maximumSegmentalDuplicationContent = 0.5; @@ -260,29 +271,36 @@ public final class FilterIntervals extends CommandLineProgram { @Override public Object doWork() { - validateFilesAndResolveIntervals(); + validateArguments(); + + resolveAndValidateIntervals(); + final SimpleIntervalCollection filteredIntervals = filterIntervals(); - logger.info(String.format("Writing filtered intervals to %s...", outputFilteredIntervalsFile)); + logger.info(String.format("Writing filtered intervals to %s...", outputFilteredIntervalsFile.getAbsolutePath())); final IntervalList filteredIntervalList = new IntervalList(filteredIntervals.getMetadata().getSequenceDictionary()); filteredIntervals.getIntervals().forEach(i -> filteredIntervalList.add(new Interval(i))); filteredIntervalList.write(outputFilteredIntervalsFile); - return "SUCCESS"; + logger.info("FilterIntervals complete."); + + return null; } - private void validateFilesAndResolveIntervals() { - //validate input intervals and files + private void validateArguments() { CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(intervalArgumentCollection); if (inputAnnotatedIntervalsFile == null && inputReadCountFiles.isEmpty()) { throw new UserException("Must provide annotated intervals or counts."); } - if (inputAnnotatedIntervalsFile != null) { - IOUtils.canReadFile(inputAnnotatedIntervalsFile); - } - inputReadCountFiles.forEach(IOUtils::canReadFile); + Utils.validateArg(inputReadCountFiles.size() == new HashSet<>(inputReadCountFiles).size(), "List of input read-count files cannot contain duplicates."); - + + CopyNumberArgumentValidationUtils.validateInputs(inputAnnotatedIntervalsFile); + inputReadCountFiles.forEach(IOUtils::canReadFile); + CopyNumberArgumentValidationUtils.validateOutputFiles(outputFilteredIntervalsFile); + } + + private void resolveAndValidateIntervals() { //parse inputs to resolve and intersect intervals final LocatableMetadata metadata; final List resolved; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java 
b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java index 658e896668c..0087a2e9e82 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java @@ -1,6 +1,9 @@ package org.broadinstitute.hellbender.tools.copynumber; -import org.broadinstitute.barclay.argparser.*; +import org.broadinstitute.barclay.argparser.Advanced; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.ArgumentCollection; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; @@ -13,7 +16,6 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleCountCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleIntervalCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.records.SimpleCount; -import org.broadinstitute.hellbender.tools.copynumber.utils.CopyNumberUtils; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.io.IOUtils; @@ -214,7 +216,7 @@ public enum RunMode { doc = "Input contig-ploidy calls directory (output of DetermineGermlineContigPloidy).", fullName = CONTIG_PLOIDY_CALLS_DIRECTORY_LONG_NAME ) - private String inputContigPloidyCallsDir; + private File inputContigPloidyCallsDir; @Argument( doc = "Input denoising-model directory. In COHORT mode, this argument is optional; if provided," + @@ -223,7 +225,7 @@ public enum RunMode { fullName = CopyNumberStandardArgument.MODEL_LONG_NAME, optional = true ) - private String inputModelDir = null; + private File inputModelDir = null; @Argument( doc = "Input annotated-intervals file containing annotations for GC content in genomic intervals " + @@ -242,11 +244,11 @@ public enum RunMode { private String outputPrefix; @Argument( - doc = "Output directory.", + doc = "Output directory. 
This will be created if it does not exist.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) - private String outputDir; + private File outputDir; @ArgumentCollection protected IntervalArgumentCollection intervalArgumentCollection @@ -280,6 +282,8 @@ protected void onStartup() { protected Object doWork() { validateArguments(); + resolveIntervals(); + //read in count files, validate they contain specified subset of intervals, and output //count files for these intervals to temporary files final List intervalSubsetReadCountFiles = writeIntervalSubsetReadCountFiles(); @@ -291,21 +295,29 @@ protected Object doWork() { throw new UserException("Python return code was non-zero."); } - logger.info("Germline denoising and CNV calling complete."); + logger.info("GermlineCNVCaller complete."); - return "SUCCESS"; + return null; } private void validateArguments() { - inputReadCountFiles.forEach(IOUtils::canReadFile); + germlineCallingArgumentCollection.validate(); + germlineDenoisingModelArgumentCollection.validate(); + germlineCNVHybridADVIArgumentCollection.validate(); + Utils.validateArg(inputReadCountFiles.size() == new HashSet<>(inputReadCountFiles).size(), "List of input read-count files cannot contain duplicates."); - if (inputModelDir != null) { - Utils.validateArg(new File(inputModelDir).exists(), - String.format("Input denoising-model directory %s does not exist.", inputModelDir)); - } + inputReadCountFiles.forEach(IOUtils::canReadFile); + CopyNumberArgumentValidationUtils.validateInputs( + inputContigPloidyCallsDir, + inputModelDir, + inputAnnotatedIntervalsFile); + Utils.nonEmpty(outputPrefix); + CopyNumberArgumentValidationUtils.validateAndPrepareOutputDirectories(outputDir); + } + private void resolveIntervals() { if (inputModelDir != null) { //intervals are retrieved from the input model directory specifiedIntervalsFile = new File(inputModelDir, INPUT_MODEL_INTERVAL_FILE); @@ -354,16 +366,6 @@ private void validateArguments() { "but annotated intervals were provided."); } } - - Utils.nonNull(outputPrefix); - Utils.validateArg(new File(inputContigPloidyCallsDir).exists(), - String.format("Input contig-ploidy calls directory %s does not exist.", inputContigPloidyCallsDir)); - Utils.validateArg(new File(outputDir).exists(), - String.format("Output directory %s does not exist.", outputDir)); - - germlineCallingArgumentCollection.validate(); - germlineDenoisingModelArgumentCollection.validate(); - germlineCNVHybridADVIArgumentCollection.validate(); } private List writeIntervalSubsetReadCountFiles() { @@ -400,27 +402,25 @@ private List writeIntervalSubsetReadCountFiles() { private boolean executeGermlineCNVCallerPythonScript(final List intervalSubsetReadCountFiles) { final PythonScriptExecutor executor = new PythonScriptExecutor(true); - final String outputDirArg = Utils.nonEmpty(outputDir).endsWith(File.separator) - ? 
outputDir - : outputDir + File.separator; //add trailing slash if necessary + final String outputDirArg = CopyNumberArgumentValidationUtils.addTrailingSlashIfNecessary(outputDir.getAbsolutePath()); //add required arguments final List arguments = new ArrayList<>(Arrays.asList( - "--ploidy_calls_path=" + CopyNumberUtils.getCanonicalPath(inputContigPloidyCallsDir), - "--output_calls_path=" + CopyNumberUtils.getCanonicalPath(outputDirArg + outputPrefix + CALLS_PATH_SUFFIX), - "--output_tracking_path=" + CopyNumberUtils.getCanonicalPath(outputDirArg + outputPrefix + TRACKING_PATH_SUFFIX))); + "--ploidy_calls_path=" + CopyNumberArgumentValidationUtils.getCanonicalPath(inputContigPloidyCallsDir), + "--output_calls_path=" + CopyNumberArgumentValidationUtils.getCanonicalPath(outputDirArg + outputPrefix + CALLS_PATH_SUFFIX), + "--output_tracking_path=" + CopyNumberArgumentValidationUtils.getCanonicalPath(outputDirArg + outputPrefix + TRACKING_PATH_SUFFIX))); //if a model path is given, add it to the argument (both COHORT and CASE modes) if (inputModelDir != null) { - arguments.add("--input_model_path=" + CopyNumberUtils.getCanonicalPath(inputModelDir)); + arguments.add("--input_model_path=" + CopyNumberArgumentValidationUtils.getCanonicalPath(inputModelDir)); } final String script; if (runMode == RunMode.COHORT) { script = COHORT_DENOISING_CALLING_PYTHON_SCRIPT; //these are the annotated intervals, if provided - arguments.add("--modeling_interval_list=" + CopyNumberUtils.getCanonicalPath(specifiedIntervalsFile)); - arguments.add("--output_model_path=" + CopyNumberUtils.getCanonicalPath(outputDirArg + outputPrefix + MODEL_PATH_SUFFIX)); + arguments.add("--modeling_interval_list=" + CopyNumberArgumentValidationUtils.getCanonicalPath(specifiedIntervalsFile)); + arguments.add("--output_model_path=" + CopyNumberArgumentValidationUtils.getCanonicalPath(outputDirArg + outputPrefix + MODEL_PATH_SUFFIX)); if (inputAnnotatedIntervalsFile != null) { arguments.add("--enable_explicit_gc_bias_modeling=True"); } else { @@ -432,7 +432,7 @@ private boolean executeGermlineCNVCallerPythonScript(final List intervalSu } arguments.add("--read_count_tsv_files"); - arguments.addAll(intervalSubsetReadCountFiles.stream().map(CopyNumberUtils::getCanonicalPath).collect(Collectors.toList())); + arguments.addAll(intervalSubsetReadCountFiles.stream().map(CopyNumberArgumentValidationUtils::getCanonicalPath).collect(Collectors.toList())); arguments.addAll(germlineDenoisingModelArgumentCollection.generatePythonArguments(runMode)); arguments.addAll(germlineCallingArgumentCollection.generatePythonArguments(runMode)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegments.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegments.java index da234c4465d..da3a0e52849 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegments.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegments.java @@ -5,7 +5,6 @@ import org.apache.commons.math3.special.Beta; import org.apache.commons.math3.util.FastMath; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; @@ -26,7 +25,6 @@ import org.broadinstitute.hellbender.tools.copynumber.segmentation.MultidimensionalKernelSegmenter; import 
org.broadinstitute.hellbender.tools.copynumber.utils.segmentation.KernelSegmenter; import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.io.IOUtils; import java.io.File; import java.util.*; @@ -102,7 +100,7 @@ * *
  • * Output directory. - * This must be a pre-existing directory. + * This will be created if it does not exist. *
  • * * @@ -279,17 +277,17 @@ public final class ModelSegments extends CommandLineProgram { private File inputNormalAllelicCountsFile = null; @Argument( - doc = "Prefix for output files.", + doc = "Prefix for output filenames.", fullName = CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME ) private String outputPrefix; @Argument( - doc = "Output directory.", + doc = "Output directory. This will be created if it does not exist.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) - private String outputDir; + private File outputDir; @Argument( doc = "Minimum total count for filtering allelic counts in the case sample, if available. " + @@ -323,6 +321,8 @@ public final class ModelSegments extends CommandLineProgram { "from zero base-error rate up to this value. Decreasing this value will increase " + "the number of sites assumed to be heterozygous for modeling.", fullName = GENOTYPING_BASE_ERROR_RATE_LONG_NAME, + minValue = 0., + maxValue = 1., optional = true ) private double genotypingBaseErrorRate = 5E-2; @@ -566,29 +566,23 @@ protected Object doWork() { writeSegments(copyRatioLegacySegments, COPY_RATIO_LEGACY_SEGMENTS_FILE_SUFFIX); writeSegments(alleleFractionLegacySegments, ALLELE_FRACTION_LEGACY_SEGMENTS_FILE_SUFFIX); - logger.info("SUCCESS: ModelSegments run complete."); + logger.info("ModelSegments complete."); - return "SUCCESS"; + return null; } private void validateArguments() { - Utils.nonNull(outputPrefix); Utils.validateArg(!(inputDenoisedCopyRatiosFile == null && inputAllelicCountsFile == null), "Must provide at least a denoised-copy-ratios file or an allelic-counts file."); Utils.validateArg(!(inputAllelicCountsFile == null && inputNormalAllelicCountsFile != null), "Must provide an allelic-counts file for the case sample to run in matched-normal mode."); - if (inputDenoisedCopyRatiosFile != null) { - IOUtils.canReadFile(inputDenoisedCopyRatiosFile); - } - if (inputAllelicCountsFile != null) { - IOUtils.canReadFile(inputAllelicCountsFile); - } - if (inputNormalAllelicCountsFile != null) { - IOUtils.canReadFile(inputNormalAllelicCountsFile); - } - if (!new File(outputDir).exists()) { - throw new UserException(String.format("Output directory %s does not exist.", outputDir)); - } + + CopyNumberArgumentValidationUtils.validateInputs( + inputDenoisedCopyRatiosFile, + inputAllelicCountsFile, + inputNormalAllelicCountsFile); + Utils.nonEmpty(outputPrefix); + CopyNumberArgumentValidationUtils.validateAndPrepareOutputDirectories(outputDir); } private T readOptionalFileOrNull(final File file, @@ -664,11 +658,11 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada filteredAllelicCounts.getRecords().stream() .filter(ac -> calculateHomozygousLogRatio(ac, genotypingBaseErrorRate) < genotypingHomozygousLogRatioThreshold) .collect(Collectors.toList())); - final File hetAllelicCountsFile = new File(outputDir, outputPrefix + HET_ALLELIC_COUNTS_FILE_SUFFIX); - hetAllelicCounts.write(hetAllelicCountsFile); logger.info(String.format("Retained %d / %d sites after testing for heterozygosity...", hetAllelicCounts.size(), allelicCounts.size())); - logger.info(String.format("Heterozygous allelic counts written to %s.", hetAllelicCountsFile)); + final File hetAllelicCountsFile = new File(outputDir, outputPrefix + HET_ALLELIC_COUNTS_FILE_SUFFIX); + logger.info(String.format("Writing heterozygous allelic counts to %s...", hetAllelicCountsFile.getAbsolutePath())); + 
hetAllelicCounts.write(hetAllelicCountsFile); } else { //use matched normal logger.info("Matched normal was provided, running in matched-normal mode..."); @@ -712,11 +706,11 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada filteredNormalAllelicCounts.getRecords().stream() .filter(ac -> calculateHomozygousLogRatio(ac, genotypingBaseErrorRate) < genotypingHomozygousLogRatioThreshold) .collect(Collectors.toList())); - final File hetNormalAllelicCountsFile = new File(outputDir, outputPrefix + NORMAL_HET_ALLELIC_COUNTS_FILE_SUFFIX); - hetNormalAllelicCounts.write(hetNormalAllelicCountsFile); logger.info(String.format("Retained %d / %d sites in matched normal after testing for heterozygosity...", hetNormalAllelicCounts.size(), normalAllelicCounts.size())); - logger.info(String.format("Heterozygous allelic counts for matched normal written to %s.", hetNormalAllelicCountsFile)); + final File hetNormalAllelicCountsFile = new File(outputDir, outputPrefix + NORMAL_HET_ALLELIC_COUNTS_FILE_SUFFIX); + logger.info(String.format("Writing heterozygous allelic counts for matched normal to %s...", hetNormalAllelicCountsFile.getAbsolutePath())); + hetNormalAllelicCounts.write(hetNormalAllelicCountsFile); //retrieve sites in case sample logger.info("Retrieving allelic counts at these sites in case sample..."); @@ -726,8 +720,8 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada .filter(ac -> hetNormalAllelicCounts.getOverlapDetector().overlapsAny(ac)) .collect(Collectors.toList())); final File hetAllelicCountsFile = new File(outputDir, outputPrefix + HET_ALLELIC_COUNTS_FILE_SUFFIX); + logger.info(String.format("Writing allelic counts for case sample at heterozygous sites in matched normal to %s...", hetAllelicCountsFile.getAbsolutePath())); hetAllelicCounts.write(hetAllelicCountsFile); - logger.info(String.format("Allelic counts for case sample at heterozygous sites in matched normal written to %s.", hetAllelicCountsFile)); } return hetAllelicCounts; } @@ -765,7 +759,7 @@ private void writeModeledSegmentsAndParameterFiles(final MultidimensionalModelle private void writeSegments(final AbstractRecordCollection segments, final String fileSuffix) { final File segmentsFile = new File(outputDir, outputPrefix + fileSuffix); + logger.info(String.format("Writing segments to %s...", segmentsFile.getAbsolutePath())); segments.write(segmentsFile); - logger.info(String.format("Segments written to %s", segmentsFile)); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PostprocessGermlineCNVCalls.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PostprocessGermlineCNVCalls.java index 505a2f766c5..79b283f8eb6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PostprocessGermlineCNVCalls.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PostprocessGermlineCNVCalls.java @@ -3,16 +3,14 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.variant.variantcontext.writer.VariantContextWriter; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; import 
org.broadinstitute.hellbender.engine.GATKTool; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.*; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.CopyNumberPosteriorDistribution; @@ -97,8 +95,6 @@ ) @DocumentedFeature public final class PostprocessGermlineCNVCalls extends GATKTool { - private static final Logger logger = LogManager.getLogger(PostprocessGermlineCNVCalls.class); - public static final String SEGMENT_GERMLINE_CNV_CALLS_PYTHON_SCRIPT = "segment_gcnv_calls.py"; public static final String CALLS_SHARD_PATH_LONG_NAME = "calls-shard-path"; @@ -115,20 +111,20 @@ public final class PostprocessGermlineCNVCalls extends GATKTool { fullName = CALLS_SHARD_PATH_LONG_NAME, minElements = 1 ) - private List unsortedCallsShardPaths; + private List inputUnsortedCallsShardPaths; @Argument( doc = "List of paths to GermlineCNVCaller model directories.", fullName = MODEL_SHARD_PATH_LONG_NAME, minElements = 1 ) - private List unsortedModelShardPaths; + private List inputUnsortedModelShardPaths; @Argument( doc = "Path to contig-ploidy calls directory (output of DetermineGermlineContigPloidy).", fullName = CONTIG_PLOIDY_CALLS_LONG_NAME ) - private File contigPloidyCallsPath; + private File inputContigPloidyCallsPath; @Argument( doc = "Sample index in the call-set (must be contained in all shards).", @@ -160,10 +156,9 @@ public final class PostprocessGermlineCNVCalls extends GATKTool { @Argument( doc = "Output segments VCF file.", - fullName = OUTPUT_SEGMENTS_VCF_LONG_NAME, - optional = true + fullName = OUTPUT_SEGMENTS_VCF_LONG_NAME ) - private File outputSegmentsVCFFile = null; + private File outputSegmentsVCFFile; /** * A list of {@link SimpleIntervalCollection} for each shard @@ -203,10 +198,8 @@ public final class PostprocessGermlineCNVCalls extends GATKTool { @Override public void onStartup() { super.onStartup(); - if (outputSegmentsVCFFile != null) { - /* check for successful import of gcnvkernel */ - PythonScriptExecutor.checkPythonEnvironmentForPackage("gcnvkernel"); - } + /* check for successful import of gcnvkernel */ + PythonScriptExecutor.checkPythonEnvironmentForPackage("gcnvkernel"); } /** @@ -215,15 +208,15 @@ public void onStartup() { */ @Override public void onTraversalStart() { - numShards = unsortedCallsShardPaths.size(); - Utils.validateArg(unsortedModelShardPaths.size() == numShards, - "The number of input model shards must match the number of input call shards."); + validateArguments(); + + numShards = inputUnsortedCallsShardPaths.size(); /* get intervals from each call and model shard in the provided (potentially arbitrary) order */ final List unsortedIntervalCollectionsFromCalls = - getIntervalCollectionsFromPaths(unsortedCallsShardPaths); + getIntervalCollectionsFromPaths(inputUnsortedCallsShardPaths); final List unsortedIntervalCollectionsFromModels = - getIntervalCollectionsFromPaths(unsortedModelShardPaths); + getIntervalCollectionsFromPaths(inputUnsortedModelShardPaths); /* assert that all shards have the same SAM sequence dictionary */ final SAMSequenceDictionary samSequenceDictionary = unsortedIntervalCollectionsFromCalls.get(0) @@ -248,10 +241,10 @@ public void onTraversalStart() { final List 
sortedModelShardsOrder = AbstractLocatableCollection.getShardedCollectionSortOrder( unsortedIntervalCollectionsFromModels); sortedCallsShardPaths = sortedCallShardsOrder.stream() - .map(unsortedCallsShardPaths::get) + .map(inputUnsortedCallsShardPaths::get) .collect(Collectors.toList()); sortedModelShardPaths = sortedModelShardsOrder.stream() - .map(unsortedModelShardPaths::get) + .map(inputUnsortedModelShardPaths::get) .collect(Collectors.toList()); final List sortedIntervalCollectionsFromCalls = sortedCallShardsOrder.stream() .map(unsortedIntervalCollectionsFromCalls::get) @@ -290,10 +283,26 @@ public void onTraversalStart() { refAutosomalIntegerCopyNumberState = new IntegerCopyNumberState(refAutosomalCopyNumber); } + private void validateArguments() { + Utils.validateArg(inputUnsortedCallsShardPaths.size() == inputUnsortedModelShardPaths.size(), + "The number of input call shards must match the number of input model shards."); + + inputUnsortedCallsShardPaths.forEach(CopyNumberArgumentValidationUtils::validateInputs); + inputUnsortedModelShardPaths.forEach(CopyNumberArgumentValidationUtils::validateInputs); + CopyNumberArgumentValidationUtils.validateInputs(inputContigPloidyCallsPath); + CopyNumberArgumentValidationUtils.validateOutputFiles( + outputIntervalsVCFFile, + outputSegmentsVCFFile); + } + @Override - public void traverse() { + public Object onTraversalSuccess() { generateIntervalsVCFFileFromAllShards(); generateSegmentsVCFFileFromAllShards(); + + logger.info("PostprocessGermlineCNVCalls complete."); + + return null; } private void generateIntervalsVCFFileFromAllShards() { @@ -305,6 +314,7 @@ private void generateIntervalsVCFFileFromAllShards() { refAutosomalIntegerCopyNumberState, allosomalContigSet); germlineCNVIntervalVariantComposer.composeVariantContextHeader(getDefaultToolVCFHeaderLines()); + logger.info(String.format("Writing intervals VCF file to %s...", outputIntervalsVCFFile.getAbsolutePath())); for (int shardIndex = 0; shardIndex < numShards; shardIndex++) { logger.info(String.format("Analyzing shard %d...", shardIndex)); germlineCNVIntervalVariantComposer.writeAll(getShardIntervalCopyNumberPosteriorData(shardIndex)); @@ -427,40 +437,37 @@ private static File getIntervalFileFromShardDirectory(final File shardPath) { } private void generateSegmentsVCFFileFromAllShards() { - if (outputSegmentsVCFFile != null) { - logger.info("Generating segments VCF file..."); - - /* perform segmentation */ - final File pythonScriptOutputPath = IOUtils.createTempDir("gcnv-segmented-calls"); - final boolean pythonScriptSucceeded = executeSegmentGermlineCNVCallsPythonScript( - sampleIndex, contigPloidyCallsPath, sortedCallsShardPaths, sortedModelShardPaths, - pythonScriptOutputPath); - if (!pythonScriptSucceeded) { - throw new UserException("Python return code was non-zero."); - } - - /* parse segments */ - final File copyNumberSegmentsFile = getCopyNumberSegmentsFile(pythonScriptOutputPath, sampleIndex); - final IntegerCopyNumberSegmentCollection integerCopyNumberSegmentCollection = - new IntegerCopyNumberSegmentCollection(copyNumberSegmentsFile); - final String sampleNameFromSegmentCollection = integerCopyNumberSegmentCollection - .getMetadata().getSampleName(); - Utils.validate(sampleNameFromSegmentCollection.equals(sampleName), - String.format("Sample name found in the header of copy-number segments file is " + - "different from the expected sample name (found: %s, expected: %s).", - sampleNameFromSegmentCollection, sampleName)); - - /* write variants */ - final VariantContextWriter 
segmentsVCFWriter = createVCFWriter(outputSegmentsVCFFile); - final GermlineCNVSegmentVariantComposer germlineCNVSegmentVariantComposer = - new GermlineCNVSegmentVariantComposer(segmentsVCFWriter, sampleName, - refAutosomalIntegerCopyNumberState, allosomalContigSet); - germlineCNVSegmentVariantComposer.composeVariantContextHeader(getDefaultToolVCFHeaderLines()); - germlineCNVSegmentVariantComposer.writeAll(integerCopyNumberSegmentCollection.getRecords()); - segmentsVCFWriter.close(); - } else { - logger.info("No segments output VCF file was provided -- skipping segmentation."); + logger.info("Generating segments VCF file..."); + + /* perform segmentation */ + final File pythonScriptOutputPath = IOUtils.createTempDir("gcnv-segmented-calls"); + final boolean pythonScriptSucceeded = executeSegmentGermlineCNVCallsPythonScript( + sampleIndex, inputContigPloidyCallsPath, sortedCallsShardPaths, sortedModelShardPaths, + pythonScriptOutputPath); + if (!pythonScriptSucceeded) { + throw new UserException("Python return code was non-zero."); } + + /* parse segments */ + final File copyNumberSegmentsFile = getCopyNumberSegmentsFile(pythonScriptOutputPath, sampleIndex); + final IntegerCopyNumberSegmentCollection integerCopyNumberSegmentCollection = + new IntegerCopyNumberSegmentCollection(copyNumberSegmentsFile); + final String sampleNameFromSegmentCollection = integerCopyNumberSegmentCollection + .getMetadata().getSampleName(); + Utils.validate(sampleNameFromSegmentCollection.equals(sampleName), + String.format("Sample name found in the header of copy-number segments file is " + + "different from the expected sample name (found: %s, expected: %s).", + sampleNameFromSegmentCollection, sampleName)); + + /* write variants */ + logger.info(String.format("Writing segments VCF file to %s...", outputSegmentsVCFFile.getAbsolutePath())); + final VariantContextWriter segmentsVCFWriter = createVCFWriter(outputSegmentsVCFFile); + final GermlineCNVSegmentVariantComposer germlineCNVSegmentVariantComposer = + new GermlineCNVSegmentVariantComposer(segmentsVCFWriter, sampleName, + refAutosomalIntegerCopyNumberState, allosomalContigSet); + germlineCNVSegmentVariantComposer.composeVariantContextHeader(getDefaultToolVCFHeaderLines()); + germlineCNVSegmentVariantComposer.writeAll(integerCopyNumberSegmentCollection.getRecords()); + segmentsVCFWriter.close(); } /** @@ -486,13 +493,13 @@ private static boolean executeSegmentGermlineCNVCallsPythonScript(final int samp final PythonScriptExecutor executor = new PythonScriptExecutor(true); final List arguments = new ArrayList<>(); arguments.add("--ploidy_calls_path"); - arguments.add(contigPloidyCallsPath.getAbsolutePath()); + arguments.add(CopyNumberArgumentValidationUtils.getCanonicalPath(contigPloidyCallsPath)); arguments.add("--model_shards"); - arguments.addAll(sortedModelDirectories.stream().map(File::getAbsolutePath).collect(Collectors.toList())); + arguments.addAll(sortedModelDirectories.stream().map(CopyNumberArgumentValidationUtils::getCanonicalPath).collect(Collectors.toList())); arguments.add("--calls_shards"); - arguments.addAll(sortedCallDirectories.stream().map(File::getAbsolutePath).collect(Collectors.toList())); + arguments.addAll(sortedCallDirectories.stream().map(CopyNumberArgumentValidationUtils::getCanonicalPath).collect(Collectors.toList())); arguments.add("--output_path"); - arguments.add(pythonScriptOutputPath.getAbsolutePath()); + arguments.add(CopyNumberArgumentValidationUtils.getCanonicalPath(pythonScriptOutputPath)); arguments.add("--sample_index"); 
arguments.add(String.valueOf(sampleIndex)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java index b47c93bf70b..85be6adc559 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/PreprocessIntervals.java @@ -5,7 +5,6 @@ import htsjdk.samtools.util.IntervalList; import org.apache.commons.math3.util.FastMath; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; @@ -121,7 +120,7 @@ public final class PreprocessIntervals extends GATKTool { fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) - private File outputFile; + private File outputPreprocessedIntervalsFile; @Override public boolean requiresReference() { @@ -130,16 +129,17 @@ public boolean requiresReference() { @Override public void onTraversalStart() { + validateArguments(); + } + + @Override + public Object onTraversalSuccess() { final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary(); - final List inputIntervals; - if (hasUserSuppliedIntervals()) { - CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(intervalArgumentCollection); - inputIntervals = intervalArgumentCollection.getIntervals(sequenceDictionary); - } else { - // if the user didn't add any intervals, we assume that they wanted to do whole genome sequencing - inputIntervals = IntervalUtils.getAllIntervalsForReference(sequenceDictionary); - } + final List inputIntervals = hasUserSuppliedIntervals() + ? 
intervalArgumentCollection.getIntervals(sequenceDictionary) + : IntervalUtils.getAllIntervalsForReference(sequenceDictionary); // if the user didn't add any intervals, + // we assume that they wanted to do whole genome sequencing logger.info("Padding intervals..."); final IntervalList paddedIntervalList = padIntervals(inputIntervals, padding, sequenceDictionary); @@ -151,8 +151,19 @@ public void onTraversalStart() { final ReferenceDataSource reference = ReferenceDataSource.of(referenceArguments.getReferencePath()); final IntervalList bins = filterBinsContainingOnlyNs(unfilteredBins, reference); - logger.info(String.format("Writing bins to %s...", outputFile)); - bins.write(outputFile); + logger.info(String.format("Writing bins to %s...", outputPreprocessedIntervalsFile.getAbsolutePath())); + bins.write(outputPreprocessedIntervalsFile); + + logger.info("PreprocessIntervals complete."); + + return null; + } + + private void validateArguments() { + if (hasUserSuppliedIntervals()) { + CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(intervalArgumentCollection); + } + CopyNumberArgumentValidationUtils.validateOutputFiles(outputPreprocessedIntervalsFile); } private static IntervalList padIntervals(final List inputIntervals, final int padding, final SAMSequenceDictionary sequenceDictionary) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/CopyNumberArgumentValidationUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/CopyNumberArgumentValidationUtils.java index 80dc8f82571..f00937f4366 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/CopyNumberArgumentValidationUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/CopyNumberArgumentValidationUtils.java @@ -6,6 +6,7 @@ import htsjdk.samtools.util.Locatable; import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; +import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AbstractLocatableCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleCountCollection; @@ -15,8 +16,10 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval; import org.broadinstitute.hellbender.utils.*; import org.broadinstitute.hellbender.utils.io.IOUtils; +import org.broadinstitute.hellbender.utils.python.PythonScriptExecutor; import java.io.File; +import java.io.IOException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -184,4 +187,82 @@ public static AnnotatedIntervalCollection validateAnnotatedIntervalsSubset(final "Annotated intervals do not contain all specified intervals."); return new AnnotatedIntervalCollection(locatableCollection.getMetadata(), subsetAnnotatedIntervals); } + + /** + * Validate that input files and/or directories are readable if they are not {@code null} (i.e., optional inputs). + */ + public static void validateInputs(final File ... 
inputs) { + if (inputs != null) { + for (final File input : inputs) { + if (input != null) { + if (input.isFile()) { + IOUtils.canReadFile(input); + } else if (input.isDirectory() && !input.canRead()) { + throw new UserException.CouldNotReadInputFile(input); + } + } + } + } + } + + /** + * Validate that output files are writeable, whether or not they already exist. + */ + public static void validateOutputFiles(final File ... outputs) { + Utils.nonNull(outputs); + for (final File output : outputs) { + Utils.nonNull(output); + if ((output.exists() && !output.canWrite()) || (!output.exists() && !output.getAbsoluteFile().getParentFile().canWrite())) { + throw new UserException.CouldNotCreateOutputFile(output, ": The output file is not writeable."); + } + } + } + + /** + * Validate that output directories are writeable. If a directory does not exist, create it. + */ + public static void validateAndPrepareOutputDirectories(final File ... outputs) { + Utils.nonNull(outputs); + for (final File output : outputs) { + Utils.nonNull(output); + if (output.exists()) { + if (!output.canWrite()) { + throw new UserException.CouldNotCreateOutputFile(output, ": The output directory is not writeable."); + } + } else { + try { + IOUtils.createDirectory(output.getAbsolutePath()); + } catch (final IOException e) { + throw new UserException.CouldNotCreateOutputFile(output, ": The output directory does not exist and could not be created."); + } + } + } + } + + /** + * File paths that are passed to {@link PythonScriptExecutor} must be canonical (rather than absolute). + * See https://github.com/broadinstitute/gatk/issues/4724. + */ + public static String getCanonicalPath(final File file) { + Utils.nonNull(file); + try { + return file.getCanonicalPath(); + } catch (final IOException e) { + throw new UserException.BadInput(String.format("Could not resolve a canonical file path: %s", file)); + } + } + + /** + * File paths that are passed to {@link PythonScriptExecutor} must be canonical (rather than absolute). + * See https://github.com/broadinstitute/gatk/issues/4724. + */ + public static String getCanonicalPath(final String filename) { + Utils.nonEmpty(filename); + return getCanonicalPath(new File(filename)); + } + + public static String addTrailingSlashIfNecessary(final String outputDir) { + Utils.nonEmpty(outputDir); + return outputDir.endsWith(File.separator) ? 
outputDir : outputDir + File.separator; + } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCNVHybridADVIArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCNVHybridADVIArgumentCollection.java index b0b05b00cd8..4a63fb1e958 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCNVHybridADVIArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCNVHybridADVIArgumentCollection.java @@ -28,7 +28,7 @@ public final class GermlineCNVHybridADVIArgumentCollection extends HybridADVIArg defaultValue.put(HybridADVIArgument.MAX_CALLING_ITERS, 10); defaultValue.put(HybridADVIArgument.CALLER_UPDATE_CONVERGENCE_THRESHOLD, 0.001); defaultValue.put(HybridADVIArgument.CALLER_INTERNAL_ADMIXING_RATE, 0.75); - defaultValue.put(HybridADVIArgument.CALLER_EXTERNAL_ADMIXING_RATE, 1.00); + defaultValue.put(HybridADVIArgument.CALLER_EXTERNAL_ADMIXING_RATE, 1.0); defaultValue.put(HybridADVIArgument.DISABLE_ANNEALING, false); defaultValue.put(HybridADVIArgument.DISABLE_CALLER, false); defaultValue.put(HybridADVIArgument.DISABLE_SAMPLER, false); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java index 89f9ef21b67..cffad07d934 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java @@ -26,6 +26,7 @@ public final class GermlineCallingArgumentCollection implements Serializable { "is set to the contig integer ploidy)", fullName = P_ALT_LONG_NAME, minValue = 0., + maxValue = 1., optional = true ) private double pAlt = 1e-6; @@ -35,6 +36,7 @@ public final class GermlineCallingArgumentCollection implements Serializable { "copy-number states are equally likely to be called).", fullName = P_ACTIVE_LONG_NAME, minValue = 0., + maxValue = 1., optional = true ) private double pActive = 1e-2; @@ -77,15 +79,14 @@ public List generatePythonArguments(final GermlineCNVCaller.RunMode runM } public void validate() { - ParamUtils.inRange(pAlt, 0.0, 1.0, - "Prior probability of alternative copy-number states must be between 0 and 1."); - ParamUtils.inRange(pActive, 0.0, 1.0, - "Prior probability of treating an interval as CNV-active must be between 0 and 1."); ParamUtils.isPositive(cnvCoherenceLength, - "Coherence length of CNV events must be positive."); + String.format("Coherence length of CNV events (%s) must be positive.", + CNV_COHERENCE_LENGTH_LONG_NAME)); ParamUtils.isPositive(classCoherenceLength, - "Coherence length of CNV class domains must be positive."); + String.format("Coherence length of CNV class domains (%s) must be positive.", + CLASS_COHERENCE_LENGTH_LONG_NAME)); ParamUtils.isPositive(maxCopyNumber, - "Highest allowed copy-number must be positive."); + String.format("Highest allowed copy-number (%s) must be positive.", + MAX_COPY_NUMBER_LONG_NAME)); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineContigPloidyModelArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineContigPloidyModelArgumentCollection.java index 604537e30e9..fd7cd7f6d93 100644 --- 
a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineContigPloidyModelArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineContigPloidyModelArgumentCollection.java @@ -33,6 +33,7 @@ public final class GermlineContigPloidyModelArgumentCollection implements Serial doc = "Typical mapping error rate.", fullName = MAPPING_ERROR_RATE_LONG_NAME, minValue = 0., + maxValue = 1., optional = true ) private double mappingErrorRate = 0.01; @@ -68,13 +69,17 @@ public List generatePythonArguments(final DetermineGermlineContigPloidy. public void validate() { ParamUtils.isPositive(meanBiasStandardDeviation, - "Prior standard deviation of the contig-level mean coverage bias must be positive."); + String.format("Prior standard deviation of the contig-level mean coverage bias (%s) must be positive.", + MEAN_BIAS_STANDARD_DEVIATION_LONG_NAME)); ParamUtils.isPositive(mappingErrorRate, - "Typical mapping error rate must be positive."); + String.format("Typical mapping error rate (%s) must be positive.", + MAPPING_ERROR_RATE_LONG_NAME)); ParamUtils.isPositive(globalPsiScale, - "Prior scale of contig coverage unexplained variance must be positive."); + String.format("Prior scale of contig coverage unexplained variance (%s) must be positive.", + GLOBAL_PSI_SCALE_LONG_NAME)); ParamUtils.isPositive(samplePsiScale, - "Prior scale of the sample-specific correction to the coverage unexplained variance " + - "must be positive."); + String.format("Prior scale of the sample-specific correction to the coverage unexplained variance " + + "(%s) must be positive.", + SAMPLE_PSI_SCALE_LONG_NAME)); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java index 94cd41a1b5e..2c00f60bc14 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java @@ -2,7 +2,6 @@ import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.hellbender.tools.copynumber.GermlineCNVCaller; -import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.param.ParamUtils; import java.io.Serializable; @@ -53,6 +52,7 @@ public enum CopyNumberPosteriorExpectationMode { doc = "Typical mapping error rate.", fullName = MAPPING_ERROR_RATE_LONG_NAME, minValue = 0., + maxValue = 1., optional = true ) private double mappingErrorRate = 0.01; @@ -101,7 +101,7 @@ public enum CopyNumberPosteriorExpectationMode { @Argument( doc = "Number of bins used to represent the GC-bias curves.", fullName = NUM_GC_BINS_LONG_NAME, - minValue = 1, + minValue = 2, optional = true ) private int numGCBins = 20; @@ -170,26 +170,33 @@ public List generatePythonArguments(final GermlineCNVCaller.RunMode runM public void validate() { ParamUtils.isPositive(maxBiasFactors, - "Maximum number of bias factors must be positive."); + String.format("Maximum number of bias factors (%s) must be positive.", + MAX_BIAS_FACTORS_LONG_NAME)); ParamUtils.isPositive(mappingErrorRate, - "Mapping error rate must be positive."); + String.format("Mapping error rate (%s) must be positive.", + MAPPING_ERROR_RATE_LONG_NAME)); ParamUtils.isPositive(intervalPsiScale, - "Typical scale of interval-specific 
unexplained variance must be positive."); + String.format("Typical scale of interval-specific unexplained variance (%s) must be positive.", + INTERVAL_PSI_SCALE_LONG_NAME)); ParamUtils.isPositive(samplePsiScale, - "Typical scale of sample-specific correction to the unexplained variance must be positive."); + String.format("Typical scale of sample-specific correction to the unexplained variance (%s) must be positive.", + SAMPLE_PSI_SCALE_LONG_NAME)); ParamUtils.isPositive(depthCorrectionTau, - "Precision of read depth pinning to its global value must be positive."); + String.format("Precision of read depth pinning to its global value (%s) must be positive.", + DEPTH_CORRECTION_TAU_LONG_NAME)); ParamUtils.isPositive(logMeanBiasStandardDeviation, - "Standard deviation of log mean bias must be positive."); + String.format("Standard deviation of log mean bias (%s) must be positive.", + LOG_MEAN_BIAS_STANDARD_DEVIATION_LONG_NAME)); ParamUtils.isPositive(initARDRelUnexplainedVariance, - "Initial value of ARD prior precision relative to the scale of " + - "interval-specific unexplained variance must be positive."); - Utils.validateArg(numGCBins >= 2, - "Number of bins used to represent the GC-bias curves must be at least 2."); + String.format("Initial value of ARD prior precision relative to the scale of " + + "interval-specific unexplained variance (%s) must be positive.", + INIT_ARD_REL_UNEXPLAINED_VARIANCE_LONG_NAME)); ParamUtils.isPositive(gcCurveStandardDeviation, - "Prior standard deviation of the GC curve from flat must be positive."); + String.format("Prior standard deviation of the GC curve from flat (%s) must be positive.", + GC_CURVE_STANDARD_DEVIATION_LONG_NAME)); ParamUtils.isPositiveOrZero(activeClassPaddingHybridMode, - "CNV-active interval padding in HYBRID copy-number posterior expectation mode must " + - "be non-negative."); + String.format("CNV-active interval padding in HYBRID copy-number posterior expectation mode (%s) must " + + "be non-negative.", + ACTIVE_CLASS_PADDING_HYBRID_MODE_LONG_NAME)); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/HybridADVIArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/HybridADVIArgumentCollection.java index 379a38ea804..6152a3854da 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/HybridADVIArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/HybridADVIArgumentCollection.java @@ -1,7 +1,6 @@ package org.broadinstitute.hellbender.tools.copynumber.arguments; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.hellbender.utils.Utils; import java.io.Serializable; import java.util.ArrayList; @@ -77,27 +76,27 @@ public enum HybridADVIArgument { minValue = 0. ) private double learningRate = - (Double)getDefaultValue(HybridADVIArgument.LEARNING_RATE); + (Double) getDefaultValue(HybridADVIArgument.LEARNING_RATE); @Argument( doc="Adamax optimizer first moment estimation forgetting factor.", fullName = ADAMAX_BETA_1_LONG_NAME, optional = true, minValue = 0., - maxValue = 1.0 + maxValue = 1. ) private double adamaxBeta1 = - (Double)getDefaultValue(HybridADVIArgument.ADAMAX_BETA_1); + (Double) getDefaultValue(HybridADVIArgument.ADAMAX_BETA_1); @Argument( doc="Adamax optimizer second moment estimation forgetting factor.", fullName = ADAMAX_BETA_2_LONG_NAME, optional = true, minValue = 0., - maxValue = 1.0 + maxValue = 1. 
) private double adamaxBeta2 = - (Double)getDefaultValue(HybridADVIArgument.ADAMAX_BETA_2); + (Double) getDefaultValue(HybridADVIArgument.ADAMAX_BETA_2); @Argument( doc="Log emission samples drawn per round of sampling.", @@ -106,7 +105,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int logEmissionSamplesPerRound = - (Integer)getDefaultValue(HybridADVIArgument.LOG_EMISSION_SAMPLES_PER_ROUND); + (Integer) getDefaultValue(HybridADVIArgument.LOG_EMISSION_SAMPLES_PER_ROUND); @Argument( doc="Maximum tolerated median relative error in log emission sampling.", @@ -115,7 +114,7 @@ public enum HybridADVIArgument { minValue = 0 ) private double logEmissionMedianRelError = - (Double)getDefaultValue(HybridADVIArgument.LOG_EMISSION_SAMPLING_MEDIAN_REL_ERROR); + (Double) getDefaultValue(HybridADVIArgument.LOG_EMISSION_SAMPLING_MEDIAN_REL_ERROR); @Argument( doc="Log emission maximum sampling rounds.", @@ -124,7 +123,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int logEmissionSamplingRounds = - (Integer)getDefaultValue(HybridADVIArgument.LOG_EMISSION_SAMPLING_ROUNDS); + (Integer) getDefaultValue(HybridADVIArgument.LOG_EMISSION_SAMPLING_ROUNDS); @Argument( doc="Maximum ADVI iterations in the first epoch.", @@ -133,7 +132,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int maxADVIItersFirstEpoch = - (Integer)getDefaultValue(HybridADVIArgument.MAX_ADVI_ITER_FIRST_EPOCH); + (Integer) getDefaultValue(HybridADVIArgument.MAX_ADVI_ITER_FIRST_EPOCH); @Argument( doc="Maximum ADVI iterations in subsequent epochs.", @@ -142,7 +141,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int maxADVIItersSubsequentEpochs = - (Integer)getDefaultValue(HybridADVIArgument.MAX_ADVI_ITER_SUBSEQUENT_EPOCHS); + (Integer) getDefaultValue(HybridADVIArgument.MAX_ADVI_ITER_SUBSEQUENT_EPOCHS); @Argument( doc="Minimum number of training epochs.", @@ -151,7 +150,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int minTrainingEpochs = - (Integer)getDefaultValue(HybridADVIArgument.MIN_TRAINING_EPOCHS); + (Integer) getDefaultValue(HybridADVIArgument.MIN_TRAINING_EPOCHS); @Argument( doc="Maximum number of training epochs.", @@ -160,7 +159,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int maxTrainingEpochs = - (Integer)getDefaultValue(HybridADVIArgument.MAX_TRAINING_EPOCHS); + (Integer) getDefaultValue(HybridADVIArgument.MAX_TRAINING_EPOCHS); @Argument( doc="Initial temperature (for DA-ADVI).", @@ -169,7 +168,7 @@ public enum HybridADVIArgument { minValue = 0 ) private double initialTemperature = - (Double)getDefaultValue(HybridADVIArgument.INITIAL_TEMPERATURE); + (Double) getDefaultValue(HybridADVIArgument.INITIAL_TEMPERATURE); @Argument( doc="Number of thermal ADVI iterations (for DA-ADVI).", @@ -178,7 +177,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int numThermalADVIIters = - (Integer)getDefaultValue(HybridADVIArgument.NUM_THERMAL_ADVI_ITERS); + (Integer) getDefaultValue(HybridADVIArgument.NUM_THERMAL_ADVI_ITERS); @Argument( doc="Averaging window for calculating training signal-to-noise ratio (SNR) for convergence checking.", @@ -187,7 +186,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int convergenceSNRAveragingWindow = - (Integer)getDefaultValue(HybridADVIArgument.CONVERGENCE_SNR_AVERAGING_WINDOW); + (Integer) getDefaultValue(HybridADVIArgument.CONVERGENCE_SNR_AVERAGING_WINDOW); @Argument( doc="The SNR threshold to be reached before triggering the convergence countdown.", @@ -196,7 +195,7 @@ public enum HybridADVIArgument { 
minValue = 0 ) private double convergenceSNRTriggerThreshold = - (Double)getDefaultValue(HybridADVIArgument.CONVERGENCE_SNR_TRIGGER_THRESHOLD); + (Double) getDefaultValue(HybridADVIArgument.CONVERGENCE_SNR_TRIGGER_THRESHOLD); @Argument( doc="The number of ADVI iterations during which the SNR is required to stay below the set threshold " + @@ -206,7 +205,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int convergenceSNRCountdownWindow = - (Integer)getDefaultValue(HybridADVIArgument.CONVERGENCE_SNR_COUNTDOWN_WINDOW); + (Integer) getDefaultValue(HybridADVIArgument.CONVERGENCE_SNR_COUNTDOWN_WINDOW); @Argument( doc="Maximum number of internal self-consistency iterations within each calling step.", @@ -215,7 +214,7 @@ public enum HybridADVIArgument { minValue = 0 ) private int maxCallingIters = - (Integer)getDefaultValue(HybridADVIArgument.MAX_CALLING_ITERS); + (Integer) getDefaultValue(HybridADVIArgument.MAX_CALLING_ITERS); @Argument( doc="Maximum tolerated calling update size for convergence.", @@ -224,7 +223,7 @@ public enum HybridADVIArgument { minValue = 0 ) private double callerUpdateConvergenceThreshold = - (Double)getDefaultValue(HybridADVIArgument.CALLER_UPDATE_CONVERGENCE_THRESHOLD); + (Double) getDefaultValue(HybridADVIArgument.CALLER_UPDATE_CONVERGENCE_THRESHOLD); @Argument( doc="Admixing ratio of new and old called posteriors (between 0 and 1; larger values implies using " + @@ -234,7 +233,7 @@ public enum HybridADVIArgument { minValue = 0 ) private double callerInternalAdmixingRate = - (Double)getDefaultValue(HybridADVIArgument.CALLER_INTERNAL_ADMIXING_RATE); + (Double) getDefaultValue(HybridADVIArgument.CALLER_INTERNAL_ADMIXING_RATE); @Argument( doc="Admixing ratio of new and old called posteriors (between 0 and 1; larger values implies using " + @@ -244,7 +243,7 @@ public enum HybridADVIArgument { minValue = 0 ) private double callerExternalAdmixingRate = - (Double)getDefaultValue(HybridADVIArgument.CALLER_EXTERNAL_ADMIXING_RATE); + (Double) getDefaultValue(HybridADVIArgument.CALLER_EXTERNAL_ADMIXING_RATE); @Argument( doc="(advanced) Disable sampler.", @@ -252,7 +251,7 @@ public enum HybridADVIArgument { optional = true ) private boolean disableSampler = - (Boolean)getDefaultValue(HybridADVIArgument.DISABLE_SAMPLER); + (Boolean) getDefaultValue(HybridADVIArgument.DISABLE_SAMPLER); @Argument( doc="(advanced) Disable caller.", @@ -260,7 +259,7 @@ public enum HybridADVIArgument { optional = true ) private boolean disableCaller = - (Boolean)getDefaultValue(HybridADVIArgument.DISABLE_CALLER); + (Boolean) getDefaultValue(HybridADVIArgument.DISABLE_CALLER); @Argument( doc="(advanced) Disable annealing.", @@ -268,12 +267,12 @@ public enum HybridADVIArgument { optional = true ) private boolean disableAnnealing = - (Boolean)getDefaultValue(HybridADVIArgument.DISABLE_ANNEALING); + (Boolean) getDefaultValue(HybridADVIArgument.DISABLE_ANNEALING); public void validate() { - Utils.validateArg(maxTrainingEpochs >= minTrainingEpochs, - "Maximum number of training epochs must be greater or equal to minimum number of training " + - "epochs."); + //we could validate some of these arguments and fail early, + //but let's assume most users won't mess with them unless they know what they're doing; + //they are validated with assertion statements in the HybridInferenceParameters class in inference_task_base.py } public List generatePythonArguments() { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/denoising/HDF5SVDReadCountPanelOfNormals.java 
b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/denoising/HDF5SVDReadCountPanelOfNormals.java index e786f04d379..4290700159e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/denoising/HDF5SVDReadCountPanelOfNormals.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/denoising/HDF5SVDReadCountPanelOfNormals.java @@ -20,7 +20,6 @@ import org.broadinstitute.hellbender.tools.copynumber.CreateReadCountPanelOfNormals; import org.broadinstitute.hellbender.tools.copynumber.utils.HDF5Utils; import org.broadinstitute.hellbender.utils.SimpleInterval; -import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.spark.SparkConverter; @@ -130,7 +129,6 @@ public final class HDF5SVDReadCountPanelOfNormals implements SVDReadCountPanelOf * Otherwise, some operations will hang. */ private HDF5SVDReadCountPanelOfNormals(final HDF5File file) { - Utils.nonNull(file); IOUtils.canReadFile(file.getFile()); this.file = file; sequenceDictionary = new Lazy<>(() -> { @@ -224,7 +222,6 @@ public double[][] getEigensampleVectors() { * version number is not up to date. */ public static HDF5SVDReadCountPanelOfNormals read(final HDF5File file) { - Utils.nonNull(file); IOUtils.canReadFile(file.getFile()); final HDF5SVDReadCountPanelOfNormals pon = new HDF5SVDReadCountPanelOfNormals(file); if (pon.getVersion() < CURRENT_PON_VERSION) { @@ -257,7 +254,7 @@ public static void create(final File outFile, final int maximumChunkSize, final JavaSparkContext ctx) { try (final HDF5File file = new HDF5File(outFile, HDF5File.OpenMode.CREATE)) { - logger.info("Creating " + outFile.getAbsolutePath() + "..."); + logger.info(String.format("Creating read-count panel of normals at %s...", outFile.getAbsolutePath())); final HDF5SVDReadCountPanelOfNormals pon = new HDF5SVDReadCountPanelOfNormals(file); logger.info(String.format("Writing version number (" + PON_VERSION_STRING_FORMAT + ")...", CURRENT_PON_VERSION)); @@ -357,7 +354,7 @@ public static void create(final File outFile, throw new GATKException(String.format("Could not create panel of normals. It may be necessary to use stricter parameters for filtering. " + "For example, use a larger value of %s.", CreateReadCountPanelOfNormals.MINIMUM_INTERVAL_MEDIAN_PERCENTILE_LONG_NAME), exception); } - logger.info(String.format("Read-count panel of normals written to %s.", outFile)); + logger.info(String.format("Read-count panel of normals written to %s.", outFile.getAbsolutePath())); } //PRIVATE WRITERS (write values to HDF5 file) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java index cdd112163d3..037d76e96ba 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/AbstractRecordCollection.java @@ -110,6 +110,7 @@ public final List getRecords() { * Writes the records to file. 
*/ public void write(final File outputFile) { + Utils.nonNull(outputFile); try (final FileWriter writer = new FileWriter(outputFile)) { writer.write(metadata.toHeader().getSAMString()); } catch (final IOException e) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java index ddaf921d132..ece3990b15a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/CalledLegacySegmentCollection.java @@ -1,6 +1,7 @@ package org.broadinstitute.hellbender.tools.copynumber.formats.collections; import org.apache.commons.lang3.StringUtils; +import org.broadinstitute.barclay.utils.Utils; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.CalledCopyRatioSegment; @@ -93,6 +94,7 @@ public CalledLegacySegmentCollection(final File inputFile) { // output of SAM-style header is suppressed @Override public void write(final File outputFile) { + Utils.nonNull(outputFile); try (final RecordWriter recordWriter = new RecordWriter(new FileWriter(outputFile, true))) { recordWriter.writeAllRecords(getRecords()); } catch (final IOException e) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/HDF5SimpleCountCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/HDF5SimpleCountCollection.java index 15dbf5a7be4..a9ae637c4c2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/HDF5SimpleCountCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/HDF5SimpleCountCollection.java @@ -64,7 +64,7 @@ public final class HDF5SimpleCountCollection { * Otherwise, some operations will hang. 
*/ HDF5SimpleCountCollection(final HDF5File file) { - Utils.nonNull(file, "The input file cannot be null."); + Utils.nonNull(file); this.file = file; sampleName = new Lazy<>(() -> file.readStringArray(SAMPLE_NAME_PATH)[0]); sequenceDictionary = new Lazy<>(() -> { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/LegacySegmentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/LegacySegmentCollection.java index e1f733a5116..85e44635511 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/LegacySegmentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/collections/LegacySegmentCollection.java @@ -4,6 +4,7 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata; import org.broadinstitute.hellbender.tools.copynumber.formats.records.LegacySegment; import org.broadinstitute.hellbender.utils.SimpleInterval; +import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.tsv.DataLine; import org.broadinstitute.hellbender.utils.tsv.TableColumnCollection; @@ -78,6 +79,7 @@ public LegacySegmentCollection(final SampleLocatableMetadata metadata, // output of SAM-style header is suppressed @Override public void write(final File outputFile) { + Utils.nonNull(outputFile); try (final RecordWriter recordWriter = new RecordWriter(new FileWriter(outputFile, true))) { recordWriter.writeAllRecords(getRecords()); } catch (final IOException e) { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/MultidimensionalModeller.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/MultidimensionalModeller.java index 5d78ba76be3..b3db7e09243 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/MultidimensionalModeller.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/models/MultidimensionalModeller.java @@ -191,9 +191,9 @@ public void writeModelParameterFiles(final File copyRatioParameterFile, Utils.nonNull(copyRatioParameterFile); Utils.nonNull(alleleFractionParameterFile); ensureModelIsFit(); - logger.info("Writing posterior summaries for copy-ratio global parameters to " + copyRatioParameterFile); + logger.info(String.format("Writing posterior summaries for copy-ratio global parameters to %s...", copyRatioParameterFile.getAbsolutePath())); copyRatioModeller.getGlobalParameterDeciles().write(copyRatioParameterFile); - logger.info("Writing posterior summaries for allele-fraction global parameters to " + alleleFractionParameterFile); + logger.info(String.format("Writing posterior summaries for allele-fraction global parameters to %s...", alleleFractionParameterFile.getAbsolutePath())); alleleFractionModeller.getGlobalParameterDeciles().write(alleleFractionParameterFile); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotDenoisedCopyRatios.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotDenoisedCopyRatios.java index e8c85451509..f13b1fed58a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotDenoisedCopyRatios.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotDenoisedCopyRatios.java @@ -2,19 +2,17 @@ import htsjdk.samtools.SAMSequenceDictionary; import org.broadinstitute.barclay.argparser.Argument; -import 
org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; -import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.copynumber.DenoiseReadCounts; +import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils; import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberStandardArgument; import org.broadinstitute.hellbender.tools.copynumber.formats.collections.CopyRatioCollection; import org.broadinstitute.hellbender.utils.R.RScriptExecutor; import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.io.Resource; import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; @@ -46,7 +44,7 @@ * *
 * Output directory.
- * This must be a pre-existing directory.
+ * This will be created if it does not exist.
 *
  • * * @@ -110,6 +108,7 @@ public final class PlotDenoisedCopyRatios extends CommandLineProgram { @Argument( doc = PlottingUtils.MINIMUM_CONTIG_LENGTH_DOC_STRING, fullName = PlottingUtils.MINIMUM_CONTIG_LENGTH_LONG_NAME, + minValue = 0, optional = true ) private int minContigLength = PlottingUtils.DEFAULT_MINIMUM_CONTIG_LENGTH; @@ -121,15 +120,15 @@ public final class PlotDenoisedCopyRatios extends CommandLineProgram { private String outputPrefix; @Argument( - doc = "Output directory.", + doc = "Output directory. This will be created if it does not exist.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) - private String outputDir; + private File outputDir; @Override protected Object doWork() { - checkRegularReadableUserFiles(); + validateArguments(); logger.info("Reading and validating input files..."); final CopyRatioCollection standardizedCopyRatios = new CopyRatioCollection(inputStandardizedCopyRatiosFile); @@ -150,25 +149,26 @@ protected Object doWork() { PlottingUtils.validateContigs(contigLengthMap, standardizedCopyRatios, inputStandardizedCopyRatiosFile, logger); PlottingUtils.validateContigs(contigLengthMap, denoisedCopyRatios, inputDenoisedCopyRatiosFile, logger); - logger.info("Generating plots..."); + logger.info(String.format("Writing plots to %s...", outputDir.getAbsolutePath())); final List contigNames = new ArrayList<>(contigLengthMap.keySet()); final List contigLengths = new ArrayList<>(contigLengthMap.values()); writeDenoisingPlots(sampleName, contigNames, contigLengths); - return "SUCCESS"; + logger.info("PlotDenoisedCopyRatios complete."); + + return null; } - private void checkRegularReadableUserFiles() { - Utils.nonNull(outputPrefix); - IOUtils.canReadFile(inputStandardizedCopyRatiosFile); - IOUtils.canReadFile(inputDenoisedCopyRatiosFile); - IOUtils.canReadFile(inputSequenceDictionaryFile); - if (!new File(outputDir).exists()) { - throw new UserException(String.format("Output directory %s does not exist.", outputDir)); - } + private void validateArguments() { + CopyNumberArgumentValidationUtils.validateInputs( + inputStandardizedCopyRatiosFile, + inputDenoisedCopyRatiosFile, + inputSequenceDictionaryFile); + Utils.nonEmpty(outputPrefix); + CopyNumberArgumentValidationUtils.validateAndPrepareOutputDirectories(outputDir); } - private String getSampleName(final CopyRatioCollection standardizedCopyRatios, + private static String getSampleName(final CopyRatioCollection standardizedCopyRatios, final CopyRatioCollection denoisedCopyRatios) { Utils.validateArg(standardizedCopyRatios.getMetadata().equals(denoisedCopyRatios.getMetadata()), "Metadata in input files must be identical."); @@ -185,7 +185,7 @@ private void writeDenoisingPlots(final String sampleName, final List contigLengths) { final String contigNamesArg = contigNames.stream().collect(Collectors.joining(PlottingUtils.CONTIG_DELIMITER)); //names separated by delimiter final String contigLengthsArg = contigLengths.stream().map(Object::toString).collect(Collectors.joining(PlottingUtils.CONTIG_DELIMITER)); //names separated by delimiter - final String outputDirArg = PlottingUtils.addTrailingSlashIfNecessary(outputDir); + final String outputDirArg = CopyNumberArgumentValidationUtils.addTrailingSlashIfNecessary(CopyNumberArgumentValidationUtils.getCanonicalPath(outputDir)); final RScriptExecutor executor = new RScriptExecutor(); @@ -195,8 +195,8 @@ private void writeDenoisingPlots(final String sampleName, //--args is needed for Rscript to recognize 
other arguments properly executor.addArgs("--args", "--sample_name=" + sampleName, - "--standardized_copy_ratios_file=" + inputStandardizedCopyRatiosFile, - "--denoised_copy_ratios_file=" + inputDenoisedCopyRatiosFile, + "--standardized_copy_ratios_file=" + CopyNumberArgumentValidationUtils.getCanonicalPath(inputStandardizedCopyRatiosFile), + "--denoised_copy_ratios_file=" + CopyNumberArgumentValidationUtils.getCanonicalPath(inputDenoisedCopyRatiosFile), "--contig_names=" + contigNamesArg, "--contig_lengths=" + contigLengthsArg, "--output_dir=" + outputDirArg, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegments.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegments.java index ec84df6ad55..4af23627d0a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegments.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegments.java @@ -2,13 +2,11 @@ import htsjdk.samtools.SAMSequenceDictionary; import org.broadinstitute.barclay.argparser.Argument; -import org.broadinstitute.barclay.argparser.BetaFeature; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup; -import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.tools.copynumber.DenoiseReadCounts; import org.broadinstitute.hellbender.tools.copynumber.ModelSegments; import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils; @@ -21,7 +19,6 @@ import org.broadinstitute.hellbender.tools.copynumber.formats.records.ModeledSegment; import org.broadinstitute.hellbender.utils.R.RScriptExecutor; import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.io.Resource; import org.broadinstitute.hellbender.utils.reference.ReferenceUtils; @@ -59,7 +56,7 @@ * *
 * Output directory.
- * This must be a pre-existing directory.
+ * This will be created if it does not exist.
 *
  • * * @@ -113,6 +110,7 @@ @DocumentedFeature public final class PlotModeledSegments extends CommandLineProgram { private static final String PLOT_MODELED_SEGMENTS_R_SCRIPT = "PlotModeledSegments.R"; + private static final String MODELED_SEGMENTS_PLOT_FILE_SUFFIX = ".modeled.png"; @Argument( doc = "Input file containing denoised copy ratios (output of DenoiseReadCounts).", @@ -143,7 +141,9 @@ public final class PlotModeledSegments extends CommandLineProgram { @Argument( doc = PlottingUtils.MINIMUM_CONTIG_LENGTH_DOC_STRING, - fullName = PlottingUtils.MINIMUM_CONTIG_LENGTH_LONG_NAME + fullName = PlottingUtils.MINIMUM_CONTIG_LENGTH_LONG_NAME, + minValue = 0, + optional = true ) private int minContigLength = PlottingUtils.DEFAULT_MINIMUM_CONTIG_LENGTH; @@ -154,11 +154,11 @@ public final class PlotModeledSegments extends CommandLineProgram { private String outputPrefix; @Argument( - doc = "Output directory.", + doc = "Output directory. This will be created if it does not exist.", fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) - private String outputDir; + private File outputDir; //read input files private CopyRatioCollection denoisedCopyRatios; @@ -167,7 +167,7 @@ public final class PlotModeledSegments extends CommandLineProgram { @Override protected Object doWork() { - checkRegularReadableUserFiles(); + validateArguments(); logger.info("Reading and validating input files..."); denoisedCopyRatios = inputDenoisedCopyRatiosFile == null ? null : new CopyRatioCollection(inputDenoisedCopyRatiosFile); @@ -201,28 +201,29 @@ protected Object doWork() { PlottingUtils.validateContigs(contigLengthMap, allelicCounts, inputAllelicCountsFile, logger); PlottingUtils.validateContigs(contigLengthMap, modeledSegments, inputModeledSegmentsFile, logger); - logger.info("Generating plot..."); final List contigNames = new ArrayList<>(contigLengthMap.keySet()); final List contigLengths = new ArrayList<>(contigLengthMap.values()); - writeModeledSegmentsPlot(sampleName, contigNames, contigLengths); - return "SUCCESS"; + final File outputFile = new File(outputDir, outputPrefix + MODELED_SEGMENTS_PLOT_FILE_SUFFIX); + + logger.info(String.format("Writing plot to %s...", outputFile.getAbsolutePath())); + writeModeledSegmentsPlot(sampleName, contigNames, contigLengths, outputFile); + + logger.info("PlotModeledSegments complete."); + + return null; } - private void checkRegularReadableUserFiles() { - Utils.nonNull(outputPrefix); + private void validateArguments() { Utils.validateArg(!(inputDenoisedCopyRatiosFile == null && inputAllelicCountsFile == null), "Must provide at least a denoised-copy-ratios file or an allelic-counts file."); - if (inputDenoisedCopyRatiosFile != null) { - IOUtils.canReadFile(inputDenoisedCopyRatiosFile); - } - if (inputAllelicCountsFile != null) { - IOUtils.canReadFile(inputAllelicCountsFile); - } - IOUtils.canReadFile(inputModeledSegmentsFile); - IOUtils.canReadFile(inputSequenceDictionaryFile); - if (!new File(outputDir).exists()) { - throw new UserException(String.format("Output directory %s does not exist.", outputDir)); - } + + CopyNumberArgumentValidationUtils.validateInputs( + inputDenoisedCopyRatiosFile, + inputAllelicCountsFile, + inputModeledSegmentsFile, + inputSequenceDictionaryFile); + Utils.nonEmpty(outputPrefix); + CopyNumberArgumentValidationUtils.validateAndPrepareOutputDirectories(outputDir); } private String getSampleName() { @@ -267,10 +268,10 @@ private void validateNumPointsPerContig() { */ private void 
writeModeledSegmentsPlot(final String sampleName, final List contigNames, - final List contigLengths) { + final List contigLengths, + final File outputFile) { final String contigNamesArg = contigNames.stream().collect(Collectors.joining(PlottingUtils.CONTIG_DELIMITER)); //names separated by delimiter final String contigLengthsArg = contigLengths.stream().map(Object::toString).collect(Collectors.joining(PlottingUtils.CONTIG_DELIMITER)); //lengths separated by delimiter - final String outputDirArg = PlottingUtils.addTrailingSlashIfNecessary(outputDir); final RScriptExecutor executor = new RScriptExecutor(); //this runs the R statement "source("CNVPlottingLibrary.R")" before the main script runs @@ -279,13 +280,12 @@ private void writeModeledSegmentsPlot(final String sampleName, //--args is needed for Rscript to recognize other arguments properly executor.addArgs("--args", "--sample_name=" + sampleName, - "--denoised_copy_ratios_file=" + inputDenoisedCopyRatiosFile, - "--allelic_counts_file=" + inputAllelicCountsFile, - "--modeled_segments_file=" + inputModeledSegmentsFile, + "--denoised_copy_ratios_file=" + (inputDenoisedCopyRatiosFile == null ? null : CopyNumberArgumentValidationUtils.getCanonicalPath(inputDenoisedCopyRatiosFile)), + "--allelic_counts_file=" + (inputAllelicCountsFile == null ? null : CopyNumberArgumentValidationUtils.getCanonicalPath(inputAllelicCountsFile)), + "--modeled_segments_file=" + CopyNumberArgumentValidationUtils.getCanonicalPath(inputModeledSegmentsFile), "--contig_names=" + contigNamesArg, "--contig_lengths=" + contigLengthsArg, - "--output_dir=" + outputDirArg, - "--output_prefix=" + outputPrefix); + "--output_file=" + CopyNumberArgumentValidationUtils.getCanonicalPath(outputFile)); executor.exec(); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlottingUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlottingUtils.java index 88ae4500b58..e601a190f2f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlottingUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlottingUtils.java @@ -100,9 +100,4 @@ static void validateSequenceDictionarySubset(final SAMSequenceDictionary sequenc String.format("Sequence dictionary (%s) must be a subset of those contained in other input files (%s).", sequencesSubset, sequences)); } - - static String addTrailingSlashIfNecessary(final String outputDir) { - Utils.nonEmpty(outputDir); - return outputDir.endsWith(File.separator) ? outputDir : outputDir + File.separator; - } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/CopyNumberUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/CopyNumberUtils.java deleted file mode 100644 index e423b464119..00000000000 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/utils/CopyNumberUtils.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.broadinstitute.hellbender.tools.copynumber.utils; - -import org.broadinstitute.hellbender.exceptions.UserException; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.utils.python.PythonScriptExecutor; - -import java.io.File; -import java.io.IOException; - -public final class CopyNumberUtils { - private CopyNumberUtils() {} - - /** - * File paths that are passed to {@link PythonScriptExecutor} must be canonical (rather than absolute). - * See https://github.com/broadinstitute/gatk/issues/4724. 
-     */
-    public static String getCanonicalPath(final File file) {
-        Utils.nonNull(file);
-        try {
-            return file.getCanonicalPath();
-        } catch (final IOException e) {
-            throw new UserException.BadInput(String.format("Could not resolve a canonical file path: %s", file));
-        }
-    }
-
-    /**
-     * File paths that are passed to {@link PythonScriptExecutor} must be canonical (rather than absolute).
-     * See https://github.com/broadinstitute/gatk/issues/4724.
-     */
-    public static String getCanonicalPath(final String filename) {
-        Utils.nonEmpty(filename);
-        return getCanonicalPath(new File(filename));
-    }
-}
diff --git a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegments.R b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegments.R
index 18323c363ef..f9d93f3ec63 100644
--- a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegments.R
+++ b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegments.R
@@ -11,8 +11,7 @@ make_option(c("--allelic_counts_file", "-allelic_counts_file"), dest="allelic_co
 make_option(c("--modeled_segments_file", "-modeled_segments_file"), dest="modeled_segments_file", action="store"),
 make_option(c("--contig_names", "-contig_names"), dest="contig_names", action="store"),        #string with elements separated by "CONTIG_DELIMITER"
 make_option(c("--contig_lengths", "-contig_lengths"), dest="contig_lengths", action="store"),  #string with elements separated by "CONTIG_DELIMITER"
-make_option(c("--output_dir", "-output_dir"), dest="output_dir", action="store"),
-make_option(c("--output_prefix", "-output_prefix"), dest="output_prefix", action="store"))
+make_option(c("--output_file", "-output_file"), dest="output_file", action="store"))
 
 opt = parse_args(OptionParser(option_list=option_list))
 sample_name = opt[["sample_name"]]
@@ -21,8 +20,7 @@ allelic_counts_file = opt[["allelic_counts_file"]]
 modeled_segments_file = opt[["modeled_segments_file"]]
 contig_names_string = opt[["contig_names"]]
 contig_lengths_string = opt[["contig_lengths"]]
-output_dir = opt[["output_dir"]]
-output_prefix = opt[["output_prefix"]]
+output_file = opt[["output_file"]]
 
 #check that required input files exist; if not, quit with error code that GATK will pick up
 if (!file.exists(modeled_segments_file)) {
@@ -38,12 +36,11 @@ contig_starts = c(0, head(contig_ends, -1))
 
 WriteModeledSegmentsPlot = function(sample_name, allelic_counts_file, denoised_copy_ratios_file, modeled_segments_file, contig_names, contig_lengths, output_dir, output_prefix) {
     modeled_segments_df = ReadTSV(modeled_segments_file)
-    plot_file = file.path(output_dir, paste(output_prefix, ".modeled.png", sep=""))
     num_plots = ifelse(all(file.exists(c(denoised_copy_ratios_file, allelic_counts_file))), 2, 1)
-    png(plot_file, 12, 3.5 * num_plots, units="in", type="cairo", res=300, bg="white")
+    png(output_file, 12, 3.5 * num_plots, units="in", type="cairo", res=300, bg="white")
     par(mfrow=c(num_plots, 1), cex=0.75, las=1)
 
-    if (file.exists(denoised_copy_ratios_file) && denoised_copy_ratios_file!="null") {
+    if (file.exists(denoised_copy_ratios_file) && denoised_copy_ratios_file != "null") {
         denoised_copy_ratios_df = ReadTSV(denoised_copy_ratios_file)
 
         #transform to linear copy ratio
@@ -56,7 +53,7 @@ WriteModeledSegmentsPlot = function(sample_name, allelic_counts_file, denoised_c
         PlotCopyRatiosWithModeledSegments(denoised_copy_ratios_df, modeled_segments_df, contig_names, contig_starts)
     }
 
-    if (file.exists(allelic_counts_file) && allelic_counts_file!="null") {
+    if (file.exists(allelic_counts_file) && allelic_counts_file != "null") {
         allelic_counts_df = ReadTSV(allelic_counts_file)
 
         SetUpPlot(sample_name, "alternate-allele fraction", 0, 1.0, "contig", contig_names, contig_starts, contig_ends, TRUE)
@@ -66,7 +63,7 @@ WriteModeledSegmentsPlot = function(sample_name, allelic_counts_file, denoised_c
     dev.off()
 
     #check for created file and quit with error code if not found
-    if (!file.exists(plot_file)) {
+    if (!file.exists(output_file)) {
         quit(save="no", status=1, runLast=FALSE)
     }
 }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java
index bb0e7fcfbda..bffbf75a537 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/CallCopyRatioSegmentsIntegrationTest.java
@@ -32,7 +32,7 @@ public void testCallSegments() {
         Assert.assertEquals(calledCopyRatioSegments.getRecords().stream().map(s -> s.getCall().getOutputString()).toArray(), new String[] {"+", "-", "0", "0"});
 
         // Test writing the legacy format. Note that reading cannot be done through the CNV tools, since the header has been stripped away.
-        final File legacySegmentFile = CallCopyRatioSegments.createCalledLegacyOutputFilename(outputFile);
+        final File legacySegmentFile = CallCopyRatioSegments.createCalledLegacySegmentsFile(outputFile);
         Assert.assertTrue(legacySegmentFile.exists());
         Assert.assertTrue(legacySegmentFile.length() > 0);
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/DetermineGermlineContigPloidyIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/DetermineGermlineContigPloidyIntegrationTest.java
index 00654766837..147f95f47b3 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/DetermineGermlineContigPloidyIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/DetermineGermlineContigPloidyIntegrationTest.java
@@ -18,56 +18,52 @@
  *
  */
 public final class DetermineGermlineContigPloidyIntegrationTest extends CommandLineProgramTest {
-    private static final String gCNVSimDataDir = toolsTestDir + "copynumber/gcnv-sim-data/";
-    private static final File testContigPloidyPriorFile =
-            new File(gCNVSimDataDir, "contig_ploidy_prior.tsv");
-    private static final File[] testCountFiles = IntStream.range(0, 20)
-            .mapToObj(n -> new File(gCNVSimDataDir, String.format("SAMPLE_%03d_counts.tsv", n)))
+    private static final String GCNV_SIM_DATA_DIR = toolsTestDir + "copynumber/gcnv-sim-data/";
+    private static final File TEST_CONTIG_PLOIDY_PRIOR_FILE =
+            new File(GCNV_SIM_DATA_DIR, "contig_ploidy_prior.tsv");
+    private static final File[] TEST_COUNT_FILES = IntStream.range(0, 20)
+            .mapToObj(n -> new File(GCNV_SIM_DATA_DIR, String.format("SAMPLE_%03d_counts.tsv", n)))
             .toArray(File[]::new);
-    private final File tempOutputDir = createTempDir("test-ploidy");
+    private static final File OUTPUT_DIR = createTempDir("test-ploidy");
 
     @Test(groups = {"python"})
     public void testCohort() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        Arrays.stream(testCountFiles).forEach(argsBuilder::addInput);
+        Arrays.stream(TEST_COUNT_FILES).forEach(argsBuilder::addInput);
         argsBuilder.addFileArgument(DetermineGermlineContigPloidy.CONTIG_PLOIDY_PRIORS_FILE_LONG_NAME,
-                testContigPloidyPriorFile)
-                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
-                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-cohort")
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+                TEST_CONTIG_PLOIDY_PRIOR_FILE)
+                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
+                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-cohort");
         runCommandLine(argsBuilder);
     }
 
     @Test(groups = {"python"}, expectedExceptions = UserException.BadInput.class)
     public void testCohortWithoutContigPloidyPriors() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        Arrays.stream(testCountFiles).forEach(argsBuilder::addInput);
-        argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
-                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-cohort")
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+        Arrays.stream(TEST_COUNT_FILES).forEach(argsBuilder::addInput);
+        argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
+                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-cohort");
         runCommandLine(argsBuilder);
     }
 
     @Test(groups = {"python"}, expectedExceptions = UserException.BadInput.class)
     public void testCohortWithSingleSample() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        argsBuilder.addInput(testCountFiles[0]);
-        argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
-                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-cohort")
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+        argsBuilder.addInput(TEST_COUNT_FILES[0]);
+        argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
+                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-cohort");
         runCommandLine(argsBuilder);
     }
 
     @Test(groups = {"python"}, expectedExceptions = IllegalArgumentException.class)
     public void testCohortDuplicateFiles() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        Arrays.stream(testCountFiles).forEach(argsBuilder::addInput);
-        argsBuilder.addInput(testCountFiles[0]);    //duplicate
+        Arrays.stream(TEST_COUNT_FILES).forEach(argsBuilder::addInput);
+        argsBuilder.addInput(TEST_COUNT_FILES[0]);    //duplicate
         argsBuilder.addFileArgument(DetermineGermlineContigPloidy.CONTIG_PLOIDY_PRIORS_FILE_LONG_NAME,
-                testContigPloidyPriorFile)
-                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
-                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-cohort")
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+                TEST_CONTIG_PLOIDY_PRIOR_FILE)
+                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
+                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-cohort");
         runCommandLine(argsBuilder);
     }
 
@@ -77,26 +73,24 @@ public void testCohortDuplicateFiles() {
     @Test(groups = {"python"}, dependsOnMethods = "testCohort")
     public void testCase() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        Arrays.stream(testCountFiles, 0, 5).forEach(argsBuilder::addInput);
-        argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
+        Arrays.stream(TEST_COUNT_FILES, 0, 5).forEach(argsBuilder::addInput);
+        argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
                 .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-case")
                 .addArgument(CopyNumberStandardArgument.MODEL_LONG_NAME,
-                        new File(tempOutputDir, "test-ploidy-cohort-model").getAbsolutePath())
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+                        new File(OUTPUT_DIR, "test-ploidy-cohort-model").getAbsolutePath());
         runCommandLine(argsBuilder);
     }
 
     @Test(groups = {"python"}, dependsOnMethods = "testCohort", expectedExceptions = UserException.BadInput.class)
     public void testCaseWithContigPloidyPrior() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        Arrays.stream(testCountFiles, 0, 5).forEach(argsBuilder::addInput);
-        argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
+        Arrays.stream(TEST_COUNT_FILES, 0, 5).forEach(argsBuilder::addInput);
+        argsBuilder.addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
                 .addFileArgument(DetermineGermlineContigPloidy.CONTIG_PLOIDY_PRIORS_FILE_LONG_NAME,
-                        testContigPloidyPriorFile)
+                        TEST_CONTIG_PLOIDY_PRIOR_FILE)
                 .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-ploidy-case")
                 .addArgument(CopyNumberStandardArgument.MODEL_LONG_NAME,
-                        new File(tempOutputDir, "test-ploidy-cohort-model").getAbsolutePath())
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+                        new File(OUTPUT_DIR, "test-ploidy-cohort-model").getAbsolutePath());
         runCommandLine(argsBuilder);
     }
 }
\ No newline at end of file
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java
index 02a98fea5ab..210e1cff9bd 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java
@@ -16,13 +16,13 @@
 * Integration tests for {@link GermlineCNVCaller}.
 */
 public final class GermlineCNVCallerIntegrationTest extends CommandLineProgramTest {
-    private static final String gCNVSimDataDir = toolsTestDir + "copynumber/gcnv-sim-data/";
-    private static final File[] testCountFiles = IntStream.range(0, 20)
-            .mapToObj(n -> new File(gCNVSimDataDir + String.format("SAMPLE_%03d_counts.tsv", n)))
+    private static final String GCNV_SIM_DATA_DIR = toolsTestDir + "copynumber/gcnv-sim-data/";
+    private static final File[] TEST_COUNT_FILES = IntStream.range(0, 20)
+            .mapToObj(n -> new File(GCNV_SIM_DATA_DIR + String.format("SAMPLE_%03d_counts.tsv", n)))
             .toArray(File[]::new);
-    private static final File contigPloidyCallsOutputDir = new File(gCNVSimDataDir + "contig-ploidy-calls/");
-    private static final File simIntervalListSubsetFile = new File(gCNVSimDataDir + "sim_intervals_subset.interval_list");
-    private final File tempOutputDir = createTempDir("test-germline-cnv");
+    private static final File CONTIG_PLOIDY_CALLS_OUTPUT_DIR = new File(GCNV_SIM_DATA_DIR + "contig-ploidy-calls/");
+    private static final File SIM_INTERVAL_LIST_SUBSET_FILE = new File(GCNV_SIM_DATA_DIR + "sim_intervals_subset.interval_list");
+    private static final File OUTPUT_DIR = createTempDir("test-germline-cnv");
 
     /**
      * Run the tool in the COHORT mode for all 20 samples on a small subset of intervals
@@ -30,15 +30,14 @@ public final class GermlineCNVCallerIntegrationTest extends CommandLineProgramTe
     @Test(groups = {"python"})
     public void testCohortWithoutIntervalAnnotations() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        Arrays.stream(testCountFiles).forEach(argsBuilder::addInput);
+        Arrays.stream(TEST_COUNT_FILES).forEach(argsBuilder::addInput);
         argsBuilder.addArgument(GermlineCNVCaller.RUN_MODE_LONG_NAME, GermlineCNVCaller.RunMode.COHORT.name())
-                .addArgument("L", simIntervalListSubsetFile.getAbsolutePath())
+                .addArgument("L", SIM_INTERVAL_LIST_SUBSET_FILE.getAbsolutePath())
                 .addArgument(GermlineCNVCaller.CONTIG_PLOIDY_CALLS_DIRECTORY_LONG_NAME,
-                        contigPloidyCallsOutputDir.getAbsolutePath())
-                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
+                        CONTIG_PLOIDY_CALLS_OUTPUT_DIR.getAbsolutePath())
+                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
                 .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-germline-cnv-cohort")
-                .addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString())
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+                .addArgument(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString());
         runCommandLine(argsBuilder);
     }
 
@@ -49,28 +48,26 @@ public void testCohortWithoutIntervalAnnotations() {
     @Test(groups = {"python"}, dependsOnMethods = "testCohortWithoutIntervalAnnotations")
     public void testCase() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        Arrays.stream(testCountFiles, 0, 5).forEach(argsBuilder::addInput);
+        Arrays.stream(TEST_COUNT_FILES, 0, 5).forEach(argsBuilder::addInput);
         argsBuilder.addArgument(GermlineCNVCaller.RUN_MODE_LONG_NAME, GermlineCNVCaller.RunMode.CASE.name())
                 .addArgument(GermlineCNVCaller.CONTIG_PLOIDY_CALLS_DIRECTORY_LONG_NAME,
-                        contigPloidyCallsOutputDir.getAbsolutePath())
+                        CONTIG_PLOIDY_CALLS_OUTPUT_DIR.getAbsolutePath())
                 .addArgument(CopyNumberStandardArgument.MODEL_LONG_NAME,
-                        new File(tempOutputDir, "test-germline-cnv-cohort-model").getAbsolutePath())
-                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
-                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-germline-cnv-case")
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+                        new File(OUTPUT_DIR, "test-germline-cnv-cohort-model").getAbsolutePath())
+                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
+                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-germline-cnv-case");
         runCommandLine(argsBuilder);
     }
 
     @Test(groups = {"python"}, expectedExceptions = IllegalArgumentException.class)
     public void testCaseWithoutModel() {
         final ArgumentsBuilder argsBuilder = new ArgumentsBuilder();
-        Arrays.stream(testCountFiles, 0, 5).forEach(argsBuilder::addInput);
+        Arrays.stream(TEST_COUNT_FILES, 0, 5).forEach(argsBuilder::addInput);
         argsBuilder.addArgument(GermlineCNVCaller.RUN_MODE_LONG_NAME, GermlineCNVCaller.RunMode.CASE.name())
                 .addArgument(GermlineCNVCaller.CONTIG_PLOIDY_CALLS_DIRECTORY_LONG_NAME,
-                        contigPloidyCallsOutputDir.getAbsolutePath())
-                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, tempOutputDir.getAbsolutePath())
-                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-germline-cnv-case")
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "DEBUG");
+                        CONTIG_PLOIDY_CALLS_OUTPUT_DIR.getAbsolutePath())
+                .addArgument(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath())
+                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-germline-cnv-case");
         runCommandLine(argsBuilder);
     }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegmentsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegmentsIntegrationTest.java
index 668a159af06..5f664219295 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegmentsIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegmentsIntegrationTest.java
@@ -1,7 +1,6 @@
 package org.broadinstitute.hellbender.tools.copynumber;
 
 import org.broadinstitute.hellbender.CommandLineProgramTest;
-import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
 import org.broadinstitute.hellbender.exceptions.UserException;
 import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
 import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils;
@@ -49,7 +48,6 @@ public void testAllInputsAvailable() {
                 .addArgument(CopyNumberStandardArgument.ALLELIC_COUNTS_FILE_LONG_NAME, TUMOR_ALLELIC_COUNTS_FILE.getAbsolutePath())
                 .addArgument(CopyNumberStandardArgument.NORMAL_ALLELIC_COUNTS_FILE_LONG_NAME, NORMAL_ALLELIC_COUNTS_FILE.getAbsolutePath())
                 .addOutput(outputDir)
-                .addArgument(StandardArgumentDefinitions.VERBOSITY_NAME, "INFO")
                 .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, outputPrefix);
         runCommandLine(argsBuilder);
         assertOutputFiles(outputDir, outputPrefix, true, true);
@@ -177,18 +175,6 @@ public void testMissingSites() {
         runCommandLine(argsBuilder);
     }
 
-    @Test(expectedExceptions = UserException.class)
-    public void testOutputDirExists() {
-        final String outputPrefix = "test";
-        final ArgumentsBuilder argsBuilder = new ArgumentsBuilder()
-                .addArgument(CopyNumberStandardArgument.DENOISED_COPY_RATIOS_FILE_LONG_NAME, TUMOR_DENOISED_COPY_RATIOS_FILE.getAbsolutePath())
-                .addArgument(CopyNumberStandardArgument.ALLELIC_COUNTS_FILE_LONG_NAME, TUMOR_ALLELIC_COUNTS_FILE.getAbsolutePath())
-                .addArgument(CopyNumberStandardArgument.NORMAL_ALLELIC_COUNTS_FILE_LONG_NAME, NORMAL_ALLELIC_COUNTS_FILE.getAbsolutePath())
-                .addOutput(new File("Non-existent-path"))
-                .addArgument(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, outputPrefix);
-        runCommandLine(argsBuilder);
-    }
-
     @Test(expectedExceptions = UserException.class)
     public void testNonExistentDenoisedCopyRatiosFile() {
         final File outputDir = createTempDir("testDir");
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/PostprocessGermlineCNVCallsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/PostprocessGermlineCNVCallsIntegrationTest.java
index a18eacb8028..dbecf3afa5c 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/PostprocessGermlineCNVCallsIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/PostprocessGermlineCNVCallsIntegrationTest.java
@@ -51,19 +51,18 @@ public final class PostprocessGermlineCNVCallsIntegrationTest extends CommandLin
             new File(TEST_SUB_DIR, "segments_output_SAMPLE_002.vcf"));
 
     private static final int NUM_TEST_SAMPLES = 3;
-    private static final int TEST_CALLS_MAX_COPY_NUMBER = 5;
 
     /**
      * Runs {@link PostprocessGermlineCNVCalls} for a single sample. If {@code segmentsOutputVCF} is null,
     * the tool will only generate intervals VCF output (which is the expected behavior).
     */
-    public void runToolForSingleSample(final List<File> callShards,
-                                       final List<File> modelShards,
-                                       final int sampleIndex,
-                                       final File intervalsOutputVCF,
-                                       final File segmentsOutputVCF,
-                                       final List<String> allosomalContigs,
-                                       final int refAutosomalCopyNumber) {
+    private void runToolForSingleSample(final List<File> callShards,
+                                        final List<File> modelShards,
+                                        final int sampleIndex,
+                                        final File intervalsOutputVCF,
+                                        final File segmentsOutputVCF,
+                                        final List<String> allosomalContigs,
+                                        final int refAutosomalCopyNumber) {
         final ArgumentsBuilder argumentsBuilder = new ArgumentsBuilder();
 
         argumentsBuilder.addArgument(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false");
@@ -88,7 +87,6 @@ public void runToolForSingleSample(final List<File> callShards,
             argumentsBuilder.addArgument(PostprocessGermlineCNVCalls.OUTPUT_SEGMENTS_VCF_LONG_NAME,
                     segmentsOutputVCF.getAbsolutePath());
         }
-        argumentsBuilder.addArgument("verbosity", "DEBUG");
 
         runCommandLine(argumentsBuilder.getArgsList());
     }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotDenoisedCopyRatiosIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotDenoisedCopyRatiosIntegrationTest.java
index a762ff7b508..1feb73ec59d 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotDenoisedCopyRatiosIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotDenoisedCopyRatiosIntegrationTest.java
@@ -75,18 +75,6 @@ public void testMinimumContigLength() {
         runCommandLine(arguments);
     }
 
-    @Test(expectedExceptions = UserException.class)
-    public void testOutputDirExists() {
-        final String[] arguments = {
-                "--" + CopyNumberStandardArgument.STANDARDIZED_COPY_RATIOS_FILE_LONG_NAME, STANDARDIZED_COPY_RATIOS_FILE.getAbsolutePath(),
-                "--" + CopyNumberStandardArgument.DENOISED_COPY_RATIOS_FILE_LONG_NAME, DENOISED_COPY_RATIOS_FILE.getAbsolutePath(),
-                "-" + StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, SEQUENCE_DICTIONARY_FILE.getAbsolutePath(),
-                "-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "Non-existent-path",
"Non-existent-path", - "--" + CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, OUTPUT_PREFIX - }; - runCommandLine(arguments); - } - @Test(expectedExceptions = UserException.class) public void testNonExistentStandardizedFile() { final File outputDir = createTempDir("testDir"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegmentsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegmentsIntegrationTest.java index 576e17d7a94..e0131b864b1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegmentsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/plotting/PlotModeledSegmentsIntegrationTest.java @@ -96,19 +96,6 @@ public void testMinimumContigLength() { runCommandLine(arguments); } - @Test(expectedExceptions = UserException.class) - public void testOutputDirExists() { - final String[] arguments = { - "--" + CopyNumberStandardArgument.DENOISED_COPY_RATIOS_FILE_LONG_NAME, DENOISED_COPY_RATIOS_FILE.getAbsolutePath(), - "--" + CopyNumberStandardArgument.ALLELIC_COUNTS_FILE_LONG_NAME, ALLELIC_COUNTS_FILE.getAbsolutePath(), - "--" + CopyNumberStandardArgument.SEGMENTS_FILE_LONG_NAME, MODELED_SEGMENTS_FILE.getAbsolutePath(), - "--" + StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, SEQUENCE_DICTIONARY_FILE.getAbsolutePath(), - "-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "Non-existent-path", - "--" + CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, OUTPUT_PREFIX - }; - runCommandLine(arguments); - } - @Test(expectedExceptions = UserException.class) public void testNonExistentDenoisedCopyRatiosFile() { final File outputDir = createTempDir("testDir");