Skip to content

Commit

Permalink
Cleaned up intermediate files in gCNV WDL and fixed miscellaneous typos. (#5382)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelklee committed Mar 4, 2019
1 parent fd1b1c2 commit c65b2c2
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 19 deletions.
14 changes: 8 additions & 6 deletions scripts/cnv_wdl/cnv_common_tasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -313,11 +313,12 @@ task ScatterIntervals {
--SUBDIVISION_MODE INTERVAL_COUNT \
--SCATTER_CONTENT ${num_intervals_per_scatter} \
--OUTPUT ${output_dir_} &&
# output files are named output_dir_/temp_0001_of_N/scattered.interval_list, etc. (N = num_intervals_per_scatter);
# output files are named output_dir_/temp_0001_of_N/scattered.interval_list, etc. (N = number of scatters);
# we rename them as output_dir_/base_filename.scattered.0001.interval_list, etc. (numbering starts at 0001 because cat -n counts from 1)
ls ${output_dir_}/*/scattered.interval_list | \
cat -n | \
while read n filename; do mv $filename ${output_dir_}/${base_filename}.scattered.$(printf "%04d" $n).interval_list; done
rm -rf ${output_dir_}/temp_*_of_*
} || {
# if only a single shard is required, then we can just rename the original interval list
>&2 echo "IntervalListTools failed because only a single shard is required. Copying original interval list..."
Expand Down Expand Up @@ -405,21 +406,22 @@ task PostprocessGermlineCNVCalls {
model_args="$model_args --model-shard-path MODEL_$index"
done

mkdir extracted-contig-ploidy-calls
tar xzf ${contig_ploidy_calls_tar} -C extracted-contig-ploidy-calls
mkdir contig-ploidy-calls
tar xzf ${contig_ploidy_calls_tar} -C contig-ploidy-calls

gatk --java-options "-Xmx${command_mem_mb}m" PostprocessGermlineCNVCalls \
$calls_args \
$model_args \
${sep=" " allosomal_contigs_args} \
--autosomal-ref-copy-number ${ref_copy_number_autosomal_contigs} \
--contig-ploidy-calls extracted-contig-ploidy-calls \
--contig-ploidy-calls contig-ploidy-calls \
--sample-index ${sample_index} \
--output-genotyped-intervals ${genotyped_intervals_vcf_filename} \
--output-genotyped-segments ${genotyped_segments_vcf_filename}

rm -r CALLS_*
rm -r MODEL_*
rm -rf CALLS_*
rm -rf MODEL_*
rm -rf contig-ploidy-calls
>>>

runtime {
Expand Down
17 changes: 11 additions & 6 deletions scripts/cnv_wdl/germline/cnv_germline_case_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -278,19 +278,21 @@ task DetermineGermlineContigPloidyCaseMode {
export MKL_NUM_THREADS=${default=8 cpu}
export OMP_NUM_THREADS=${default=8 cpu}

mkdir input-contig-ploidy-model
tar xzf ${contig_ploidy_model_tar} -C input-contig-ploidy-model
mkdir contig-ploidy-model
tar xzf ${contig_ploidy_model_tar} -C contig-ploidy-model

gatk --java-options "-Xmx${command_mem_mb}m" DetermineGermlineContigPloidy \
--input ${sep=" --input " read_count_files} \
--model input-contig-ploidy-model \
--model contig-ploidy-model \
--output ${output_dir_} \
--output-prefix case \
--verbosity DEBUG \
--mapping-error-rate ${default="0.01" mapping_error_rate} \
--sample-psi-scale ${default="0.0001" sample_psi_scale}

tar czf case-contig-ploidy-calls.tar.gz -C ${output_dir_}/case-calls .

rm -rf contig-ploidy-model
>>>

runtime {
Expand Down Expand Up @@ -372,16 +374,16 @@ task GermlineCNVCallerCaseMode {
export MKL_NUM_THREADS=${default=8 cpu}
export OMP_NUM_THREADS=${default=8 cpu}

mkdir contig-ploidy-calls-dir
tar xzf ${contig_ploidy_calls_tar} -C contig-ploidy-calls-dir
mkdir contig-ploidy-calls
tar xzf ${contig_ploidy_calls_tar} -C contig-ploidy-calls

mkdir gcnv-model
tar xzf ${gcnv_model_tar} -C gcnv-model

gatk --java-options "-Xmx${command_mem_mb}m" GermlineCNVCaller \
--run-mode CASE \
--input ${sep=" --input " read_count_files} \
--contig-ploidy-calls contig-ploidy-calls-dir \
--contig-ploidy-calls contig-ploidy-calls \
--model gcnv-model \
--output ${output_dir_} \
--output-prefix case \
Expand Down Expand Up @@ -425,6 +427,9 @@ task GermlineCNVCallerCaseMode {
tar czf case-gcnv-calls-shard-${scatter_index}-sample-$CURRENT_SAMPLE_WITH_LEADING_ZEROS.tar.gz -C ${output_dir_}/case-calls/SAMPLE_$CURRENT_SAMPLE .
let CURRENT_SAMPLE=CURRENT_SAMPLE+1
done

rm -rf contig-ploidy-calls
rm -rf gcnv-model
>>>

runtime {
Expand Down
8 changes: 5 additions & 3 deletions scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -487,14 +487,14 @@ task GermlineCNVCallerCohortMode {
export MKL_NUM_THREADS=${default=8 cpu}
export OMP_NUM_THREADS=${default=8 cpu}

mkdir contig-ploidy-calls-dir
tar xzf ${contig_ploidy_calls_tar} -C contig-ploidy-calls-dir
mkdir contig-ploidy-calls
tar xzf ${contig_ploidy_calls_tar} -C contig-ploidy-calls

gatk --java-options "-Xmx${command_mem_mb}m" GermlineCNVCaller \
--run-mode COHORT \
-L ${intervals} \
--input ${sep=" --input " read_count_files} \
--contig-ploidy-calls contig-ploidy-calls-dir \
--contig-ploidy-calls contig-ploidy-calls \
${"--annotated-intervals " + annotated_intervals} \
--interval-merging-rule OVERLAPPING_ONLY \
--output ${output_dir_} \
Expand Down Expand Up @@ -549,6 +549,8 @@ task GermlineCNVCallerCohortMode {
tar czf ${cohort_entity_id}-gcnv-calls-shard-${scatter_index}-sample-$CURRENT_SAMPLE_WITH_LEADING_ZEROS.tar.gz -C ${output_dir_}/${cohort_entity_id}-calls/SAMPLE_$CURRENT_SAMPLE .
let CURRENT_SAMPLE=CURRENT_SAMPLE+1
done

rm -rf contig-ploidy-calls
>>>

runtime {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
* See {@link CollectAllelicCounts}. This behaves the same, except that it supports spark.
*/
@CommandLineProgramProperties(
summary = "Collects ref/alt counts at sites.",
oneLineSummary = "Collects ref/alt counts at sites.",
summary = "Collects ref/alt counts at sites",
oneLineSummary = "Collects ref/alt counts at sites",
programGroup = CopyNumberProgramGroup.class
)
public class CollectAllelicCountsSpark extends LocusWalkerSpark {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@
* @author Andrey Smirnov <asmirnov@broadinstitute.org>
*/
@CommandLineProgramProperties(
summary = "Postprocesses the output of GermlineCNVCaller and generates VCF files.",
oneLineSummary = "Postprocesses the output of GermlineCNVCaller and generates VCF files.",
summary = "Postprocesses the output of GermlineCNVCaller and generates VCF files",
oneLineSummary = "Postprocesses the output of GermlineCNVCaller and generates VCF files",
programGroup = CopyNumberProgramGroup.class
)
@DocumentedFeature
Expand Down

0 comments on commit c65b2c2

Please sign in to comment.