Skip to content

Commit

Permalink
fix: elevate disk_size params to workflow level
Browse files (browse the repository at this point in the history)
  • Loading branch information
a-frantz committed Oct 23, 2024
1 parent fd616f9 commit 61b0dcf
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 11 deletions.
37 changes: 27 additions & 10 deletions workflows/reference/gatk-reference.wdl
Original file line number Diff line number Diff line change
@@ -14,23 +14,28 @@ workflow gatk_reference {
dbSNP_vcf: "dbSNP VCF file for the reference genome.",
dbSNP_vcf_index: "Index for the dbSNP VCF file for the reference genome.",
interval_list: "List of intervals that will be used when computing variants.",
known_vcfs: "VCF files with known variants to use with variant calling."
known_vcfs: "VCF files with known variants to use with variant calling.",
}
allowNestedInputs: true
}

parameter_meta {
known_vcf_urls: "URLs from which to retrieve VCF files with known variants."
known_vcf_names: "Names of the VCF files with known variants. Order should match that of `known_vcf_urls`."
reference_fa_url: "URL from which to retrieve the reference FASTA file."
reference_fa_name: "Name of the output reference FASTA file."
reference_fa_md5: "MD5 checksum for the reference FASTA file."
dbSNP_vcf_url: "URL from which to retrieve the dbSNP VCF file."
dbSNP_vcf_name: "Name of the dbSNP VCF file."
known_vcf_disk_size_gb: "Disk size (in GB) to allocate for downloading the VCF files with known variants."
reference_fa_disk_size_gb: "Disk size (in GB) to allocate for downloading the reference FASTA file."
dbSNP_vcf_disk_size_gb: "Disk size (in GB) to allocate for downloading the dbSNP VCF file."
dbSNP_vcf_index_url: "URL from which to retrieve the index for the dbSNP VCF file."
dbSNP_vcf_index_name: "Name of the index for the dbSNP VCF file."
known_vcf_urls: "URLs from which to retrieve VCF files with known variants."
known_vcf_names: "Names of the VCF files with known variants. Order should match that of `known_vcf_urls`."
interval_list_url: "URL from which to retrieve the list of intervals to use when computing variants."
interval_list_name: "Name of the list of intervals to use when computing variants."
dbSNP_vcf_index_disk_size_gb: "Disk size (in GB) to allocate for downloading the index for the dbSNP VCF file."
interval_list_disk_size_gb: "Disk size (in GB) to allocate for downloading the list of intervals to use when computing variants."
}

input {
@@ -43,51 +48,63 @@ workflow gatk_reference {
String dbSNP_vcf_url
#@ except: SnakeCase
String dbSNP_vcf_name
Int known_vcf_disk_size_gb
Int reference_fa_disk_size_gb
#@ except: SnakeCase
Int dbSNP_vcf_disk_size_gb
#@ except: SnakeCase
String? dbSNP_vcf_index_url
#@ except: SnakeCase
String? dbSNP_vcf_index_name
String? interval_list_url
String? interval_list_name
#@ except: SnakeCase
Int? dbSNP_vcf_index_disk_size_gb
Int? interval_list_disk_size_gb
}

call util.download as fasta_download { input:
url = reference_fa_url,
outfile_name = reference_fa_name,
md5sum = reference_fa_md5
md5sum = reference_fa_md5,
disk_size_gb = reference_fa_disk_size_gb,
}

call samtools.faidx { input:
fasta = fasta_download.downloaded_file
fasta = fasta_download.downloaded_file,
}

call picard.create_sequence_dictionary { input:
fasta = fasta_download.downloaded_file
fasta = fasta_download.downloaded_file,
}

call util.download as dbsnp { input:
url = dbSNP_vcf_url,
outfile_name = dbSNP_vcf_name
outfile_name = dbSNP_vcf_name,
disk_size_gb = dbSNP_vcf_disk_size_gb,
}

if (defined(dbSNP_vcf_index_url) && defined(dbSNP_vcf_index_name)) {
call util.download as dbsnp_index { input:
url = select_first([dbSNP_vcf_index_url, "undefined"]),
outfile_name = select_first([dbSNP_vcf_index_name, "undefined"])
outfile_name = select_first([dbSNP_vcf_index_name, "undefined"]),
disk_size_gb = select_first([dbSNP_vcf_index_disk_size_gb, 1]),
}
}

if (defined(interval_list_url) && defined(interval_list_name)) {
call util.download as intervals { input:
url = select_first([interval_list_url, "undefined"]),
outfile_name = select_first([interval_list_name, "undefined"])
outfile_name = select_first([interval_list_name, "undefined"]),
disk_size_gb = select_first([interval_list_disk_size_gb, 1]),
}
}

scatter (pair in zip(known_vcf_urls, known_vcf_names)) {
call util.download as known_vcf { input:
url = pair.left,
outfile_name = pair.right
outfile_name = pair.right,
disk_size_gb = known_vcf_disk_size_gb,
}
}

Expand Down
2 changes: 1 addition & 1 deletion workflows/reference/make-qc-reference.wdl
Original file line number Diff line number Diff line change
@@ -81,9 +81,9 @@ workflow make_qc_reference {
"UTR",
]
Boolean protein = false
Int kraken_fastas_disk_size_gb = 10
Int reference_fa_disk_size_gb = 10
Int gtf_disk_size_gb = 10
Int kraken_fastas_disk_size_gb = 10
}

call util.download as reference_download { input:
Expand Down

0 comments on commit 61b0dcf

Please sign in to comment.