
Merge pull request #78 from PacificBiosciences/develop
v1.0.3
williamrowell authored Oct 20, 2023
2 parents 9b1fd57 + 6daf31b commit b6a2cd2
Showing 5 changed files with 15 additions and 9 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -19,7 +19,7 @@ PacBio WGS Variant Pipeline performs read alignment, variant calling, and phasing

## Setup

- Some tasks and workflows are pulled in from other repositories. Ensure you have initialized submodules following cloning by running `git submodule update --init --recursive`.
+ We recommend cloning the repo rather than downloading the release package. Some tasks and workflows are pulled in from other repositories. Ensure you have initialized submodules following cloning by running `git submodule update --init --recursive`.
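The submodule step matters because a plain clone leaves submodule directories empty until they are initialized. A minimal sketch of that behavior, using throwaway local repos rather than the real pipeline repository (all names and paths below are illustrative):

```shell
set -e
# Demonstrate `git submodule update --init --recursive` after a plain clone,
# with scratch repos standing in for the pipeline and its task submodules.
tmp=$(mktemp -d) && cd "$tmp"
git init -q sub                                    # stand-in for an upstream tasks repo
git -C sub -c user.email=x@example.com -c user.name=x commit -q --allow-empty -m init
git init -q pipeline                               # stand-in for the pipeline repo
git -C pipeline -c protocol.file.allow=always submodule add -q "$tmp/sub" tasks
git -C pipeline -c user.email=x@example.com -c user.name=x commit -q -m "add tasks submodule"
git -c protocol.file.allow=always clone -q pipeline work   # plain clone: tasks/ is left empty
cd work
git -c protocol.file.allow=always submodule update --init --recursive
git submodule status                               # now lists the pinned commit for tasks/
```

With the real repository, `git clone --recursive <url>` initializes submodules in the same step as the clone.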

## Resource requirements

@@ -117,7 +117,7 @@ A cohort can include one or more samples. Samples need not be related, but if you
| :- | :- | :- | :- |
| String | cohort_id | A unique name for the cohort; used to name outputs | |
| Array[[Sample](#sample)] | samples | The set of samples for the cohort. At least one sample must be defined. | |
- | Array[String] | phenotypes | [Human Phenotype Ontology (HPO) phenotypes](https://hpo.jax.org/app/) associated with the cohort. If no particular phenotypes are desired, the root HPO term, `HP:0000001`, can be used. | |
+ | Array[String] | phenotypes | [Human Phenotype Ontology (HPO) phenotypes](https://hpo.jax.org/app/) associated with the cohort. If no particular phenotypes are desired, the root HPO term, `"HP:0000001"`, can be used. | |
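For reference, a cohort matching the fields above might appear in an inputs JSON like the following sketch (the top-level key and the sample fields are illustrative, not taken from the repo):

```json
{
  "humanwgs.cohort": {
    "cohort_id": "family1",
    "samples": [
      { "sample_id": "sample1" }
    ],
    "phenotypes": ["HP:0000001"]
  }
}
```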

### [Sample](workflows/humanwgs_structs.wdl)

@@ -140,7 +140,7 @@ These files are hosted publicly in each of the cloud backends; see `backends/${b

| Type | Name | Description | Notes |
| :- | :- | :- | :- |
- | String | name | Reference name; used to name outputs (e.g., "GRCh38") | |
+ | String | name | Reference name; used to name outputs (e.g., "GRCh38") | Note: The workflow currently only supports GRCh38 and provides GCA_000001405.15_GRCh38_no_alt_analysis_set. |
| [IndexData](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl) | fasta | Reference genome and index | |
| File | tandem_repeat_bed | Tandem repeat locations used by [pbsv](https://github.com/PacificBiosciences/pbsv) to normalize SV representation | |
| File | trgt_tandem_repeat_bed | Tandem repeat sites to be genotyped by [TRGT](https://github.com/PacificBiosciences/trgt) | |
@@ -176,7 +176,7 @@ These files are hosted publicly in each of the cloud backends; see `backends/${b
| [DeepVariantModel](https://github.com/PacificBiosciences/wdl-common/blob/main/wdl/structs.wdl)? | deepvariant_model | Optional alternate DeepVariant model file to use | |
| Int? | pbsv_call_mem_gb | Optionally set RAM (GB) for pbsv_call during cohort analysis | |
| Int? | glnexus_mem_gb | Optionally set RAM (GB) for GLnexus during cohort analysis | |
- | Boolean? | run_tertiary_analysis | Run the optional tertiary analysis steps \[false\] | |
+ | Boolean? | run_tertiary_analysis | Run the optional tertiary analysis steps \[false\] | \[true, false\] |
| String | backend | Backend where the workflow will be executed | \["Azure", "AWS", "GCP", "HPC"\] |
| String? | zones | Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'. | <ul><li>[Determining available zones in AWS](backends/aws/README.md#determining-available-zones)</li><li>[Determining available zones in GCP](backends/gcp/README.md#determining-available-zones)</li></ul> |
| String? | aws_spot_queue_arn | Queue ARN for the spot batch queue; required if backend is set to 'AWS' and `preemptible` is set to `true` | [Determining the AWS queue ARN](backends/aws/README.md#determining-the-aws-batch-queue-arn) |
4 changes: 2 additions & 2 deletions wdl-ci.config.json
@@ -419,7 +419,7 @@
"tasks": {
"pbsv_call": {
"key": "pbsv_call",
- "digest": "77yon47d6t327ocrw6bed3dccyq5t3va",
+ "digest": "o5xv2etbm2j4s32d5xs626xj6sp2ykmj",
"tests": [
{
"inputs": {
@@ -457,7 +457,7 @@
"tasks": {
"concat_vcf": {
"key": "concat_vcf",
- "digest": "ntfiawmetxbdacle2l7mpu5tkz2jmtz2",
+ "digest": "xkyvutmrg3gz6zgabdmwcjvcbwrbwwp7",
"tests": [
{
"inputs": {
5 changes: 4 additions & 1 deletion workflows/cohort_analysis/cohort_analysis.wdl
@@ -32,7 +32,9 @@ workflow cohort_analysis {
File gvcf_index = gvcf_object.data_index
}

- scatter (region_set in pbsv_splits) {
+ scatter (shard_index in range(length(pbsv_splits))) {
+ Array[String] region_set = pbsv_splits[shard_index]
+
call PbsvCall.pbsv_call {
input:
sample_id = cohort_id + ".joint",
Expand All @@ -41,6 +43,7 @@ workflow cohort_analysis {
reference = reference.fasta.data,
reference_index = reference.fasta.data_index,
reference_name = reference.name,
+ shard_index = shard_index,
regions = region_set,
mem_gb = pbsv_call_mem_gb,
runtime_attributes = default_runtime_attributes
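The scatter rewrite in this hunk follows a general WDL idiom: iterate over indices rather than values, so each shard also knows its position (here used to pass `shard_index` into `pbsv_call`, giving each shard a stable identifier). A generic sketch with illustrative names:

```wdl
# range(length(xs)) yields indices 0..n-1; indexing back into the array
# recovers the payload while keeping a stable per-shard index.
scatter (i in range(length(shards))) {
  Array[String] regions = shards[i]
  call some_task { input: shard_index = i, regions = regions }
}
```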
5 changes: 4 additions & 1 deletion workflows/sample_analysis/sample_analysis.wdl
@@ -68,14 +68,17 @@ workflow sample_analysis {
runtime_attributes = default_runtime_attributes
}
- scatter (region_set in pbsv_splits) {
+ scatter (shard_index in range(length(pbsv_splits))) {
+ Array[String] region_set = pbsv_splits[shard_index]
+
call PbsvCall.pbsv_call {
input:
sample_id = sample.sample_id,
svsigs = pbsv_discover.svsig,
reference = reference.fasta.data,
reference_index = reference.fasta.data_index,
reference_name = reference.name,
+ shard_index = shard_index,
regions = region_set,
runtime_attributes = default_runtime_attributes
}
