Skip to content

Commit

Permalink
Shift subtype lookups to config
Browse files Browse the repository at this point in the history
Shifting hardcoded parameters into configs generally makes them simpler to
reason about and opens the door (ever so slightly) to adding novel
(wildcard) subtypes to the pipeline via config-only modifications.
  • Loading branch information
jameshadfield committed Dec 2, 2024
1 parent dae855d commit 2a19000
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 15 deletions.
17 changes: 2 additions & 15 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -233,19 +233,6 @@ rule files:
files = rules.files.params


def subtypes_by_subtype_wildcard(wildcards):
    """Return the list of subtype values for the build's ``subtype`` wildcard.

    The returned list is supplied as the ``subtypes`` param of the
    subtype-filtering rules.

    Args:
        wildcards: Object exposing a ``subtype`` attribute holding the
            wildcard value to look up (e.g. ``"h5nx"``).

    Returns:
        A list of subtype strings associated with ``wildcards.subtype``.

    Raises:
        AssertionError: If ``wildcards.subtype`` has no entry in the lookup
            table. Raised explicitly (rather than via an ``assert``
            statement) so the check is not stripped when Python runs
            with ``-O``.
    """
    # TODO - shift this to config
    db = {
        'h5nx': ['h5n1', 'h5n2', 'h5n3', 'h5n4', 'h5n5', 'h5n6', 'h5n7', 'h5n8', 'h5n9'],
        'h5n1': ['h5n1'],
        'h7n9': ['h7n9'],
        'h9n2': ['h9n2'],
    }
    # Same values as 'h5nx'; copied so the two entries do not share one list.
    db['h5n1-cattle-outbreak'] = [*db['h5nx']]

    if wildcards.subtype not in db:
        raise AssertionError(
            f"Subtype {wildcards.subtype!r} is not defined in the snakemake function "
            "`subtypes_by_subtype_wildcard` -- is there a typo in the subtype you are targetting?"
        )
    return db[wildcards.subtype]

rule download_sequences:
output:
sequences = f"data/{S3_SRC.get('name', None)}/sequences_{{segment}}.fasta",
Expand Down Expand Up @@ -291,7 +278,7 @@ rule filter_sequences_by_subtype:
output:
sequences = "results/{subtype}/{segment}/sequences.fasta",
params:
subtypes=subtypes_by_subtype_wildcard,
subtypes=lambda w: config['subtype_lookup'][w.subtype],
shell:
"""
augur filter \
Expand All @@ -307,7 +294,7 @@ rule filter_metadata_by_subtype:
output:
metadata = "results/{subtype}/metadata.tsv",
params:
subtypes=subtypes_by_subtype_wildcard,
subtypes=lambda w: config['subtype_lookup'][w.subtype],
shell:
"""
augur filter \
Expand Down
7 changes: 7 additions & 0 deletions gisaid/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ s3_src:
local_ingest: false
# P.S. To use local ingest files, comment out s3_src and change to local_ingest: fauna

# For subtypes defined as build wildcards (e.g. "h5n1", "h5nx"), list out the subtype values
# that we'll use to filter the starting metadata's 'subtype' column
subtype_lookup:
h5nx: ['h5n1', 'h5n2', 'h5n3', 'h5n4', 'h5n5', 'h5n6', 'h5n7', 'h5n8', 'h5n9']
h5n1: ['h5n1']
h7n9: ['h7n9']
h9n2: ['h9n2']

#### Parameters which control large overarching aspects of the build
target_sequences_per_tree: 3000
Expand Down
4 changes: 4 additions & 0 deletions h5n1-cattle-outbreak/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ s3_src:
local_ingest: false
# P.S. To use local ingest files, comment out s3_src and change to local_ingest: joined-ncbi (e.g.)

# For subtypes defined as build wildcards (i.e. "h5n1-cattle-outbreak"), list out the subtype values
# that we'll use to filter the starting metadata's 'subtype' column
subtype_lookup:
h5n1-cattle-outbreak: ['h5n1', 'h5n2', 'h5n3', 'h5n4', 'h5n5', 'h5n6', 'h5n7', 'h5n8', 'h5n9']

#### Parameters which control large overarching aspects of the build
# Set a high target_sequences_per_tree to capture all circulating strains, as they will be pruned down
Expand Down

0 comments on commit 2a19000

Please sign in to comment.