-
Notifications
You must be signed in to change notification settings - Fork 19
/
nextclade.smk
70 lines (61 loc) · 1.96 KB
/
nextclade.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
rule nextclade_dataset:
    """Download the Nextclade dataset named MPXV as a zip archive."""
    output:
        # temp(): Snakemake removes the archive once no remaining job needs it.
        dataset_zip=temp("mpxv.zip"),
    shell:
        "nextclade dataset get --name MPXV --output-zip {output.dataset_zip}"
rule nextclade_dataset_hMPXV:
    """Download the Nextclade dataset named hMPXV as a zip archive."""
    output:
        # temp(): Snakemake removes the archive once no remaining job needs it.
        dataset_zip=temp("hmpxv.zip"),
    shell:
        "nextclade dataset get --name hMPXV --output-zip {output.dataset_zip}"
rule align:
    """
    Run Nextclade on the sequences using the hMPXV dataset archive.

    Writes the alignment FASTA and the insertions CSV, then packs the
    translation FASTAs found under data/translations into a single zip.
    """
    input:
        sequences="data/sequences.fasta",
        dataset="hmpxv.zip",
    output:
        alignment="data/alignment.fasta",
        insertions="data/insertions.csv",
        translations="data/translations.zip",
    threads: 4
    params:
        # The "{gene}" placeholder has to reach nextclade verbatim. Wrapping
        # the template in a lambda deactivates Snakemake's automatic wildcard
        # expansion for this value, see:
        # https://github.com/snakemake/snakemake/blob/384d0066c512b0429719085f2cf886fdb97fd80a/snakemake/rules.py#L997-L1000
        translations=lambda wildcards: "data/translations/{gene}.fasta",
    shell:
        # The second command's `zip -j` stores each file without its directory
        # path, so the archive is flat.
        """
        nextclade run \
            -D {input.dataset} \
            -j {threads} \
            --retry-reverse-complement \
            --output-fasta {output.alignment} \
            --output-translations {params.translations} \
            --output-insertions {output.insertions} \
            {input.sequences}
        zip -rj {output.translations} data/translations
        """
rule nextclade:
    """Run Nextclade on the sequences with the MPXV dataset and write its TSV output."""
    input:
        sequences="data/sequences.fasta",
        dataset="mpxv.zip",
    output:
        tsv="data/nextclade.tsv",
    threads: 4
    shell:
        """
        nextclade run \
            -D {input.dataset} \
            -j {threads} \
            --output-tsv {output.tsv} \
            {input.sequences} \
            --retry-reverse-complement
        """
rule join_metadata_clades:
    """
    Combine the raw metadata with the Nextclade TSV into data/metadata.tsv.

    The work is delegated to bin/join-metadata-and-clades.py. The name of the
    ID field passed to the script is read from config["transform"]["id_field"].
    """
    input:
        nextclade="data/nextclade.tsv",
        metadata="data/metadata_raw.tsv",
    output:
        "data/metadata.tsv",
    params:
        id_field=config["transform"]["id_field"],
    shell:
        # `:q` shell-quotes the config-supplied value, so an ID field name
        # containing spaces or shell metacharacters is passed as one argument.
        # Simple values are left unchanged.
        """
        python3 bin/join-metadata-and-clades.py \
            --id-field {params.id_field:q} \
            --metadata {input.metadata} \
            --nextclade {input.nextclade} \
            -o {output}
        """