-
Notifications
You must be signed in to change notification settings - Fork 19
/
fetch_sequences.smk
38 lines (28 loc) · 1 KB
/
fetch_sequences.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""
This part of the workflow handles fetching sequences from various sources.
Uses `config.sources` to determine which sequences to include in final output.
Currently only fetches sequences from GenBank, but other sources can be
defined in the config. If adding other sources, either add a new rule upstream
of rule `fetch_all_sequences` that creates the file `data/{source}.ndjson`, or
commit that file to the repo as a static file.
Produces final output as
sequences_ndjson = "data/sequences.ndjson"
"""
rule fetch_from_genbank:
    """
    Run the repo's `./bin/fetch-from-genbank` script and capture its
    standard output as `data/genbank.ndjson`.
    """
    output:
        genbank_ndjson="data/genbank.ndjson",
    shell:
        # The script writes to stdout; the redirect creates the declared
        # output file.
        """
        ./bin/fetch-from-genbank > {output.genbank_ndjson}
        """
def _get_all_sources(wildcards):
    """
    Input function for rule `fetch_all_sequences`.

    Maps every entry of `config["sources"]` to its per-source file path
    `data/<source>.ndjson`, preserving the order given in the config.
    `wildcards` is accepted because input functions are called with it,
    but it is not used.
    """
    ndjson_paths = []
    for source_name in config["sources"]:
        ndjson_paths.append("data/{}.ndjson".format(source_name))
    return ndjson_paths
rule fetch_all_sequences:
    """
    Concatenate the per-source NDJSON files returned by `_get_all_sources`
    into the single file `data/sequences.ndjson`.
    """
    input:
        # Resolved through an input function so the list of files follows
        # `config["sources"]`.
        all_sources=_get_all_sources,
    output:
        sequences_ndjson="data/sequences.ndjson",
    shell:
        # Files are joined with `cat` in the order the input function
        # returned them.
        """
        cat {input.all_sources} > {output.sequences_ndjson}
        """