-
Notifications
You must be signed in to change notification settings - Fork 10
/
Snakefile-ncov
71 lines (61 loc) · 2.52 KB
/
Snakefile-ncov
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""
Snakefile specific for running assembly for the hCoV-19 sequences of
the Seattle Flu Study.
To run:
$ snakemake -k --snakefile Snakefile-ncov --configfile <config_file.json>
"""
import json
# Include the main assembly pipeline
include: "Snakefile-base-flu"
# input function for the rule aggregate
def aggregate_input(wildcards):
"""
Returns input for rule aggregate based on output from checkpoint align_rate.
Set minimum align rate in config under "min_align_rate".
"""
with open(checkpoints.mapped_reads.get(sample=wildcards.sample, reference=wildcards.reference).output[0]) as f:
summary = json.load(f)
all_segments_aligned = summary["all_segments_aligned"]
min_reads = summary["minimum_reads_required"]
mapped = summary["mapped_reads"]
if not all_segments_aligned or mapped <= min_reads:
return rules.not_mapped.output.not_mapped
else:
return rules.fasta_headers.output.masked_consensus
rule all:
input:
# pre_fastqc = expand("summary/pre_trim_fastqc/{fname}_fastqc.html",
# fname=glob.glob(config['fastq_directory']),
post_fastqc = expand("summary/post_trim_fastqc/{sample}.trimmed_{tr}_fastqc.{ext}",
sample=all_ids,
tr=["1P", "1U", "2P", "2U"],
ext=["zip", "html"]),
bamstats = expand("summary/bamstats/{reference}/{sample}.coverage_stats.txt", filtered_product,
sample=all_ids,
reference=all_references),
aggregate = expand("summary/aggregate/{reference}/{sample}.log", filtered_product,
sample=all_ids,
reference=all_references),
rule fasta_headers:
input:
masked_consensus = rules.vcf_to_consensus.output
output:
masked_consensus = "consensus_genomes/{reference}/{sample}.masked_consensus.fasta"
shell:
"""
cat {input.masked_consensus} | \
perl -pi -e 's/(?<=>)[^>|]*(?<=|)/{wildcards.sample}/g' > \
{output.masked_consensus}.temp
awk '{{split(substr($0,2),a,"|"); \
if(a[2]) print ">"a[1]"|"a[1]"-"a[2]"-"a[3]"|"a[2]"|"a[3]; \
else print; }}' \
{output.masked_consensus}.temp > {output.masked_consensus}
rm {output.masked_consensus}.temp
"""
rule aggregate:
input:
aggregate_input = aggregate_input
output:
aggregate_summary = "summary/aggregate/{reference}/{sample}.log"
run:
shell("echo 'Final output: {input.aggregate_input}' > {output}")