Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

CNV consensus (3 of n): Filter bad segments #328

Merged
merged 26 commits into from
Dec 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
25a50f5
add step 3 to Snakefile
Dec 11, 2019
14a6e23
Add file
Dec 11, 2019
df8596e
Add files
Dec 11, 2019
7c427eb
add file
Dec 11, 2019
b332ab1
add file
Dec 11, 2019
c22ff1b
add file
Dec 11, 2019
c9ed4b8
Merge branch 'master' into filter_bad_segments
fingerfen Dec 12, 2019
ab23f1f
Merge branch 'master' into filter_bad_segments
fingerfen Dec 13, 2019
531a067
Simplify snakemake
jashapiro Dec 13, 2019
d98069f
spelling fix
jashapiro Dec 13, 2019
85cfc1b
Update analyses/copy_number_consensus_call/Snakefile
fingerfen Dec 16, 2019
66a4618
Merge branch 'filter_bad_segments' into jashapiro/simplify-snakemake
fingerfen Dec 16, 2019
28b2729
Merge pull request #1 from jashapiro/jashapiro/simplify-snakemake
fingerfen Dec 16, 2019
94b4af1
changed file
Dec 16, 2019
5b9a0e6
changed Snakemake
Dec 16, 2019
e2090be
Switch to tabs for filter output
jashapiro Dec 16, 2019
f4978e2
Add "chr" to output files that don't have them
jashapiro Dec 16, 2019
0d2b447
add re.split
Dec 17, 2019
049e179
add black_list generating script
Dec 17, 2019
cdd50a6
Update analyses/copy_number_consensus_call/src/scripts/generate_black…
fingerfen Dec 17, 2019
b465b1c
Merge pull request #2 from jashapiro/jashapiro/entab
fingerfen Dec 17, 2019
76c9806
changed extension to bed
Dec 17, 2019
e00ca9b
Rename bad_chromosomal_seg_updated_merged.txt to bad_chromosomal_seg_…
jashapiro Dec 17, 2019
e6d20f3
Comment line
jashapiro Dec 17, 2019
75cc80a
Merge branch 'master' into filter_bad_segments
jaclyn-taroni Dec 17, 2019
4e5b281
Merge branch 'master' into filter_bad_segments
jaclyn-taroni Dec 17, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 40 additions & 14 deletions analyses/copy_number_consensus_call/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
# Updated Dec 5, 2019

## Define the ending file(s) that we want
ALL_FREEC= expand("../../scratch/interim/{sample}.freec.dup.bed", sample=config["samples"])
ALL_CNVKIT= expand("../../scratch/interim/{sample}.cnvkit.dup.bed", sample=config["samples"])
ALL_MANTA= expand("../../scratch/interim/{sample}.manta.dup.bed", sample=config["samples"])
## Final targets: one filtered BED path for every combination of sample (from the config),
## caller (freec, cnvkit, manta) and CNV type (dup, del). `rule all` requests this list.
OUTPUT= expand("../../scratch/interim/{sample}.{caller}.{dupdel}.filtered.bed",
sample=config["samples"],
caller=["freec", "cnvkit", "manta"],
dupdel=["dup", "del"])


## Define the first rule of the Snakefile. This rule lists the final files we want, which in turn determines which steps need to be run.
rule all:
input:
ALL_FREEC + ALL_CNVKIT + ALL_MANTA
OUTPUT


######################### FILTER STEP FOR THE 3 CALL METHODS ####################################
Expand All @@ -33,12 +34,14 @@ rule freec_filter:
## The first awk looks at column 6 to filter out for loss/gain. Then it prints out 6 of the 7 columns above
## The pipe into the second awk filters the CNV length, freec pval, and add in the CNV type
## The sort orders rows by chromosome name (column 1, as text) and then by start position (column 2, numerically); the final tr converts blanks to tabs
"""awk '$6~/loss/ {{print $2,$3,$4,($4-$3 + 1),$5,$9}}' {input.events} """
"""awk '$6~/loss/ {{print "chr"$2,$3,$4,($4-$3 + 1),$5,$9}}' {input.events} """
""" | awk '{{if ($4 > {params.SIZE_CUTOFF} && $6 < {params.FREEC_PVAL}){{print $0,"DEL"}}}}' """
""" | sort -k1,1 -k2,2n > {output.freec_del} && """
"""awk '$6~/gain/ {{print $2,$3,$4,($4-$3 + 1),$5,$9}}' {input.events} """
""" | sort -k1,1 -k2,2n """
""" | tr [:blank:] '\t' > {output.freec_del} && """
"""awk '$6~/gain/ {{print "chr"$2,$3,$4,($4-$3 + 1),$5,$9}}' {input.events} """
""" | awk '{{if ($4 > {params.SIZE_CUTOFF} && $6 < {params.FREEC_PVAL}){{print $0,"DUP"}}}}' """
""" | sort -k1,1 -k2,2n > {output.freec_dup}"""
""" | sort -k1,1 -k2,2n """
""" | tr [:blank:] '\t' > {output.freec_dup}"""

rule cnvkit_filter:
input:
Expand All @@ -58,10 +61,12 @@ rule cnvkit_filter:
## The sort orders rows by chromosome name (column 1, as text) and then by start position (column 2, numerically); the final tr converts blanks to tabs
"""awk '$7<2 {{print $2,$3,$4,($4-$3 + 1),$7,"NA"}}' {input.events} """
""" | awk '{{if ($4 > {params.SIZE_CUTOFF}){{print $0,"DEL"}}}}' """
""" | sort -k1,1 -k2,2n > {output.cnvkit_del} && """
""" | sort -k1,1 -k2,2n """
""" | tr [:blank:] '\t' > {output.cnvkit_del} && """
"""awk '$7>2 {{print $2,$3,$4,($4-$3 + 1),$7,"NA"}}' {input.events} """
""" | awk '{{if ($4 > {params.SIZE_CUTOFF}){{print $0,"DUP"}}}}' """
""" | sort -k1,1 -k2,2n > {output.cnvkit_dup}"""
""" | sort -k1,1 -k2,2n """
""" | tr [:blank:] '\t' > {output.cnvkit_dup}"""

rule manta_filter:
input:
Expand All @@ -79,7 +84,28 @@ rule manta_filter:
## The first awk looks at column 6 to filter out for loss/gain (DEL/DUP). Then it prints out 6 of the 7 columns above. Put NA for both p-value and copy number since MANTA results don't have these values.
## the first awk also filters out for CNV length
## The sort orders rows by chromosome name (column 1, as text) and then by start position (column 2, numerically); the final tr converts blanks to tabs
"""awk '$6~/DEL/ {{if ($5 > {params.SIZE_CUTOFF}) {{print$2,$3,$4,$5,"NA","NA",$6}}}}' {input} """
""" | sort -k1,1 -k2,2n > {output.manta_del} && """
"""awk '$6~/DUP/ {{if ($5 > {params.SIZE_CUTOFF}) {{print$2,$3,$4,$5,"NA","NA",$6}}}}' {input} """
""" | sort -k1,1 -k2,2n > {output.manta_dup}"""
"""awk '$6~/DEL/ {{if ($5 > {params.SIZE_CUTOFF}) {{print "chr"$2,$3,$4,$5,"NA","NA",$6}}}}' {input} """
""" | sort -k1,1 -k2,2n """
""" | tr [:blank:] '\t' > {output.manta_del} && """
"""awk '$6~/DUP/ {{if ($5 > {params.SIZE_CUTOFF}) {{print "chr"$2,$3,$4,$5,"NA","NA",$6}}}}' {input} """
""" | sort -k1,1 -k2,2n """
""" | tr [:blank:] '\t' > {output.manta_dup}"""


rule filter_telomeres:
    ## Runs get_rid_bad_segments.py once per {sample}.{caller}.{dupdel} BED file,
    ## handing it the bad-segment BED as --reference, and saves the script's stdout
    ## as the matching *.filtered.bed file.
    input:
        ## The script and the bad-segment list both live in the directory named by config["scripts"].
        ## Listing them as inputs means the rule reruns when either of them changes.
        script=os.path.join(config["scripts"], "get_rid_bad_segments.py"),
        bad_list=os.path.join(config["scripts"], "bad_chromosomal_seg_updated_merged.bed"),
        ## Per-sample, per-caller CNV calls to be filtered
        bedfile="../../scratch/interim/{sample}.{caller}.{dupdel}.bed"
    output:
        ## Same name as the input BED, with ".filtered" inserted before the extension
        filtered_bed="../../scratch/interim/{sample}.{caller}.{dupdel}.filtered.bed"
    wildcard_constraints:
        ## Restrict the wildcards to the known callers and CNV types so the sample name cannot absorb them
        caller = "cnvkit|freec|manta",
        dupdel = "del|dup"
    threads: 1
    shell:
        ## Adjacent string literals are concatenated into a single command line.
        "python3 {input.script} "
        "--reference {input.bad_list} "
        "--file {input.bedfile} "
        "> {output.filtered_bed}"

Loading