forked from AlexsLemonade/OpenPBTA-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run-prepare-cn.sh
executable file
·110 lines (90 loc) · 4.07 KB
/
run-prepare-cn.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# C. Bethell and C. Savonen for CCDL 2019
# Run focal-cn-file-preparation module
#
# Usage: bash run-prepare-cn.sh
set -e
set -o pipefail
# Run original files - will not by default
RUN_ORIGINAL=${RUN_ORIGINAL:-0}
# Run testing files for circle CI - will not by default
IS_CI=${OPENPBTA_TESTING:-0}
# This script should always run as if it were being called from
# the directory it lives in.
script_directory="$(perl -e 'use File::Basename;
use Cwd "abs_path";
print dirname(abs_path(@ARGV[0]));' -- "$0")"
cd "$script_directory" || exit
scratch_dir=../../scratch
data_dir=../../data
results_dir=../../analyses/focal-cn-file-preparation/results
histologies_file=${data_dir}/pbta-histologies.tsv
gtf_file=${data_dir}/gencode.v27.primary_assembly.annotation.gtf.gz
goi_file=../../analyses/oncoprint-landscape/driver-lists/brain-goi-list-long.txt
independent_specimens_file=${data_dir}/independent-specimens.wgswxs.primary.tsv
# Prep the consensus SEG file data
Rscript --vanilla -e "rmarkdown::render('02-add-ploidy-consensus.Rmd', clean = TRUE)"
# Run snakemake script implementing `bedtools coverage` for each sample bed file in
# `scratch/cytoband_status` -- these files are generated in
# `02-add-ploidy-consensus.Rmd`
# currently runs 10 jobs in parallel, which should be fine for most implementations
snakemake -j 10 --snakefile run-bedtools.snakemake
# Determine the dominant status for each chromosome arm and compare with GISTIC's arm status calls
Rscript --vanilla -e "rmarkdown::render('03-add-cytoband-status-consensus.Rmd', clean = TRUE)"
# Run annotation step for consensus file
Rscript --vanilla 04-prepare-cn-file.R \
--cnv_file ${scratch_dir}/consensus_seg_with_status.tsv \
--gtf_file $gtf_file \
--metadata $histologies_file \
--filename_lead "consensus_seg_annotated_cn" \
--seg
# Define most focal units of recurrent CNVs
Rscript --vanilla -e "rmarkdown::render('05-define-most-focal-cn-units.Rmd', clean = TRUE)"
# Define the recurrent calls
Rscript --vanilla -e "rmarkdown::render('06-find-recurrent-calls.Rmd', clean = TRUE)"
libraryStrategies=("polya" "stranded")
chromosomesType=("autosomes" "x_and_y")
for strategy in ${libraryStrategies[@]}; do
for chromosome_type in ${chromosomesType[@]}; do
Rscript --vanilla rna-expression-validation.R \
--annotated_cnv_file results/consensus_seg_annotated_cn_${chromosome_type}.tsv.gz \
--expression_file ${data_dir}/pbta-gene-expression-rsem-fpkm-collapsed.${strategy}.rds \
--independent_specimens_file $independent_specimens_file \
--metadata $histologies_file \
--goi_list $goi_file \
--filename_lead "consensus_seg_annotated_cn"_${chromosome_type}_${strategy}
done
done
# if we want to process the CNV data from the original callers
# (e.g., CNVkit, ControlFreeC)
if [ "$RUN_ORIGINAL" -gt "0" ]; then
# Prep the CNVkit data
Rscript --vanilla -e "rmarkdown::render('01-add-ploidy-cnvkit.Rmd', clean = TRUE)"
# Run annotation step for CNVkit
Rscript --vanilla 04-prepare-cn-file.R \
--cnv_file ${scratch_dir}/cnvkit_with_status.tsv \
--gtf_file $gtf_file \
--metadata $histologies_file \
--filename_lead "cnvkit_annotated_cn" \
--seg
# Run annotation step for ControlFreeC
Rscript --vanilla 04-prepare-cn-file.R \
--cnv_file ${data_dir}/pbta-cnv-controlfreec.tsv.gz \
--gtf_file $gtf_file \
--metadata $histologies_file \
--filename_lead "controlfreec_annotated_cn" \
--controlfreec
filenameLead=("cnvkit_annotated_cn" "controlfreec_annotated_cn")
for filename in ${filenameLead[@]}; do
for strategy in ${libraryStrategies[@]}; do
for chromosome_type in ${chromosomesType[@]}; do
Rscript --vanilla rna-expression-validation.R \
--annotated_cnv_file results/${filename}_${chromosome_type}.tsv.gz \
--expression_file ${data_dir}/pbta-gene-expression-rsem-fpkm-collapsed.${strategy}.rds \
--independent_specimens_file $independent_specimens_file \
--metadata $histologies_file \
--goi_list $goi_file \
--filename_lead ${filename}_${chromosome_type}_${strategy}
done
done
done
fi