Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Update MAF fields for v19 SNV consensus #1033

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
18f6811
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 10, 2019
68b0d70
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 11, 2019
dcca700
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 11, 2019
f5a4cb6
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 12, 2019
4e53183
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 16, 2019
3754d70
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 17, 2019
60b78fa
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 20, 2019
ec662bb
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 25, 2019
a2fe2b2
Merge remote-tracking branch 'upstream/master'
jashapiro Sep 26, 2019
f8033df
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 4, 2019
7c4a3f0
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 4, 2019
a3d2bd7
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 4, 2019
d789b22
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 8, 2019
c909570
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 24, 2019
11b3860
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 25, 2019
573451a
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 29, 2019
1ccebd1
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 30, 2019
a47805a
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 30, 2019
fd955e8
Merge remote-tracking branch 'upstream/master'
jashapiro Oct 30, 2019
b258b80
Merge remote-tracking branch 'upstream/master'
jashapiro Nov 1, 2019
ce11b49
Merge remote-tracking branch 'upstream/master'
jashapiro Nov 2, 2019
7abd259
Merge remote-tracking branch 'upstream/master'
jashapiro Nov 4, 2019
9e5d10c
Merge remote-tracking branch 'upstream/master'
jashapiro Nov 5, 2019
0dc681e
Merge remote-tracking branch 'upstream/master'
jashapiro Nov 13, 2019
e40ad57
Merge remote-tracking branch 'upstream/master'
jashapiro Nov 19, 2019
114175b
Merge remote-tracking branch 'upstream/master'
jashapiro Nov 25, 2019
fb36e7e
Merge remote-tracking branch 'upstream/master'
jashapiro Dec 19, 2019
eb4db40
Merge remote-tracking branch 'origin/master'
jashapiro Feb 13, 2020
88aa44c
Merge branch 'master' of github.com:jashapiro/OpenPBTA-analysis
jashapiro May 20, 2020
8ba1ea7
Merge remote-tracking branch 'upstream/master'
jashapiro Apr 11, 2021
ce09075
Merge remote-tracking branch 'upstream/master'
jashapiro Apr 15, 2021
69e4dac
Merge branch 'master' of github.com:AlexsLemonade/OpenPBTA-analysis
jashapiro Apr 26, 2021
905f351
Update MAF data types
jashapiro Apr 26, 2021
efee99d
missed a comma!
jashapiro Apr 26, 2021
b2c8bb9
Skip MAF columns that are not common to all callers
jashapiro Apr 26, 2021
f70f602
remove Entrez from needed_cols
jashapiro Apr 26, 2021
361e40e
Restore Entrez_Gene_Id
jashapiro Apr 26, 2021
720481c
Update consensus comparison
jashapiro Apr 27, 2021
b36884e
Merge branch 'master' into jashapiro/update-SNV-consensus-v19
jashapiro Apr 27, 2021
32d1cd6
Make experimental_strategy a positive filter to deal with changes in c…
jashapiro Apr 28, 2021
7979830
Update caller comparisons
jashapiro Apr 28, 2021
b284ce8
Merge branch 'jashapiro/update-SNV-consensus-v19' of github.com:jasha…
jashapiro Apr 28, 2021
2be9d8a
Merge branch 'master' into jashapiro/update-SNV-consensus-v19
kgaonkar6 Apr 29, 2021
1518686
Merge branch 'master' into jashapiro/update-SNV-consensus-v19
kgaonkar6 May 4, 2021
e6916e0
Merge remote-tracking branch 'upstream/master' into jashapiro/update-…
jashapiro May 4, 2021
9b8429e
Uncomment SNV/TMB modules in CI
jashapiro May 4, 2021
a0c523e
Fix config format
jashapiro May 4, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -225,21 +225,21 @@ jobs:
# name: RNA-Seq composition
# command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/rna-seq-composition/rna-seq-composition.Rmd', clean = TRUE)"

# - run:
# name: TCGA SNV Caller Analysis
# command: ./scripts/run_in_ci.sh bash analyses/snv-callers/run_caller_consensus_analysis-tcga.sh
- run:
name: TCGA SNV Caller Analysis
command: ./scripts/run_in_ci.sh bash analyses/snv-callers/run_caller_consensus_analysis-tcga.sh

# - run:
# name: SNV Caller Analysis
# command: OPENPBTA_VAF_CUTOFF=0.5 ./scripts/run_in_ci.sh bash analyses/snv-callers/run_caller_consensus_analysis-pbta.sh
- run:
name: SNV Caller Analysis
command: OPENPBTA_VAF_CUTOFF=0.5 ./scripts/run_in_ci.sh bash analyses/snv-callers/run_caller_consensus_analysis-pbta.sh

# - run:
# name: Tumor mutation burden with TCGA
# command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/tmb-compare/compare-tcga-pbta.Rmd', clean = TRUE)"
- run:
name: Tumor mutation burden with TCGA
command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/tmb-compare/compare-tcga-pbta.Rmd', clean = TRUE)"

# - run:
# name: Exploration of nonsynonymous filter
# command: ./scripts/run_in_ci.sh bash analyses/snv-callers/explore_variant_classifications/run_explorations.sh
- run:
name: Exploration of nonsynonymous filter
command: ./scripts/run_in_ci.sh bash analyses/snv-callers/explore_variant_classifications/run_explorations.sh

# This analysis was used to explore the TCGA PBTA data when the BED files used to calculate TCGA
# were incorrect https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/568
Expand Down Expand Up @@ -275,9 +275,9 @@ jobs:
# name: d3b TMB code
# command: ./scripts/run_in_ci.sh bash analyses/tmb-compare/TMB_d3b_code/run_tmb_d3b.sh

# - run:
# name: Compare TMB calculations
# command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/tmb-compare/compare-tmb-calculations.Rmd', clean = TRUE)"
- run:
name: Compare TMB calculations
command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/tmb-compare/compare-tmb-calculations.Rmd', clean = TRUE)"

- run:
name: Run survival plots
Expand Down
200 changes: 91 additions & 109 deletions analyses/snv-callers/compare_snv_callers_plots-tcga.nb.html

Large diffs are not rendered by default.

152 changes: 121 additions & 31 deletions analyses/snv-callers/compare_snv_callers_plots.nb.html

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 7 additions & 0 deletions analyses/snv-callers/run_caller_consensus_analysis-pbta.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ vaf_cutoff=${OPENPBTA_VAF_CUTOFF:-0}
run_plots_nb=${OPENPBTA_PLOTS:-0}

################################ Set Up Database ################################
echo "Setting up Database"
python3 analyses/snv-callers/scripts/01-setup_db.py \
--db-file $dbfile \
--strelka-file data/pbta-snv-strelka2.vep.maf.gz \
Expand All @@ -35,19 +36,22 @@ python3 analyses/snv-callers/scripts/01-setup_db.py \
--meta-file data/pbta-histologies.tsv

##################### Merge callers' files into total files ####################
echo "Merging callers"
Rscript analyses/snv-callers/scripts/02-merge_callers.R \
--db_file $dbfile \
--output_file $consensus_file \
--vaf_filter $vaf_cutoff \
--overwrite

########################## Add consensus to db ################################
echo "Adding consensus to database"
python3 analyses/snv-callers/scripts/01-setup_db.py \
--db-file $dbfile \
--consensus-file $consensus_file

############# Create intersection BED files for TMB calculations ###############
# Make All mutations BED files
echo "Making intersection bed files"
bedtools intersect \
-a data/WGS.hg38.strelka2.unpadded.bed \
-b data/WGS.hg38.mutect2.vardict.unpadded.bed \
Expand All @@ -56,6 +60,7 @@ bedtools intersect \
#################### Make coding regions file
# Convert GTF to BED file for use in bedtools
# Here we extract only the lines annotated as CDS, i.e. regions that code for protein
echo "Making CDS bed file"
gunzip -c data/gencode.v27.primary_assembly.annotation.gtf.gz \
| awk '$3 ~ /CDS/' \
| convert2bed --do-not-sort --input=gtf - \
Expand All @@ -64,6 +69,7 @@ gunzip -c data/gencode.v27.primary_assembly.annotation.gtf.gz \
> $cds_file

######################### Calculate consensus TMB ##############################
echo "Calculating TMB"
Rscript analyses/snv-callers/scripts/03-calculate_tmb.R \
--db_file $dbfile \
--output analyses/snv-callers/results/consensus \
Expand All @@ -79,5 +85,6 @@ gzip $consensus_file
############################# Comparison Plots #################################
if [ "$run_plots_nb" -gt "0" ]
then
echo "Making comparison plots"
Rscript -e "rmarkdown::render('analyses/snv-callers/compare_snv_callers_plots.Rmd', clean = TRUE)"
fi
41 changes: 6 additions & 35 deletions analyses/snv-callers/scripts/01-setup_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,19 +101,6 @@
('Matched_Norm_Sample_Barcode', 'TEXT'),
('Match_Norm_Seq_Allele1', 'TEXT'),
('Match_Norm_Seq_Allele2', 'TEXT'),
('Tumor_Validation_Allele1', 'TEXT'),
('Tumor_Validation_Allele2', 'TEXT'),
('Match_Norm_Validation_Allele1', 'TEXT'),
('Match_Norm_Validation_Allele2', 'TEXT'),
('Verification_Status', 'TEXT'),
('Validation_Status', 'TEXT'),
('Mutation_Status', 'TEXT'),
('Sequencing_Phase', 'TEXT'),
('Sequence_Source', 'TEXT'),
('Validation_Method', 'TEXT'),
('Score', 'TEXT'),
('BAM_File', 'TEXT'),
('Sequencer', 'TEXT'),
('Tumor_Sample_UUID', 'TEXT'),
('Matched_Norm_Sample_UUID', 'TEXT'),
('HGVSc', 'TEXT'),
Expand Down Expand Up @@ -161,7 +148,6 @@
('AF', 'TEXT'),
('AFR_AF', 'TEXT'),
('AMR_AF', 'TEXT'),
('ASN_AF', 'TEXT'),
('EAS_AF', 'TEXT'),
('EUR_AF', 'TEXT'),
('SAS_AF', 'TEXT'),
Expand All @@ -180,31 +166,11 @@
('TSL', 'TEXT'),
('HGVS_OFFSET', 'TEXT'),
('PHENO', 'TEXT'),
('MINIMISED', 'TEXT'),
('ExAC_AF', 'TEXT'),
('ExAC_AF_AFR', 'TEXT'),
('ExAC_AF_AMR', 'TEXT'),
('ExAC_AF_EAS', 'TEXT'),
('ExAC_AF_FIN', 'TEXT'),
('ExAC_AF_NFE', 'TEXT'),
('ExAC_AF_OTH', 'TEXT'),
('ExAC_AF_SAS', 'TEXT'),
('GENE_PHENO', 'TEXT'),
('FILTER', 'TEXT'),
('flanking_bps', 'TEXT'),
('vcf_id', 'TEXT'),
('vcf_qual', 'REAL'),
('ExAC_AF_Adj', 'TEXT'),
('ExAC_AC_AN_Adj', 'TEXT'),
('ExAC_AC_AN', 'TEXT'),
('ExAC_AC_AN_AFR', 'TEXT'),
('ExAC_AC_AN_AMR', 'TEXT'),
('ExAC_AC_AN_EAS', 'TEXT'),
('ExAC_AC_AN_FIN', 'TEXT'),
('ExAC_AC_AN_NFE', 'TEXT'),
('ExAC_AC_AN_OTH', 'TEXT'),
('ExAC_AC_AN_SAS', 'TEXT'),
('ExAC_FILTER', 'TEXT'),
('gnomAD_AF', 'TEXT'),
('gnomAD_AFR_AF', 'TEXT'),
('gnomAD_AMR_AF', 'TEXT'),
Expand All @@ -215,9 +181,12 @@
('gnomAD_OTH_AF', 'TEXT'),
('gnomAD_SAS_AF', 'TEXT'),
('vcf_pos', 'INTEGER'),
('HotSpotAllele', 'INTEGER'),
('VAF', 'REAL')
]

common_cols = [col for col, type in maf_types]

needed_cols = [
'Hugo_Symbol',
'Entrez_Gene_Id',
Expand Down Expand Up @@ -304,7 +273,9 @@
# process the chunk
chunk['VAF'] = (chunk['t_alt_count'] /
(chunk['t_ref_count'] + chunk['t_alt_count']))
if table_name not in ('strelka', 'lancet', 'consensus'):
if table_name in ('strelka', 'lancet', 'consensus'):
chunk = chunk[common_cols]
else:
chunk = chunk[needed_cols]
chunk.to_sql(table_name, con, if_exists='append')
# create indexes
Expand Down
4 changes: 2 additions & 2 deletions analyses/snv-callers/scripts/03-calculate_tmb.R
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ strelka_mutect_maf_df <- strelka_mutect_maf_df %>%
),
by = "Tumor_Sample_Barcode"
) %>%
# Remove samples if they are labeled "Panel"
dplyr::filter(experimental_strategy != "Panel")
# Remove samples if they are not WGS or WXS
dplyr::filter(experimental_strategy %in% c("WGS", "WXS"))

############################# Set Up BED Files #################################
# Make a data.frame of the unique BED file paths and their names
Expand Down