diff --git a/analyses/oncoprint-landscape/01-plot-oncoprint.R b/analyses/oncoprint-landscape/01-plot-oncoprint.R index 81004ab102..6c8341a85d 100644 --- a/analyses/oncoprint-landscape/01-plot-oncoprint.R +++ b/analyses/oncoprint-landscape/01-plot-oncoprint.R @@ -89,6 +89,19 @@ option_list <- list( type = "character", default = NULL, help = "oncoprint output png file name" + ), + optparse::make_option( + c("-l", "--low_segmean_cutoff"), + type = "double", + default = 0.5, # Determined using CNVkit documentation (https://cnvkit.readthedocs.io/en/stable/calling.html) + help = "low segmean cutoff to determine amplification and deletion" + ), + optparse::make_option( + c("-s", "--high_segmean_cutoff"), + type = "double", + default = 1, # Determined using CNVkit documentation (https://cnvkit.readthedocs.io/en/stable/calling.html) + help = "high segmean cutoff to distinguish hemizygous deletion from + homozygous deletion" ) ) @@ -116,11 +129,57 @@ metadata <- metadata %>% maf_df <- data.table::fread(maf, stringsAsFactors = FALSE) # Read in cnv file -if (!is.null(opt$cnv_file)) { - cnv_file <- data.table::fread(opt$cnv_file, stringsAsFactors = FALSE) - # TODO: Filter and set up `cnv_file` to be in the column format - + if (!is.null(opt$cnv_file)) { + cnv_file <- + data.table::fread(opt$cnv_file, + data.table = FALSE, + stringsAsFactors = FALSE) + # TODO: Adapt `cnv_file` once the results of the consensus calls are obtained, + # to be in the column format - # "Hugo_Symbol, Tumor_Sample_Barcode, Variant_Classification" as required by # the `read.maf function` + + # Create a dataframe with `Tumor_Sample_Barcode` and gene information from maf + # dataframe + select_maf_df <- maf_df %>% + dplyr::select(Tumor_Sample_Barcode, Hugo_Symbol) + + # Add gene information to the cnv dataframe + cnv_df <- cnv_file %>% + dplyr::inner_join(select_maf_df, by = c("ID" = "Tumor_Sample_Barcode")) %>% + dplyr::distinct() + + # Define cutoffs for `Variant_Classification` column + low_segmean_cutoff <- opt$low_segmean_cutoff + high_segmean_cutoff <- opt$high_segmean_cutoff + + # Create `Variant_Classification` column + cnv_df <- cnv_df %>% + dplyr::mutate( + Variant_Classification = dplyr::case_when( + seg.mean < -low_segmean_cutoff & + seg.mean > -high_segmean_cutoff ~ "Hem_Deletion", + seg.mean < -high_segmean_cutoff ~ "Hom_Deletion", + seg.mean > low_segmean_cutoff ~ "Amplification" + ) + ) + + # Make the `Variant_Classification` column a factor vector + cnv_df$Variant_Classification <- as.factor(cnv_df$Variant_Classification) + + # Select the columns specified as input format for `read.maf` function + cnv_df <- cnv_df %>% + dplyr::select(Hugo_Symbol, ID, Variant_Classification) + + # Rename columns + colnames(cnv_df) <- + c("Hugo_Symbol", + "Tumor_Sample_Barcode", + "Variant_Classification") + + # Remove NA + cnv_file <- cnv_df %>% + dplyr::filter(!is.na(Variant_Classification)) } # Read in fusion file @@ -133,22 +192,22 @@ if (!is.null(opt$fusion_file)) { #### Incorporate Fusion Data ------------------------------------------------- # TODO: Once the consensus calls of the fusion data are obtained, this section # will need to be adapted to the format of the fusion input file. For example, - # the way we separate the genes out of `FusionName` may need to be adapted. - + # the way we separate the genes out of `FusionName` may need to be adapted. + # Separate fusion gene partners and add variant classification and center fus_sep <- fusion_file %>% # Separate the 5' and 3' genes - tidyr::separate(FusionName, c("Gene1", "Gene2"), sep = "--") %>% + tidyr::separate(FusionName, c("Gene1", "Gene2"), sep = "--") %>% dplyr::select(Sample, Gene1, Gene2) - - reformat_fusion <- fus_sep %>% - # Here we want to tally how many times the 5' gene shows up as a fusion hit + + reformat_fusion <- fus_sep %>% + # Here we want to tally how many times the 5' gene shows up as a fusion hit # in a sample dplyr::group_by(Sample, Gene1) %>% dplyr::tally() %>% - # If the sample-5' gene pair shows up more than once, call it a multi hit + # If the sample-5' gene pair shows up more than once, call it a multi hit # fusion - dplyr::mutate(Variant_Classification = + dplyr::mutate(Variant_Classification = dplyr::if_else(n == 1, "Fusion", "Multi_Hit_Fusion"), # Required column for joining with MAF Variant_Type = "OTHER") %>% @@ -172,13 +231,19 @@ maf_object <- "Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", + "Translation_Start_Site", "Nonsense_Mutation", "Nonstop_Mutation", "In_Frame_Del", "In_Frame_Ins", "Missense_Mutation", + "Stop_Codon_Ins", + "Start_Codon_Del", "Fusion", "Multi_Hit", + "Hem_Deletion", + "Hom_Deletion", + "Amplification", "Multi_Hit_Fusion" ) ) @@ -240,7 +305,7 @@ oncoplot( logColBar = TRUE, sortByAnnotation = TRUE, showTumorSampleBarcodes = TRUE, - removeNonMutated = FALSE, + removeNonMutated = TRUE, annotationFontSize = 0.7, SampleNamefontSize = 0.5, fontSize = 0.7, diff --git a/analyses/oncoprint-landscape/plots/maf_oncoprint.png b/analyses/oncoprint-landscape/plots/maf_oncoprint.png index 2fe23e4a84..9eff02d179 100644 Binary files a/analyses/oncoprint-landscape/plots/maf_oncoprint.png and b/analyses/oncoprint-landscape/plots/maf_oncoprint.png differ diff --git a/analyses/oncoprint-landscape/run-oncoprint.sh b/analyses/oncoprint-landscape/run-oncoprint.sh index ca62345e7c..47a277f1be 100644 --- a/analyses/oncoprint-landscape/run-oncoprint.sh +++ b/analyses/oncoprint-landscape/run-oncoprint.sh @@ -15,7 +15,9 @@ wget --no-clobber --directory-prefix=driver-lists -O driver-lists/brain-goi-list wget --no-clobber --directory-prefix=driver-lists -O driver-lists/brain-goi-list-short.txt https://github.com/marislab/create-pptc-pdx-oncoprints/raw/2c9ed2a2331bcef3003d6aa25a130485d76a3535/data/brain-goi-list.txt Rscript --vanilla 01-plot-oncoprint.R \ - --maf_file ../../data/pbta-snv-mutect2.vep.maf.gz \ + --maf_file ../../data/pbta-snv-lancet.vep.maf.gz \ --fusion_file ../../scratch/arriba.tsv \ + --cnv_file ../../data/pbta-cnv-cnvkit.seg.gz \ --goi_list driver-lists/brain-goi-list-long.txt \ - --png_name maf_oncoprint.png + --png_name maf_oncoprint.png \ + --low_segmean_cutoff 0.2 diff --git a/analyses/oncoprint-landscape/util/oncoplot-palette.R b/analyses/oncoprint-landscape/util/oncoplot-palette.R index c644324f97..c0a91411e0 100644 --- a/analyses/oncoprint-landscape/util/oncoplot-palette.R +++ b/analyses/oncoprint-landscape/util/oncoplot-palette.R @@ -11,7 +11,7 @@ # Define a color vector for plots color_palette <- c("Missense_Mutation" = "#35978f", - "Nonsense_Mutation" = "#000000", + "Nonsense_Mutation" = "#002F6C", "Frame_Shift_Del" = "#56B4E9", "Frame_Shift_Ins" = "#FFBBFF", "Splice_Site" = "#F0E442",