AlexsLemonade · cbethell · Oct 29, 2019 · Oct 29, 2019 · Oct 29, 2019 · jaclyn-taroni
diff --git a/analyses/oncoprint-landscape/01-plot-oncoprint.R b/analyses/oncoprint-landscape/01-plot-oncoprint.R
@@ -89,6 +89,19 @@ option_list <- list(
     type = "character",
     default = NULL,
     help = "oncoprint output png file name"
+  ),
+  optparse::make_option(
+    c("-l", "--low_segmean_cutoff"),
+    type = "double",
+    default = 0.5, # Determined using CNVkit documentation (https://cnvkit.readthedocs.io/en/stable/calling.html)
+    help = "low segmean cutoff to determine amplification and deletion"
+  ),
+  optparse::make_option(
+    c("-s", "--high_segmean_cutoff"),
+    type = "double",
+    default = 1, # Determined using CNVkit documentation (https://cnvkit.readthedocs.io/en/stable/calling.html)
+    help = "high segmean cutoff to distinguish hemizygous deletion from
+            homozygous deletion"
   )
 )
 
@@ -116,11 +129,57 @@ metadata <- metadata %>%
 maf_df <- data.table::fread(maf, stringsAsFactors = FALSE)
 
 # Read in cnv file
-if (!is.null(opt$cnv_file)) {
-  cnv_file <- data.table::fread(opt$cnv_file, stringsAsFactors = FALSE)
-  # TODO: Filter and set up `cnv_file` to be in the column format -
+ if (!is.null(opt$cnv_file)) {
+  cnv_file <-
+    data.table::fread(opt$cnv_file,
+                      data.table = FALSE,
+                      stringsAsFactors = FALSE)
+  # TODO: Adapt `cnv_file` once the results of the consensus calls are obtained,
+  # to be in the column format -
   # "Hugo_Symbol, Tumor_Sample_Barcode, Variant_Classification" as required by
   # the `read.maf function`
+
+  # Create a dataframe with `Tumor_Sample_Barcode` and gene information from maf
+  # dataframe
+  select_maf_df <- maf_df %>%
+    dplyr::select(Tumor_Sample_Barcode, Hugo_Symbol)
+
+  # Add gene information to the cnv dataframe
+  cnv_df <- cnv_file %>%
+    dplyr::inner_join(select_maf_df, by = c("ID" = "Tumor_Sample_Barcode")) %>%
+    dplyr::distinct()
+
+  # Define cutoffs for `Variant_Classification` column
+  low_segmean_cutoff <- opt$low_segmean_cutoff
+  high_segmean_cutoff <- opt$high_segmean_cutoff
+
+  # Create `Variant_Classification` column
+  cnv_df <- cnv_df %>%
+    dplyr::mutate(
+      Variant_Classification = dplyr::case_when(
+        seg.mean < -low_segmean_cutoff &
+          seg.mean > -high_segmean_cutoff ~ "Hem_Deletion",
+        seg.mean < -high_segmean_cutoff ~ "Hom_Deletion",
+        seg.mean > low_segmean_cutoff ~ "Amplification"
+      )
+    )
+
+  # Make the `Variant_Classification` column a factor vector
+  cnv_df$Variant_Classification <- as.factor(cnv_df$Variant_Classification)
+
+  # Select the columns specified as input format for `read.maf` function
+  cnv_df <- cnv_df %>%
+    dplyr::select(Hugo_Symbol, ID, Variant_Classification)
+
+  # Rename columns
+  colnames(cnv_df) <-
+    c("Hugo_Symbol",
+      "Tumor_Sample_Barcode",
+      "Variant_Classification")
+
+  # Remove NA
+  cnv_file <- cnv_df %>%
+    dplyr::filter(!is.na(Variant_Classification))
 }
 
 # Read in fusion file
@@ -133,22 +192,22 @@ if (!is.null(opt$fusion_file)) {
   #### Incorporate Fusion Data -------------------------------------------------
   # TODO: Once the consensus calls of the fusion data are obtained, this section
   # will need to be adapted to the format of the fusion input file. For example,
-  # the way we separate the genes out of `FusionName` may need to be adapted. 
-  
+  # the way we separate the genes out of `FusionName` may need to be adapted.
+
   # Separate fusion gene partners and add variant classification and center
   fus_sep <- fusion_file %>%
     # Separate the 5' and 3' genes
-    tidyr::separate(FusionName, c("Gene1", "Gene2"), sep = "--") %>%  
+    tidyr::separate(FusionName, c("Gene1", "Gene2"), sep = "--") %>%
     dplyr::select(Sample, Gene1, Gene2)
-  
-  reformat_fusion <- fus_sep %>% 
-    # Here we want to tally how many times the 5' gene shows up as a fusion hit 
+
+  reformat_fusion <- fus_sep %>%
+    # Here we want to tally how many times the 5' gene shows up as a fusion hit
     # in a sample
     dplyr::group_by(Sample, Gene1) %>%
     dplyr::tally() %>%
-    # If the sample-5' gene pair shows up more than once, call it a multi hit 
+    # If the sample-5' gene pair shows up more than once, call it a multi hit
     # fusion
-    dplyr::mutate(Variant_Classification = 
+    dplyr::mutate(Variant_Classification =
                     dplyr::if_else(n == 1, "Fusion", "Multi_Hit_Fusion"),
                   # Required column for joining with MAF
                   Variant_Type = "OTHER") %>%
@@ -172,13 +231,19 @@ maf_object <-
       "Frame_Shift_Del",
       "Frame_Shift_Ins",
       "Splice_Site",
+      "Translation_Start_Site",
       "Nonsense_Mutation",
       "Nonstop_Mutation",
       "In_Frame_Del",
       "In_Frame_Ins",
       "Missense_Mutation",
+      "Stop_Codon_Ins",
+      "Start_Codon_Del",
       "Fusion",
       "Multi_Hit",
+      "Hem_Deletion",
+      "Hom_Deletion",
+      "Amplification",
       "Multi_Hit_Fusion"
     )
   )
@@ -240,7 +305,7 @@ oncoplot(
   logColBar = TRUE,
   sortByAnnotation = TRUE,
   showTumorSampleBarcodes = TRUE,
-  removeNonMutated = FALSE,
+  removeNonMutated = TRUE,
   annotationFontSize = 0.7,
   SampleNamefontSize = 0.5,
   fontSize = 0.7,

diff --git a/analyses/oncoprint-landscape/plots/maf_oncoprint.png b/analyses/oncoprint-landscape/plots/maf_oncoprint.png
diff --git a/analyses/oncoprint-landscape/run-oncoprint.sh b/analyses/oncoprint-landscape/run-oncoprint.sh
@@ -15,7 +15,9 @@ wget --no-clobber --directory-prefix=driver-lists -O driver-lists/brain-goi-list
 wget --no-clobber --directory-prefix=driver-lists -O driver-lists/brain-goi-list-short.txt https://github.com/marislab/create-pptc-pdx-oncoprints/raw/2c9ed2a2331bcef3003d6aa25a130485d76a3535/data/brain-goi-list.txt
 
 Rscript --vanilla 01-plot-oncoprint.R \
-	--maf_file ../../data/pbta-snv-mutect2.vep.maf.gz \
+	--maf_file ../../data/pbta-snv-lancet.vep.maf.gz \
 	--fusion_file ../../scratch/arriba.tsv \
+	--cnv_file ../../data/pbta-cnv-cnvkit.seg.gz \
 	--goi_list driver-lists/brain-goi-list-long.txt \
-	--png_name maf_oncoprint.png
+	--png_name maf_oncoprint.png \
+	--low_segmean_cutoff 0.2
diff --git a/analyses/oncoprint-landscape/util/oncoplot-palette.R b/analyses/oncoprint-landscape/util/oncoplot-palette.R
@@ -11,7 +11,7 @@
 
 # Define a color vector for plots 
 color_palette <- c("Missense_Mutation" = "#35978f", 
-                   "Nonsense_Mutation" = "#000000",
+                   "Nonsense_Mutation" = "#002F6C",
                    "Frame_Shift_Del" = "#56B4E9", 
                    "Frame_Shift_Ins" = "#FFBBFF", 
                    "Splice_Site" = "#F0E442",