diff --git a/.circleci/config.yml b/.circleci/config.yml index 60b54a2f3a..88c0e50ab5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -163,10 +163,6 @@ jobs: name: GISTIC Plots command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/gistic_plot.Rmd', clean = TRUE)" - - run: - name: CN Status Heatmap - command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/cn_status_heatmap.Rmd', clean = TRUE)" - - run: name: Gene set enrichment analysis to generate GSVA scores command: OPENPBTA_TESTING=1 ./scripts/run_in_ci.sh bash "analyses/gene-set-enrichment-analysis/run-gsea.sh" diff --git a/analyses/cnv-chrom-plot/cn_status_heatmap.Rmd b/analyses/cnv-chrom-plot/cn_status_heatmap.Rmd deleted file mode 100644 index 967c2e6157..0000000000 --- a/analyses/cnv-chrom-plot/cn_status_heatmap.Rmd +++ /dev/null @@ -1,453 +0,0 @@ ---- -title: "CN Status Heatmap" -output: - html_notebook: - toc: true - toc_float: true -author: Candace Savonen for ALSF - CCDL -date: 2020 ---- - -## Purpose: - -Create a summary heatmap of copy number status from the consensus CNV call data. -This is done by binning the genome and calculating the segment's coverage of the -CNV consensus segments. -A bin is declared a particular copy number status if that status's base pair -coverage fraction is above a certain threshold (`frac_threshold`) and the callable -portion of the bin is higher than the threshold, `frac_uncallable`. - -### Usage - -This notebook can be run via the command line from the top directory of the -repository as follows: - -``` -Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/cn_status_heatmap.Rmd', - clean = TRUE)" -``` - -### Cutoffs: - -```{r} -# The max length of a segment to use the data. -# segments that are too long may dominate the heatmap and/or be indicators of -# broader structural changes -length_max <- 1e7 - -# Set minimum percentage of a bin that should be callable to report data. -frac_uncallable <- 0.75 - -# Absolute fraction needed for a bin to be called a particular status -frac_threshold <- 0.75 -``` - -### Set Up - -```{r} -# Magrittr pipe -`%>%` <- dplyr::`%>%` -``` - -### Directories and Files - -```{r} -# Path to input directory -input_dir <- file.path("..", "..", "data") -figure_dir <- file.path("..", "..", "figures") -scratch_dir <- file.path("..", "..", "scratch") - -# Path to output directory -plots_dir <- "plots" - -# Create the plots_dir if it does not exist -if (!dir.exists(plots_dir)) { - dir.create(plots_dir, recursive = TRUE) -} -``` - -Read in custom functions. - -```{r} -source(file.path("util", "bin-coverage.R")) -``` - -Import color palettes. - -```{r} -# Import standard color palettes for project -histology_col_palette <- readr::read_tsv( - file.path(figure_dir, "palettes", "histology_color_palette.tsv") -) %>% - # We'll use deframe so we can use it as a recoding list - tibble::deframe() -``` - -Read in the divergent color palette and set it up with three colors. -In this instance, we only need three values for `gain`, `neutral`, and `loss`. - -```{r} -divergent_col_palette <- readr::read_tsv( - file.path(figure_dir, "palettes", "divergent_color_palette.tsv") -) %>% - # Only keep only these three colors - dplyr::filter( - color_names %in% c("divergent_low_4", "divergent_neutral", "divergent_high_4") - ) %>% - dplyr::pull("hex_codes") -``` - -### Read in data - -```{r} -# Read in metadata -metadata <- - readr::read_tsv(file.path(input_dir, "pbta-histologies.tsv")) %>% - # Easier to deal with NA short histologies if they are labeled something different - dplyr::mutate(short_histology = as.character(tidyr::replace_na(short_histology, "none"))) %>% - # Tack on the sample color using the short_histology column and a recode - dplyr::mutate(sample_color = dplyr::recode( - short_histology, - !!!histology_col_palette - )) -``` - -### Set up consensus copy number data - -```{r} -# Read in the segment copy number data -seg_data <- data.table::fread( - file.path( - input_dir, - "pbta-cnv-consensus.seg.gz" - ), - data.table = FALSE -) -``` - -Set up the status for each consensus segment. - -```{r} -seg_data <- seg_data %>% - # Join the histology column to this data - dplyr::inner_join( - dplyr::select( - metadata, - "Kids_First_Biospecimen_ID", - "short_histology", - "tumor_ploidy" - ), - by = c("ID" = "Kids_First_Biospecimen_ID") - ) %>% - # Reformat the chromosome variable to drop the "chr" - dplyr::mutate(chrom = factor(gsub("chr", "", chrom), - levels = c(1:22, "X", "Y") - )) %>% - # Recode the copy number status based on ploidy - dplyr::mutate(status = dplyr::case_when( - # when the copy number is less than inferred ploidy, mark this as a loss - copy.num < tumor_ploidy ~ "loss", - # if copy number is higher than ploidy, mark as a gain - copy.num > tumor_ploidy ~ "gain", - copy.num == tumor_ploidy ~ "neutral" - )) %>% - # Remove sex chromosomes - dplyr::filter( - !(chrom %in% c("X", "Y", "M")), - !is.na(status) - ) -``` - -Set up seg data as GenomicRanges. - -```{r} -seg_ranges <- GenomicRanges::GRanges( - seqnames = seg_data$chrom, - ranges = IRanges::IRanges( - start = seg_data$loc.start, - end = seg_data$loc.end - ), - status = seg_data$status, - histology = seg_data$short_histology, - biospecimen = seg_data$ID -) -``` - -Explore the distribution of segment lengths. - -```{r} -ggplot2::qplot(seg_ranges@ranges@width, geom = "density") + - ggplot2::theme_classic() + - ggplot2::ylab("density") + - ggplot2::xlab("Segment length in bp") + - # Let's put a vertical line where we will make a filter cutoff - ggplot2::geom_vline(xintercept = length_max, color = "red") -``` - -Filter out segments that are longer than our cutoff. - -```{r} -filtered_seg_ranges <- seg_ranges[which(seg_ranges@ranges@width < length_max)] -``` - -### Set up chromosomal sizes for making bins. - -(This has nothing to do with Strelka, but it just so happens this is a file -with the sizes of the chromosomes in this genome build, hg38). - -```{r} -chr_sizes <- readr::read_tsv(file.path(input_dir, "WGS.hg38.strelka2.unpadded.bed"), - col_names = c("chrom", "start", "end") -) %>% - # Reformat the chromosome variable to drop the "chr" - dplyr::mutate(chrom = factor(gsub("chr", "", chrom), - levels = c(1:22, "X", "Y", "M") - )) %>% - # Remove sex chromosomes - dplyr::filter(!(chrom %in% c("X", "Y", "M"))) - - -# Make chromosome size named vector for Heatmap annotation -chr_sizes_vector <- chr_sizes$end -names(chr_sizes_vector) <- chr_sizes$chrom -``` - -### Set up uncallable regions data - -Regions that were not able to be accurately called will need to be color coded gray later. -Here, we are setting up the uncallable regions like we did with the callable regions. - -```{r} -uncallable_bed <- readr::read_tsv( - file.path( - "..", - "copy_number_consensus_call", - "ref", - "cnv_excluded_regions.bed" - ), - col_names = c("chrom", "start", "end") -) %>% - # Reformat the chromosome variable to drop the "chr" - dplyr::mutate(chrom = factor(gsub("chr", "", chrom), - levels = c(1:22, "X", "Y") - )) %>% - dplyr::filter( - # Drop CNVs that don't have chromosome labels - !is.na(chrom), - # Drop sex chromosomes - !(chrom %in% c("X", "Y", "M")) - ) -``` - -Set up uncallable regions as GenomicRanges. - -```{r} -uncallable_ranges <- GenomicRanges::GRanges( - seqnames = uncallable_bed$chrom, - ranges = IRanges::IRanges( - start = uncallable_bed$start, - end = uncallable_bed$end - ) -) -``` - -## Call bin CN statuses for each sample - -Set up binned genome ranges. - -```{r} -# Set up bins of ~1Mb size -bins <- GenomicRanges::tileGenome( - chr_sizes_vector, - tilewidth = 1e6 -) -# Uncompress these ranges -bins <- unlist(bins) -``` - -Run the bin status calling on each sample. - -```{r echo=FALSE} -# Get a vector of the biospecimen IDs -sample_ids <- unique(seg_data$ID) - -# Run call_bin_status for each biospecimen's segments. -bin_calls_list <- lapply(sample_ids, - call_bin_status, - bin_ranges = bins, - seg_ranges = filtered_seg_ranges, - uncallable_ranges = uncallable_ranges, - frac_threshold_val = frac_threshold, - frac_uncallable_val = frac_uncallable -) - -# Bring along sample IDs -names(bin_calls_list) <- sample_ids - -# Format into data.frame -bin_calls_df <- dplyr::bind_rows(bin_calls_list, - .id = "biospecimen_id" -) -``` - -## Set up heatmap annotation objects - -Make color key. - -```{r} -color_key <- structure(c(divergent_col_palette, "#9932CC", "#cccccc"), - names = c("loss", "neutral", "gain", "unstable", "uncallable") -) -``` - -### Make column annotation object - -Extract chromosome labels and make an alternating color key for them. -This annotation object strategy was originally from [chromosomal-instability](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/b5a33838d1e9bd7e7913a89201ec26125c16c94c/analyses/chromosomal-instability/02a-plot-chr-instability-heatmaps.Rmd#L73). - -```{r} -# Set up chromosome labels from bins as a factor vector -chrs <- paste0("chr", S4Vectors::decode(bins@seqnames)) -chrs <- factor(chrs, levels = paste0("chr", 1:22)) - -# Make a key for assigning alternating colors to the chromosomes -chr_colors <- rep(c("gray", "lightblue"), - length.out = length(unique(chrs)) -) -names(chr_colors) <- unique(chrs) - -# Get coordinate start positions -chr_pos <- match(unique(chrs), chrs) - -# Get mid points of chromosome labels -mid_points <- ceiling((c(chr_pos[2:length(chr_pos)], length(chrs)) + chr_pos[1:length(chr_pos)]) / 2) -``` - -Make chromosomal labeling `HeatmapAnnotation` object. - -```{r} -# Make text labels for chromosome text -chr_text <- ComplexHeatmap::anno_mark( - at = mid_points, - labels = levels(chrs), - which = "column", - side = "bottom", - labels_rot = 45, - labels_gp = grid::gpar(cex = 0.65) -) - -# Create the Heatmap annotation object -chr_annot <- ComplexHeatmap::HeatmapAnnotation( - df = data.frame(chrs), - col = list(chrs = chr_colors), - name = "", - show_legend = FALSE, - show_annotation_name = FALSE, - mark = chr_text # Put the text in -) -``` - -### Make row annotation object - -Make histology labeling `HeatmapAnnotation` object. -This annotation object strategy was originally from [chromosomal-instability](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/b5a33838d1e9bd7e7913a89201ec26125c16c94c/analyses/chromosomal-instability/02a-plot-chr-instability-heatmaps.Rmd#L73). - -```{r} -# Get the histologies for the samples in this set and order them by histology -histologies <- - data.frame(Kids_First_Biospecimen_ID = bin_calls_df$biospecimen_id) %>% - dplyr::inner_join(metadata %>% - dplyr::select(Kids_First_Biospecimen_ID, short_histology, sample_color)) %>% - dplyr::mutate(short_histology = factor(short_histology)) %>% - dplyr::arrange(short_histology) %>% - tibble::column_to_rownames("Kids_First_Biospecimen_ID") - -# Make color key specific to these samples -histologies_color_key_filtered <- unique(histologies$sample_color) -names(histologies_color_key_filtered) <- unique(histologies$short_histology) - -# Drop this column so ComplexHeatmap isn't tempted to plot it -histologies <- dplyr::select(histologies, -sample_color) - -# Get coordinate start positions -hist_pos <- match(names(histologies_color_key_filtered), histologies$short_histology) - -# Get mid points of chromosome labels -mid_points <- ceiling((c(hist_pos[2:length(hist_pos)], length(histologies$short_histology)) + hist_pos[1:length(hist_pos)]) / 2) -``` - -```{r} -# Make text labels for chromosome text -hist_text <- ComplexHeatmap::anno_mark( - at = mid_points, - labels = levels(histologies$short_histology), - which = "row", - side = "right", - labels_gp = grid::gpar(cex = 0.65), - link_width = grid::unit(15, "mm") -) - -# Create the Heatmap annotation object -hist_annot <- ComplexHeatmap::HeatmapAnnotation( - df = data.frame(histologies), - col = list(short_histology = histologies_color_key_filtered), - which = "row", - show_annotation_name = FALSE, - show_legend = FALSE, - mark = hist_text # Put the text in -) -``` - -Format `bin_calls_df` as a matrix with rownames for `ComplexHeatmap` to use. - -```{r} -bin_calls_mat <- bin_calls_df %>% - tibble::column_to_rownames("biospecimen_id") %>% - as.matrix() - -# Ensure that this matrix is in the same order as the annotation -bin_calls_mat <- bin_calls_mat[rownames(histologies), ] - -# Double check its in thte same order -all.equal(rownames(bin_calls_mat), rownames(histologies)) -``` - -## Assemble CN status heatmap - -```{r} -# Plot on a heatmap -heatmap <- ComplexHeatmap::Heatmap( - bin_calls_mat, - name = "CN status", - col = color_key, - cluster_columns = FALSE, - cluster_rows = FALSE, - show_column_names = FALSE, - show_row_names = FALSE, - bottom_annotation = chr_annot, - right_annotation = hist_annot, - heatmap_legend_param = list(nrow = 1), - raster_quality = 8 -) -``` - -Print out heatmap. - -```{r} -ComplexHeatmap::draw(heatmap, heatmap_legend_side = "bottom") -``` - -Save to PDF. - -```{r} -# Save plot as PDF -pdf(file.path(plots_dir, "cn_status_heatmap.pdf")) -ComplexHeatmap::draw(heatmap, heatmap_legend_side = "bottom") -dev.off() -``` - -# Session Info - -```{r} -sessionInfo() -``` diff --git a/analyses/cnv-chrom-plot/cn_status_heatmap.nb.html b/analyses/cnv-chrom-plot/cn_status_heatmap.nb.html deleted file mode 100644 index 4bb1ff85be..0000000000 --- a/analyses/cnv-chrom-plot/cn_status_heatmap.nb.html +++ /dev/null @@ -1,3581 +0,0 @@ - - - - -
- - - - - - - - - - -Create a summary heatmap of copy number status from the consensus CNV call data. This is done by binning the genome and calculating the segment’s coverage of the CNV consensus segments. A bin is declared a particular copy number status if that status’s base pair coverage fraction is above a certain threshold (frac_threshold
) and the callable portion of the bin is higher than the threshold, frac_uncallable
.
This notebook can be run via the command line from the top directory of the repository as follows:
-Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/cn_status_heatmap.Rmd',
- clean = TRUE)"
-# The max length of a segment to use the data.
-# segments that are too long may dominate the heatmap and/or be indicators of
-# broader structural changes
-length_max <- 1e7
-
-# Set minimum percentage of a bin that should be callable to report data.
-frac_uncallable <- 0.75
-
-# Absolute fraction needed for a bin to be called a particular status
-frac_threshold <- 0.75
-
-
-
-# Magrittr pipe
-`%>%` <- dplyr::`%>%`
-
-
-
-# Path to input directory
-input_dir <- file.path("..", "..", "data")
-figure_dir <- file.path("..", "..", "figures")
-scratch_dir <- file.path("..", "..", "scratch")
-
-# Path to output directory
-plots_dir <- "plots"
-
-# Create the plots_dir if it does not exist
-if (!dir.exists(plots_dir)) {
- dir.create(plots_dir, recursive = TRUE)
-}
-
-
-
-Read in custom functions.
- - - -source(file.path("util", "bin-coverage.R"))
-
-
-
-Import color palettes.
- - - -# Import standard color palettes for project
-histology_col_palette <- readr::read_tsv(
- file.path(figure_dir, "palettes", "histology_color_palette.tsv")
-) %>%
- # We'll use deframe so we can use it as a recoding list
- tibble::deframe()
-
-
-Parsed with column specification:
-cols(
- color_names = [31mcol_character()[39m,
- hex_codes = [31mcol_character()[39m
-)
-
-
-
-Read in the divergent color palette and set it up with three colors. In this instance, we only need three values for gain
, neutral
, and loss
.
divergent_col_palette <- readr::read_tsv(
- file.path(figure_dir, "palettes", "divergent_color_palette.tsv")
-) %>%
- # Only keep only these three colors
- dplyr::filter(
- color_names %in% c("divergent_low_4", "divergent_neutral", "divergent_high_4")
- ) %>%
- dplyr::pull("hex_codes")
-
-
-Parsed with column specification:
-cols(
- color_names = [31mcol_character()[39m,
- hex_codes = [31mcol_character()[39m
-)
-
-
-
-# Read in metadata
-metadata <-
- readr::read_tsv(file.path(input_dir, "pbta-histologies.tsv")) %>%
- # Easier to deal with NA short histologies if they are labeled something different
- dplyr::mutate(short_histology = as.character(tidyr::replace_na(short_histology, "none"))) %>%
- # Tack on the sample color using the short_histology column and a recode
- dplyr::mutate(sample_color = dplyr::recode(
- short_histology,
- !!!histology_col_palette
- ))
-
-
-Parsed with column specification:
-cols(
- .default = col_character(),
- OS_days = [32mcol_double()[39m,
- age_last_update_days = [32mcol_double()[39m,
- normal_fraction = [32mcol_double()[39m,
- tumor_fraction = [32mcol_double()[39m,
- tumor_ploidy = [32mcol_double()[39m,
- molecular_subtype = [33mcol_logical()[39m
-)
-See spec(...) for full column specifications.
-493 parsing failures.
- row col expected actual file
-2334 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
-2335 molecular_subtype 1/0/T/F/TRUE/FALSE Group4 '../../data/pbta-histologies.tsv'
-2336 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
-2337 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
-2338 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
-.... ................. .................. ...... .................................
-See problems(...) for more details.
-
-
-
-# Read in the segment copy number data
-seg_data <- data.table::fread(
- file.path(
- input_dir,
- "pbta-cnv-consensus.seg.gz"
- ),
- data.table = FALSE
-)
-
-
-
-Set up the status for each consensus segment.
- - - -seg_data <- seg_data %>%
- # Join the histology column to this data
- dplyr::inner_join(
- dplyr::select(
- metadata,
- "Kids_First_Biospecimen_ID",
- "short_histology",
- "tumor_ploidy"
- ),
- by = c("ID" = "Kids_First_Biospecimen_ID")
- ) %>%
- # Reformat the chromosome variable to drop the "chr"
- dplyr::mutate(chrom = factor(gsub("chr", "", chrom),
- levels = c(1:22, "X", "Y")
- )) %>%
- # Recode the copy number status based on ploidy
- dplyr::mutate(status = dplyr::case_when(
- # when the copy number is less than inferred ploidy, mark this as a loss
- copy.num < tumor_ploidy ~ "loss",
- # if copy number is higher than ploidy, mark as a gain
- copy.num > tumor_ploidy ~ "gain",
- copy.num == tumor_ploidy ~ "neutral"
- )) %>%
- # Remove sex chromosomes
- dplyr::filter(
- !(chrom %in% c("X", "Y", "M")),
- !is.na(status)
- )
-
-
-
-Set up seg data as GenomicRanges.
- - - -seg_ranges <- GenomicRanges::GRanges(
- seqnames = seg_data$chrom,
- ranges = IRanges::IRanges(
- start = seg_data$loc.start,
- end = seg_data$loc.end
- ),
- status = seg_data$status,
- histology = seg_data$short_histology,
- biospecimen = seg_data$ID
-)
-
-
-
-Explore the distribution of segment lengths.
- - - -ggplot2::qplot(seg_ranges@ranges@width, geom = "density") +
- ggplot2::theme_classic() +
- ggplot2::ylab("density") +
- ggplot2::xlab("Segment length in bp") +
- # Let's put a vertical line where we will make a filter cutoff
- ggplot2::geom_vline(xintercept = length_max, color = "red")
-
-
-
-
-
-
-Filter out segments that are longer than our cutoff.
- - - -filtered_seg_ranges <- seg_ranges[which(seg_ranges@ranges@width < length_max)]
-
-
-
-(This has nothing to do with Strelka, but it just so happens this is a file with the sizes of the chromosomes in this genome build, hg38).
- - - -chr_sizes <- readr::read_tsv(file.path(input_dir, "WGS.hg38.strelka2.unpadded.bed"),
- col_names = c("chrom", "start", "end")
-) %>%
- # Reformat the chromosome variable to drop the "chr"
- dplyr::mutate(chrom = factor(gsub("chr", "", chrom),
- levels = c(1:22, "X", "Y", "M")
- )) %>%
- # Remove sex chromosomes
- dplyr::filter(!(chrom %in% c("X", "Y", "M")))
-
-
-Parsed with column specification:
-cols(
- chrom = [31mcol_character()[39m,
- start = [32mcol_double()[39m,
- end = [32mcol_double()[39m
-)
-
-
-# Make chromosome size named vector for Heatmap annotation
-chr_sizes_vector <- chr_sizes$end
-names(chr_sizes_vector) <- chr_sizes$chrom
-
-
-
-Regions that were not able to be accurately called will need to be color coded gray later. Here, we are setting up the uncallable regions like we did with the callable regions.
- - - -uncallable_bed <- readr::read_tsv(
- file.path(
- "..",
- "copy_number_consensus_call",
- "ref",
- "cnv_excluded_regions.bed"
- ),
- col_names = c("chrom", "start", "end")
-) %>%
- # Reformat the chromosome variable to drop the "chr"
- dplyr::mutate(chrom = factor(gsub("chr", "", chrom),
- levels = c(1:22, "X", "Y")
- )) %>%
- dplyr::filter(
- # Drop CNVs that don't have chromosome labels
- !is.na(chrom),
- # Drop sex chromosomes
- !(chrom %in% c("X", "Y", "M"))
- )
-
-
-Parsed with column specification:
-cols(
- chrom = [31mcol_character()[39m,
- start = [32mcol_double()[39m,
- end = [32mcol_double()[39m
-)
-
-
-
-Set up uncallable regions as GenomicRanges.
- - - -uncallable_ranges <- GenomicRanges::GRanges(
- seqnames = uncallable_bed$chrom,
- ranges = IRanges::IRanges(
- start = uncallable_bed$start,
- end = uncallable_bed$end
- )
-)
-
-
-
-Set up binned genome ranges.
- - - -# Set up bins of ~1Mb size
-bins <- GenomicRanges::tileGenome(
- chr_sizes_vector,
- tilewidth = 1e6
-)
-# Uncompress these ranges
-bins <- unlist(bins)
-
-
-
-Run the bin status calling on each sample.
- - - - -Make color key.
- - - -color_key <- structure(c(divergent_col_palette, "#9932CC", "#cccccc"),
- names = c("loss", "neutral", "gain", "unstable", "uncallable")
-)
-
-
-
-Extract chromosome labels and make an alternating color key for them. This annotation object strategy was originally from chromosomal-instability.
- - - -# Set up chromosome labels from bins as a factor vector
-chrs <- paste0("chr", S4Vectors::decode(bins@seqnames))
-chrs <- factor(chrs, levels = paste0("chr", 1:22))
-
-# Make a key for assigning alternating colors to the chromosomes
-chr_colors <- rep(c("gray", "lightblue"),
- length.out = length(unique(chrs))
-)
-names(chr_colors) <- unique(chrs)
-
-# Get coordinate start positions
-chr_pos <- match(unique(chrs), chrs)
-
-# Get mid points of chromosome labels
-mid_points <- ceiling((c(chr_pos[2:length(chr_pos)], length(chrs)) + chr_pos[1:length(chr_pos)]) / 2)
-
-
-
-Make chromosomal labeling HeatmapAnnotation
object.
# Make text labels for chromosome text
-chr_text <- ComplexHeatmap::anno_mark(
- at = mid_points,
- labels = levels(chrs),
- which = "column",
- side = "bottom",
- labels_rot = 45,
- labels_gp = grid::gpar(cex = 0.65)
-)
-
-# Create the Heatmap annotation object
-chr_annot <- ComplexHeatmap::HeatmapAnnotation(
- df = data.frame(chrs),
- col = list(chrs = chr_colors),
- name = "",
- show_legend = FALSE,
- show_annotation_name = FALSE,
- mark = chr_text # Put the text in
-)
-
-
-
-Make histology labeling HeatmapAnnotation
object. This annotation object strategy was originally from chromosomal-instability.
# Get the histologies for the samples in this set and order them by histology
-histologies <-
- data.frame(Kids_First_Biospecimen_ID = bin_calls_df$biospecimen_id) %>%
- dplyr::inner_join(metadata %>%
- dplyr::select(Kids_First_Biospecimen_ID, short_histology, sample_color)) %>%
- dplyr::mutate(short_histology = factor(short_histology)) %>%
- dplyr::arrange(short_histology) %>%
- tibble::column_to_rownames("Kids_First_Biospecimen_ID")
-
-
-Joining, by = "Kids_First_Biospecimen_ID"
-Column `Kids_First_Biospecimen_ID` joining factor and character vector, coercing into character vector
-
-
-# Make color key specific to these samples
-histologies_color_key_filtered <- unique(histologies$sample_color)
-names(histologies_color_key_filtered) <- unique(histologies$short_histology)
-
-# Drop this column so ComplexHeatmap isn't tempted to plot it
-histologies <- dplyr::select(histologies, -sample_color)
-
-# Get coordinate start positions
-hist_pos <- match(names(histologies_color_key_filtered), histologies$short_histology)
-
-# Get mid points of chromosome labels
-mid_points <- ceiling((c(hist_pos[2:length(hist_pos)], length(histologies$short_histology)) + hist_pos[1:length(hist_pos)]) / 2)
-
-
-
-
-
-
-# Make text labels for chromosome text
-hist_text <- ComplexHeatmap::anno_mark(
- at = mid_points,
- labels = levels(histologies$short_histology),
- which = "row",
- side = "right",
- labels_gp = grid::gpar(cex = 0.65),
- link_width = grid::unit(15, "mm")
-)
-
-# Create the Heatmap annotation object
-hist_annot <- ComplexHeatmap::HeatmapAnnotation(
- df = data.frame(histologies),
- col = list(short_histology = histologies_color_key_filtered),
- which = "row",
- show_annotation_name = FALSE,
- show_legend = FALSE,
- mark = hist_text # Put the text in
-)
-
-
-
-Format bin_calls_df
as a matrix with rownames for ComplexHeatmap
to use.
bin_calls_mat <- bin_calls_df %>%
- tibble::column_to_rownames("biospecimen_id") %>%
- as.matrix()
-
-# Ensure that this matrix is in the same order as the annotation
-bin_calls_mat <- bin_calls_mat[rownames(histologies), ]
-
-# Double check its in thte same order
-all.equal(rownames(bin_calls_mat), rownames(histologies))
-
-
-[1] TRUE
-
-
-
-# Plot on a heatmap
-heatmap <- ComplexHeatmap::Heatmap(
- bin_calls_mat,
- name = "CN status",
- col = color_key,
- cluster_columns = FALSE,
- cluster_rows = FALSE,
- show_column_names = FALSE,
- show_row_names = FALSE,
- bottom_annotation = chr_annot,
- right_annotation = hist_annot,
- heatmap_legend_param = list(nrow = 1),
- raster_quality = 8
-)
-
-
-
-Print out heatmap.
- - - -ComplexHeatmap::draw(heatmap, heatmap_legend_side = "bottom")
-
-
-
-
-
-
-Save to PDF.
- - - -# Save plot as PDF
-pdf(file.path(plots_dir, "cn_status_heatmap.pdf"))
-ComplexHeatmap::draw(heatmap, heatmap_legend_side = "bottom")
-dev.off()
-
-
-null device
- 1
-
-
-
-sessionInfo()
-
-
-R version 3.6.0 (2019-04-26)
-Platform: x86_64-pc-linux-gnu (64-bit)
-Running under: Debian GNU/Linux 9 (stretch)
-
-Matrix products: default
-BLAS/LAPACK: /usr/lib/libopenblasp-r0.2.19.so
-
-locale:
- [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
- [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
- [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
- [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
- [9] LC_ADDRESS=C LC_TELEPHONE=C
-[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
-
-attached base packages:
-[1] stats graphics grDevices utils datasets methods base
-
-loaded via a namespace (and not attached):
- [1] circlize_0.4.9 shape_1.4.4 GetoptLong_0.1.7
- [4] tidyselect_0.2.5 xfun_0.8 purrr_0.3.2
- [7] colorspace_1.4-1 htmltools_0.3.6 stats4_3.6.0
-[10] yaml_2.2.0 base64enc_0.1-3 rlang_0.4.0
-[13] R.oo_1.22.0 pillar_1.4.2 glue_1.3.1
-[16] R.utils_2.9.0 BiocGenerics_0.32.0 RColorBrewer_1.1-2
-[19] GenomeInfoDbData_1.2.2 stringr_1.4.0 zlibbioc_1.32.0
-[22] munsell_0.5.0 gtable_0.3.0 R.methodsS3_1.7.1
-[25] GlobalOptions_0.1.0 evaluate_0.14 labeling_0.3
-[28] knitr_1.23 ComplexHeatmap_2.2.0 IRanges_2.20.2
-[31] GenomeInfoDb_1.22.1 parallel_3.6.0 Rcpp_1.0.1
-[34] readr_1.3.1 scales_1.0.0 S4Vectors_0.24.4
-[37] jsonlite_1.6 XVector_0.26.0 rjson_0.2.20
-[40] ggplot2_3.2.0 hms_0.4.2 png_0.1-7
-[43] digest_0.6.20 stringi_1.4.3 dplyr_0.8.3
-[46] GenomicRanges_1.38.0 grid_3.6.0 clue_0.3-57
-[49] tools_3.6.0 bitops_1.0-6 magrittr_1.5
-[52] RCurl_1.95-4.12 lazyeval_0.2.2 tibble_2.1.3
-[55] cluster_2.1.0 crayon_1.3.4 tidyr_0.8.3
-[58] pkgconfig_2.0.2 data.table_1.12.2 assertthat_0.2.1
-[61] rmarkdown_1.13 rstudioapi_0.10 R6_2.4.0
-[64] compiler_3.6.0
-
-
-