diff --git a/.circleci/config.yml b/.circleci/config.yml index 60b54a2f3a..88c0e50ab5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -163,10 +163,6 @@ jobs: name: GISTIC Plots command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/gistic_plot.Rmd', clean = TRUE)" - - run: - name: CN Status Heatmap - command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/cn_status_heatmap.Rmd', clean = TRUE)" - - run: name: Gene set enrichment analysis to generate GSVA scores command: OPENPBTA_TESTING=1 ./scripts/run_in_ci.sh bash "analyses/gene-set-enrichment-analysis/run-gsea.sh" diff --git a/analyses/cnv-chrom-plot/cn_status_heatmap.Rmd b/analyses/cnv-chrom-plot/cn_status_heatmap.Rmd deleted file mode 100644 index 967c2e6157..0000000000 --- a/analyses/cnv-chrom-plot/cn_status_heatmap.Rmd +++ /dev/null @@ -1,453 +0,0 @@ ---- -title: "CN Status Heatmap" -output: - html_notebook: - toc: true - toc_float: true -author: Candace Savonen for ALSF - CCDL -date: 2020 ---- - -## Purpose: - -Create a summary heatmap of copy number status from the consensus CNV call data. -This is done by binning the genome and calculating the segment's coverage of the -CNV consensus segments. -A bin is declared a particular copy number status if that status's base pair -coverage fraction is above a certain threshold (`frac_threshold`) and the callable -portion of the bin is higher than the threshold, `frac_uncallable`. - -### Usage - -This notebook can be run via the command line from the top directory of the -repository as follows: - -``` -Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/cn_status_heatmap.Rmd', - clean = TRUE)" -``` - -### Cutoffs: - -```{r} -# The max length of a segment to use the data. -# segments that are too long may dominate the heatmap and/or be indicators of -# broader structural changes -length_max <- 1e7 - -# Set minimum percentage of a bin that should be callable to report data. -frac_uncallable <- 0.75 - -# Absolute fraction needed for a bin to be called a particular status -frac_threshold <- 0.75 -``` - -### Set Up - -```{r} -# Magrittr pipe -`%>%` <- dplyr::`%>%` -``` - -### Directories and Files - -```{r} -# Path to input directory -input_dir <- file.path("..", "..", "data") -figure_dir <- file.path("..", "..", "figures") -scratch_dir <- file.path("..", "..", "scratch") - -# Path to output directory -plots_dir <- "plots" - -# Create the plots_dir if it does not exist -if (!dir.exists(plots_dir)) { - dir.create(plots_dir, recursive = TRUE) -} -``` - -Read in custom functions. - -```{r} -source(file.path("util", "bin-coverage.R")) -``` - -Import color palettes. - -```{r} -# Import standard color palettes for project -histology_col_palette <- readr::read_tsv( - file.path(figure_dir, "palettes", "histology_color_palette.tsv") -) %>% - # We'll use deframe so we can use it as a recoding list - tibble::deframe() -``` - -Read in the divergent color palette and set it up with three colors. -In this instance, we only need three values for `gain`, `neutral`, and `loss`. - -```{r} -divergent_col_palette <- readr::read_tsv( - file.path(figure_dir, "palettes", "divergent_color_palette.tsv") -) %>% - # Only keep only these three colors - dplyr::filter( - color_names %in% c("divergent_low_4", "divergent_neutral", "divergent_high_4") - ) %>% - dplyr::pull("hex_codes") -``` - -### Read in data - -```{r} -# Read in metadata -metadata <- - readr::read_tsv(file.path(input_dir, "pbta-histologies.tsv")) %>% - # Easier to deal with NA short histologies if they are labeled something different - dplyr::mutate(short_histology = as.character(tidyr::replace_na(short_histology, "none"))) %>% - # Tack on the sample color using the short_histology column and a recode - dplyr::mutate(sample_color = dplyr::recode( - short_histology, - !!!histology_col_palette - )) -``` - -### Set up consensus copy number data - -```{r} -# Read in the segment copy number data -seg_data <- data.table::fread( - file.path( - input_dir, - "pbta-cnv-consensus.seg.gz" - ), - data.table = FALSE -) -``` - -Set up the status for each consensus segment. - -```{r} -seg_data <- seg_data %>% - # Join the histology column to this data - dplyr::inner_join( - dplyr::select( - metadata, - "Kids_First_Biospecimen_ID", - "short_histology", - "tumor_ploidy" - ), - by = c("ID" = "Kids_First_Biospecimen_ID") - ) %>% - # Reformat the chromosome variable to drop the "chr" - dplyr::mutate(chrom = factor(gsub("chr", "", chrom), - levels = c(1:22, "X", "Y") - )) %>% - # Recode the copy number status based on ploidy - dplyr::mutate(status = dplyr::case_when( - # when the copy number is less than inferred ploidy, mark this as a loss - copy.num < tumor_ploidy ~ "loss", - # if copy number is higher than ploidy, mark as a gain - copy.num > tumor_ploidy ~ "gain", - copy.num == tumor_ploidy ~ "neutral" - )) %>% - # Remove sex chromosomes - dplyr::filter( - !(chrom %in% c("X", "Y", "M")), - !is.na(status) - ) -``` - -Set up seg data as GenomicRanges. - -```{r} -seg_ranges <- GenomicRanges::GRanges( - seqnames = seg_data$chrom, - ranges = IRanges::IRanges( - start = seg_data$loc.start, - end = seg_data$loc.end - ), - status = seg_data$status, - histology = seg_data$short_histology, - biospecimen = seg_data$ID -) -``` - -Explore the distribution of segment lengths. - -```{r} -ggplot2::qplot(seg_ranges@ranges@width, geom = "density") + - ggplot2::theme_classic() + - ggplot2::ylab("density") + - ggplot2::xlab("Segment length in bp") + - # Let's put a vertical line where we will make a filter cutoff - ggplot2::geom_vline(xintercept = length_max, color = "red") -``` - -Filter out segments that are longer than our cutoff. - -```{r} -filtered_seg_ranges <- seg_ranges[which(seg_ranges@ranges@width < length_max)] -``` - -### Set up chromosomal sizes for making bins. - -(This has nothing to do with Strelka, but it just so happens this is a file -with the sizes of the chromosomes in this genome build, hg38). - -```{r} -chr_sizes <- readr::read_tsv(file.path(input_dir, "WGS.hg38.strelka2.unpadded.bed"), - col_names = c("chrom", "start", "end") -) %>% - # Reformat the chromosome variable to drop the "chr" - dplyr::mutate(chrom = factor(gsub("chr", "", chrom), - levels = c(1:22, "X", "Y", "M") - )) %>% - # Remove sex chromosomes - dplyr::filter(!(chrom %in% c("X", "Y", "M"))) - - -# Make chromosome size named vector for Heatmap annotation -chr_sizes_vector <- chr_sizes$end -names(chr_sizes_vector) <- chr_sizes$chrom -``` - -### Set up uncallable regions data - -Regions that were not able to be accurately called will need to be color coded gray later. -Here, we are setting up the uncallable regions like we did with the callable regions. - -```{r} -uncallable_bed <- readr::read_tsv( - file.path( - "..", - "copy_number_consensus_call", - "ref", - "cnv_excluded_regions.bed" - ), - col_names = c("chrom", "start", "end") -) %>% - # Reformat the chromosome variable to drop the "chr" - dplyr::mutate(chrom = factor(gsub("chr", "", chrom), - levels = c(1:22, "X", "Y") - )) %>% - dplyr::filter( - # Drop CNVs that don't have chromosome labels - !is.na(chrom), - # Drop sex chromosomes - !(chrom %in% c("X", "Y", "M")) - ) -``` - -Set up uncallable regions as GenomicRanges. - -```{r} -uncallable_ranges <- GenomicRanges::GRanges( - seqnames = uncallable_bed$chrom, - ranges = IRanges::IRanges( - start = uncallable_bed$start, - end = uncallable_bed$end - ) -) -``` - -## Call bin CN statuses for each sample - -Set up binned genome ranges. - -```{r} -# Set up bins of ~1Mb size -bins <- GenomicRanges::tileGenome( - chr_sizes_vector, - tilewidth = 1e6 -) -# Uncompress these ranges -bins <- unlist(bins) -``` - -Run the bin status calling on each sample. - -```{r echo=FALSE} -# Get a vector of the biospecimen IDs -sample_ids <- unique(seg_data$ID) - -# Run call_bin_status for each biospecimen's segments. -bin_calls_list <- lapply(sample_ids, - call_bin_status, - bin_ranges = bins, - seg_ranges = filtered_seg_ranges, - uncallable_ranges = uncallable_ranges, - frac_threshold_val = frac_threshold, - frac_uncallable_val = frac_uncallable -) - -# Bring along sample IDs -names(bin_calls_list) <- sample_ids - -# Format into data.frame -bin_calls_df <- dplyr::bind_rows(bin_calls_list, - .id = "biospecimen_id" -) -``` - -## Set up heatmap annotation objects - -Make color key. - -```{r} -color_key <- structure(c(divergent_col_palette, "#9932CC", "#cccccc"), - names = c("loss", "neutral", "gain", "unstable", "uncallable") -) -``` - -### Make column annotation object - -Extract chromosome labels and make an alternating color key for them. -This annotation object strategy was originally from [chromosomal-instability](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/b5a33838d1e9bd7e7913a89201ec26125c16c94c/analyses/chromosomal-instability/02a-plot-chr-instability-heatmaps.Rmd#L73). - -```{r} -# Set up chromosome labels from bins as a factor vector -chrs <- paste0("chr", S4Vectors::decode(bins@seqnames)) -chrs <- factor(chrs, levels = paste0("chr", 1:22)) - -# Make a key for assigning alternating colors to the chromosomes -chr_colors <- rep(c("gray", "lightblue"), - length.out = length(unique(chrs)) -) -names(chr_colors) <- unique(chrs) - -# Get coordinate start positions -chr_pos <- match(unique(chrs), chrs) - -# Get mid points of chromosome labels -mid_points <- ceiling((c(chr_pos[2:length(chr_pos)], length(chrs)) + chr_pos[1:length(chr_pos)]) / 2) -``` - -Make chromosomal labeling `HeatmapAnnotation` object. - -```{r} -# Make text labels for chromosome text -chr_text <- ComplexHeatmap::anno_mark( - at = mid_points, - labels = levels(chrs), - which = "column", - side = "bottom", - labels_rot = 45, - labels_gp = grid::gpar(cex = 0.65) -) - -# Create the Heatmap annotation object -chr_annot <- ComplexHeatmap::HeatmapAnnotation( - df = data.frame(chrs), - col = list(chrs = chr_colors), - name = "", - show_legend = FALSE, - show_annotation_name = FALSE, - mark = chr_text # Put the text in -) -``` - -### Make row annotation object - -Make histology labeling `HeatmapAnnotation` object. -This annotation object strategy was originally from [chromosomal-instability](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/b5a33838d1e9bd7e7913a89201ec26125c16c94c/analyses/chromosomal-instability/02a-plot-chr-instability-heatmaps.Rmd#L73). - -```{r} -# Get the histologies for the samples in this set and order them by histology -histologies <- - data.frame(Kids_First_Biospecimen_ID = bin_calls_df$biospecimen_id) %>% - dplyr::inner_join(metadata %>% - dplyr::select(Kids_First_Biospecimen_ID, short_histology, sample_color)) %>% - dplyr::mutate(short_histology = factor(short_histology)) %>% - dplyr::arrange(short_histology) %>% - tibble::column_to_rownames("Kids_First_Biospecimen_ID") - -# Make color key specific to these samples -histologies_color_key_filtered <- unique(histologies$sample_color) -names(histologies_color_key_filtered) <- unique(histologies$short_histology) - -# Drop this column so ComplexHeatmap isn't tempted to plot it -histologies <- dplyr::select(histologies, -sample_color) - -# Get coordinate start positions -hist_pos <- match(names(histologies_color_key_filtered), histologies$short_histology) - -# Get mid points of chromosome labels -mid_points <- ceiling((c(hist_pos[2:length(hist_pos)], length(histologies$short_histology)) + hist_pos[1:length(hist_pos)]) / 2) -``` - -```{r} -# Make text labels for chromosome text -hist_text <- ComplexHeatmap::anno_mark( - at = mid_points, - labels = levels(histologies$short_histology), - which = "row", - side = "right", - labels_gp = grid::gpar(cex = 0.65), - link_width = grid::unit(15, "mm") -) - -# Create the Heatmap annotation object -hist_annot <- ComplexHeatmap::HeatmapAnnotation( - df = data.frame(histologies), - col = list(short_histology = histologies_color_key_filtered), - which = "row", - show_annotation_name = FALSE, - show_legend = FALSE, - mark = hist_text # Put the text in -) -``` - -Format `bin_calls_df` as a matrix with rownames for `ComplexHeatmap` to use. - -```{r} -bin_calls_mat <- bin_calls_df %>% - tibble::column_to_rownames("biospecimen_id") %>% - as.matrix() - -# Ensure that this matrix is in the same order as the annotation -bin_calls_mat <- bin_calls_mat[rownames(histologies), ] - -# Double check its in thte same order -all.equal(rownames(bin_calls_mat), rownames(histologies)) -``` - -## Assemble CN status heatmap - -```{r} -# Plot on a heatmap -heatmap <- ComplexHeatmap::Heatmap( - bin_calls_mat, - name = "CN status", - col = color_key, - cluster_columns = FALSE, - cluster_rows = FALSE, - show_column_names = FALSE, - show_row_names = FALSE, - bottom_annotation = chr_annot, - right_annotation = hist_annot, - heatmap_legend_param = list(nrow = 1), - raster_quality = 8 -) -``` - -Print out heatmap. - -```{r} -ComplexHeatmap::draw(heatmap, heatmap_legend_side = "bottom") -``` - -Save to PDF. - -```{r} -# Save plot as PDF -pdf(file.path(plots_dir, "cn_status_heatmap.pdf")) -ComplexHeatmap::draw(heatmap, heatmap_legend_side = "bottom") -dev.off() -``` - -# Session Info - -```{r} -sessionInfo() -``` diff --git a/analyses/cnv-chrom-plot/cn_status_heatmap.nb.html b/analyses/cnv-chrom-plot/cn_status_heatmap.nb.html deleted file mode 100644 index 4bb1ff85be..0000000000 --- a/analyses/cnv-chrom-plot/cn_status_heatmap.nb.html +++ /dev/null @@ -1,3581 +0,0 @@ - - - - - - - - - - - - - - - -CN Status Heatmap - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-
-
-
-
- -
- - - - - - - - -
-

Purpose:

-

Create a summary heatmap of copy number status from the consensus CNV call data. This is done by binning the genome and calculating the segment’s coverage of the CNV consensus segments. A bin is declared a particular copy number status if that status’s base pair coverage fraction is above a certain threshold (frac_threshold) and the callable portion of the bin is higher than the threshold, frac_uncallable.

-
-

Usage

-

This notebook can be run via the command line from the top directory of the repository as follows:

-
Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/cn_status_heatmap.Rmd', 
-                              clean = TRUE)"
-
-
-

Cutoffs:

- - - -
# The max length of a segment to use the data.
-# segments that are too long may dominate the heatmap and/or be indicators of
-# broader structural changes
-length_max <- 1e7
-
-# Set minimum percentage of a bin that should be callable to report data.
-frac_uncallable <- 0.75
-
-# Absolute fraction needed for a bin to be called a particular status
-frac_threshold <- 0.75
- - - -
-
-

Set Up

- - - -
# Magrittr pipe
-`%>%` <- dplyr::`%>%`
- - - -
-
-

Directories and Files

- - - -
# Path to input directory
-input_dir <- file.path("..", "..", "data")
-figure_dir <- file.path("..", "..", "figures")
-scratch_dir <- file.path("..", "..", "scratch")
-
-# Path to output directory
-plots_dir <- "plots"
-
-# Create the plots_dir if it does not exist
-if (!dir.exists(plots_dir)) {
-  dir.create(plots_dir, recursive = TRUE)
-}
- - - -

Read in custom functions.

- - - -
source(file.path("util", "bin-coverage.R"))
- - - -

Import color palettes.

- - - -
# Import standard color palettes for project
-histology_col_palette <- readr::read_tsv(
-  file.path(figure_dir, "palettes", "histology_color_palette.tsv")
-) %>%
-  # We'll use deframe so we can use it as a recoding list
-  tibble::deframe()
- - -
Parsed with column specification:
-cols(
-  color_names = col_character(),
-  hex_codes = col_character()
-)
- - - -

Read in the divergent color palette and set it up with three colors. In this instance, we only need three values for gain, neutral, and loss.

- - - -
divergent_col_palette <- readr::read_tsv(
-  file.path(figure_dir, "palettes", "divergent_color_palette.tsv")
-) %>%
-  # Only keep only these three colors
-  dplyr::filter(
-    color_names %in% c("divergent_low_4", "divergent_neutral", "divergent_high_4")
-  ) %>%
-  dplyr::pull("hex_codes")
- - -
Parsed with column specification:
-cols(
-  color_names = col_character(),
-  hex_codes = col_character()
-)
- - - -
-
-

Read in data

- - - -
# Read in metadata
-metadata <-
-  readr::read_tsv(file.path(input_dir, "pbta-histologies.tsv")) %>%
-  # Easier to deal with NA short histologies if they are labeled something different
-  dplyr::mutate(short_histology = as.character(tidyr::replace_na(short_histology, "none"))) %>%
-  # Tack on the sample color using the short_histology column and a recode
-  dplyr::mutate(sample_color = dplyr::recode(
-    short_histology,
-    !!!histology_col_palette
-  ))
- - -
Parsed with column specification:
-cols(
-  .default = col_character(),
-  OS_days = col_double(),
-  age_last_update_days = col_double(),
-  normal_fraction = col_double(),
-  tumor_fraction = col_double(),
-  tumor_ploidy = col_double(),
-  molecular_subtype = col_logical()
-)
-See spec(...) for full column specifications.
-493 parsing failures.
- row               col           expected actual                              file
-2334 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
-2335 molecular_subtype 1/0/T/F/TRUE/FALSE Group4 '../../data/pbta-histologies.tsv'
-2336 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
-2337 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
-2338 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv'
-.... ................. .................. ...... .................................
-See problems(...) for more details.
- - - -
-
-

Set up consensus copy number data

- - - -
# Read in the segment copy number data
-seg_data <- data.table::fread(
-  file.path(
-    input_dir,
-    "pbta-cnv-consensus.seg.gz"
-  ),
-  data.table = FALSE
-)
- - - -

Set up the status for each consensus segment.

- - - -
seg_data <- seg_data %>%
-  # Join the histology column to this data
-  dplyr::inner_join(
-    dplyr::select(
-      metadata,
-      "Kids_First_Biospecimen_ID",
-      "short_histology",
-      "tumor_ploidy"
-    ),
-    by = c("ID" = "Kids_First_Biospecimen_ID")
-  ) %>%
-  # Reformat the chromosome variable to drop the "chr"
-  dplyr::mutate(chrom = factor(gsub("chr", "", chrom),
-    levels = c(1:22, "X", "Y")
-  )) %>%
-  # Recode the copy number status based on ploidy
-  dplyr::mutate(status = dplyr::case_when(
-    # when the copy number is less than inferred ploidy, mark this as a loss
-    copy.num < tumor_ploidy ~ "loss",
-    # if copy number is higher than ploidy, mark as a gain
-    copy.num > tumor_ploidy ~ "gain",
-    copy.num == tumor_ploidy ~ "neutral"
-  )) %>%
-  # Remove sex chromosomes
-  dplyr::filter(
-    !(chrom %in% c("X", "Y", "M")),
-    !is.na(status)
-  )
- - - -

Set up seg data as GenomicRanges.

- - - -
seg_ranges <- GenomicRanges::GRanges(
-  seqnames = seg_data$chrom,
-  ranges = IRanges::IRanges(
-    start = seg_data$loc.start,
-    end = seg_data$loc.end
-  ),
-  status = seg_data$status,
-  histology = seg_data$short_histology,
-  biospecimen = seg_data$ID
-)
- - - -

Explore the distribution of segment lengths.

- - - -
ggplot2::qplot(seg_ranges@ranges@width, geom = "density") +
-  ggplot2::theme_classic() +
-  ggplot2::ylab("density") +
-  ggplot2::xlab("Segment length in bp") +
-  # Let's put a vertical line where we will make a filter cutoff
-  ggplot2::geom_vline(xintercept = length_max, color = "red")
- - -

- - - -

Filter out segments that are longer than our cutoff.

- - - -
filtered_seg_ranges <- seg_ranges[which(seg_ranges@ranges@width < length_max)]
- - - -
-
-

Set up chromosomal sizes for making bins.

-

(This has nothing to do with Strelka, but it just so happens this is a file with the sizes of the chromosomes in this genome build, hg38).

- - - -
chr_sizes <- readr::read_tsv(file.path(input_dir, "WGS.hg38.strelka2.unpadded.bed"),
-  col_names = c("chrom", "start", "end")
-) %>%
-  # Reformat the chromosome variable to drop the "chr"
-  dplyr::mutate(chrom = factor(gsub("chr", "", chrom),
-    levels = c(1:22, "X", "Y", "M")
-  )) %>%
-  # Remove sex chromosomes
-  dplyr::filter(!(chrom %in% c("X", "Y", "M")))
- - -
Parsed with column specification:
-cols(
-  chrom = col_character(),
-  start = col_double(),
-  end = col_double()
-)
- - -
# Make chromosome size named vector for Heatmap annotation
-chr_sizes_vector <- chr_sizes$end
-names(chr_sizes_vector) <- chr_sizes$chrom
- - - -
-
-

Set up uncallable regions data

-

Regions that were not able to be accurately called will need to be color coded gray later. Here, we are setting up the uncallable regions like we did with the callable regions.

- - - -
uncallable_bed <- readr::read_tsv(
-  file.path(
-    "..",
-    "copy_number_consensus_call",
-    "ref",
-    "cnv_excluded_regions.bed"
-  ),
-  col_names = c("chrom", "start", "end")
-) %>%
-  # Reformat the chromosome variable to drop the "chr"
-  dplyr::mutate(chrom = factor(gsub("chr", "", chrom),
-    levels = c(1:22, "X", "Y")
-  )) %>%
-  dplyr::filter(
-    # Drop CNVs that don't have chromosome labels
-    !is.na(chrom),
-    # Drop sex chromosomes
-    !(chrom %in% c("X", "Y", "M"))
-  )
- - -
Parsed with column specification:
-cols(
-  chrom = col_character(),
-  start = col_double(),
-  end = col_double()
-)
- - - -

Set up uncallable regions as GenomicRanges.

- - - -
uncallable_ranges <- GenomicRanges::GRanges(
-  seqnames = uncallable_bed$chrom,
-  ranges = IRanges::IRanges(
-    start = uncallable_bed$start,
-    end = uncallable_bed$end
-  )
-)
- - - -
-
-
-

Call bin CN statuses for each sample

-

Set up binned genome ranges.

- - - -
# Set up bins of ~1Mb size
-bins <- GenomicRanges::tileGenome(
-  chr_sizes_vector,
-  tilewidth = 1e6
-)
-# Uncompress these ranges
-bins <- unlist(bins)
- - - -

Run the bin status calling on each sample.

- - - - -
-
-

Set up heatmap annotation objects

-

Make color key.

- - - -
color_key <- structure(c(divergent_col_palette, "#9932CC", "#cccccc"),
-  names = c("loss", "neutral", "gain", "unstable", "uncallable")
-)
- - - -
-

Make column annotation object

-

Extract chromosome labels and make an alternating color key for them. This annotation object strategy was originally from chromosomal-instability.

- - - -
# Set up chromosome labels from bins as a factor vector
-chrs <- paste0("chr", S4Vectors::decode(bins@seqnames))
-chrs <- factor(chrs, levels = paste0("chr", 1:22))
-
-# Make a key for assigning alternating colors to the chromosomes
-chr_colors <- rep(c("gray", "lightblue"),
-  length.out = length(unique(chrs))
-)
-names(chr_colors) <- unique(chrs)
-
-# Get coordinate start positions
-chr_pos <- match(unique(chrs), chrs)
-
-# Get mid points of chromosome labels
-mid_points <- ceiling((c(chr_pos[2:length(chr_pos)], length(chrs)) + chr_pos[1:length(chr_pos)]) / 2)
- - - -

Make chromosomal labeling HeatmapAnnotation object.

- - - -
# Make text labels for chromosome text
-chr_text <- ComplexHeatmap::anno_mark(
-  at = mid_points,
-  labels = levels(chrs),
-  which = "column",
-  side = "bottom",
-  labels_rot = 45,
-  labels_gp = grid::gpar(cex = 0.65)
-)
-
-# Create the Heatmap annotation object
-chr_annot <- ComplexHeatmap::HeatmapAnnotation(
-  df = data.frame(chrs),
-  col = list(chrs = chr_colors),
-  name = "",
-  show_legend = FALSE,
-  show_annotation_name = FALSE,
-  mark = chr_text # Put the text in
-)
- - - -
-
-

Make row annotation object

-

Make histology labeling HeatmapAnnotation object. This annotation object strategy was originally from chromosomal-instability.

- - - -
# Get the histologies for the samples in this set and order them by histology
-histologies <-
-  data.frame(Kids_First_Biospecimen_ID = bin_calls_df$biospecimen_id) %>%
-  dplyr::inner_join(metadata %>%
-    dplyr::select(Kids_First_Biospecimen_ID, short_histology, sample_color)) %>%
-  dplyr::mutate(short_histology = factor(short_histology)) %>%
-  dplyr::arrange(short_histology) %>%
-  tibble::column_to_rownames("Kids_First_Biospecimen_ID")
- - -
Joining, by = "Kids_First_Biospecimen_ID"
-Column `Kids_First_Biospecimen_ID` joining factor and character vector, coercing into character vector
- - -
# Make color key specific to these samples
-histologies_color_key_filtered <- unique(histologies$sample_color)
-names(histologies_color_key_filtered) <- unique(histologies$short_histology)
-
-# Drop this column so ComplexHeatmap isn't tempted to plot it
-histologies <- dplyr::select(histologies, -sample_color)
-
-# Get coordinate start positions
-hist_pos <- match(names(histologies_color_key_filtered), histologies$short_histology)
-
-# Get mid points of chromosome labels
-mid_points <- ceiling((c(hist_pos[2:length(hist_pos)], length(histologies$short_histology)) + hist_pos[1:length(hist_pos)]) / 2)
- - - - - - -
# Make text labels for chromosome text
-hist_text <- ComplexHeatmap::anno_mark(
-  at = mid_points,
-  labels = levels(histologies$short_histology),
-  which = "row",
-  side = "right",
-  labels_gp = grid::gpar(cex = 0.65),
-  link_width = grid::unit(15, "mm")
-)
-
-# Create the Heatmap annotation object
-hist_annot <- ComplexHeatmap::HeatmapAnnotation(
-  df = data.frame(histologies),
-  col = list(short_histology = histologies_color_key_filtered),
-  which = "row",
-  show_annotation_name = FALSE,
-  show_legend = FALSE,
-  mark = hist_text # Put the text in
-)
- - - -

Format bin_calls_df as a matrix with rownames for ComplexHeatmap to use.

- - - -
bin_calls_mat <- bin_calls_df %>%
-  tibble::column_to_rownames("biospecimen_id") %>%
-  as.matrix()
-
-# Ensure that this matrix is in the same order as the annotation
-bin_calls_mat <- bin_calls_mat[rownames(histologies), ]
-
-# Double check its in thte same order
-all.equal(rownames(bin_calls_mat), rownames(histologies))
- - -
[1] TRUE
- - - -
-
-
-

Assemble CN status heatmap

- - - -
# Plot on a heatmap
-heatmap <- ComplexHeatmap::Heatmap(
-  bin_calls_mat,
-  name = "CN status",
-  col = color_key,
-  cluster_columns = FALSE,
-  cluster_rows = FALSE,
-  show_column_names = FALSE,
-  show_row_names = FALSE,
-  bottom_annotation = chr_annot,
-  right_annotation = hist_annot,
-  heatmap_legend_param = list(nrow = 1),
-  raster_quality = 8
-)
- - - -

Print out heatmap.

- - - -
ComplexHeatmap::draw(heatmap, heatmap_legend_side = "bottom")
- - -

- - - -

Save to PDF.

- - - -
# Save plot as PDF
-pdf(file.path(plots_dir, "cn_status_heatmap.pdf"))
-ComplexHeatmap::draw(heatmap, heatmap_legend_side = "bottom")
-dev.off()
- - -
null device 
-          1 
- - - -
-
-

Session Info

- - - -
sessionInfo()
- - -
R version 3.6.0 (2019-04-26)
-Platform: x86_64-pc-linux-gnu (64-bit)
-Running under: Debian GNU/Linux 9 (stretch)
-
-Matrix products: default
-BLAS/LAPACK: /usr/lib/libopenblasp-r0.2.19.so
-
-locale:
- [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
- [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
- [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
- [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
- [9] LC_ADDRESS=C               LC_TELEPHONE=C            
-[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
-
-attached base packages:
-[1] stats     graphics  grDevices utils     datasets  methods   base     
-
-loaded via a namespace (and not attached):
- [1] circlize_0.4.9         shape_1.4.4            GetoptLong_0.1.7      
- [4] tidyselect_0.2.5       xfun_0.8               purrr_0.3.2           
- [7] colorspace_1.4-1       htmltools_0.3.6        stats4_3.6.0          
-[10] yaml_2.2.0             base64enc_0.1-3        rlang_0.4.0           
-[13] R.oo_1.22.0            pillar_1.4.2           glue_1.3.1            
-[16] R.utils_2.9.0          BiocGenerics_0.32.0    RColorBrewer_1.1-2    
-[19] GenomeInfoDbData_1.2.2 stringr_1.4.0          zlibbioc_1.32.0       
-[22] munsell_0.5.0          gtable_0.3.0           R.methodsS3_1.7.1     
-[25] GlobalOptions_0.1.0    evaluate_0.14          labeling_0.3          
-[28] knitr_1.23             ComplexHeatmap_2.2.0   IRanges_2.20.2        
-[31] GenomeInfoDb_1.22.1    parallel_3.6.0         Rcpp_1.0.1            
-[34] readr_1.3.1            scales_1.0.0           S4Vectors_0.24.4      
-[37] jsonlite_1.6           XVector_0.26.0         rjson_0.2.20          
-[40] ggplot2_3.2.0          hms_0.4.2              png_0.1-7             
-[43] digest_0.6.20          stringi_1.4.3          dplyr_0.8.3           
-[46] GenomicRanges_1.38.0   grid_3.6.0             clue_0.3-57           
-[49] tools_3.6.0            bitops_1.0-6           magrittr_1.5          
-[52] RCurl_1.95-4.12        lazyeval_0.2.2         tibble_2.1.3          
-[55] cluster_2.1.0          crayon_1.3.4           tidyr_0.8.3           
-[58] pkgconfig_2.0.2        data.table_1.12.2      assertthat_0.2.1      
-[61] rmarkdown_1.13         rstudioapi_0.10        R6_2.4.0              
-[64] compiler_3.6.0        
- - -
- -
LS0tCnRpdGxlOiAiQ04gU3RhdHVzIEhlYXRtYXAiCm91dHB1dDogICAKICBodG1sX25vdGVib29rOiAKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCmF1dGhvcjogQ2FuZGFjZSBTYXZvbmVuIGZvciBBTFNGIC0gQ0NETApkYXRlOiAyMDIwCi0tLQoKIyMgUHVycG9zZTogCgpDcmVhdGUgYSBzdW1tYXJ5IGhlYXRtYXAgb2YgY29weSBudW1iZXIgc3RhdHVzIGZyb20gdGhlIGNvbnNlbnN1cyBDTlYgY2FsbCBkYXRhLiAKVGhpcyBpcyBkb25lIGJ5IGJpbm5pbmcgdGhlIGdlbm9tZSBhbmQgY2FsY3VsYXRpbmcgdGhlIHNlZ21lbnQncyBjb3ZlcmFnZSBvZiB0aGUKQ05WIGNvbnNlbnN1cyBzZWdtZW50cy4gCkEgYmluIGlzIGRlY2xhcmVkIGEgcGFydGljdWxhciBjb3B5IG51bWJlciBzdGF0dXMgaWYgdGhhdCBzdGF0dXMncyBiYXNlIHBhaXIgCmNvdmVyYWdlIGZyYWN0aW9uIGlzIGFib3ZlIGEgY2VydGFpbiB0aHJlc2hvbGQgKGBmcmFjX3RocmVzaG9sZGApIGFuZCB0aGUgY2FsbGFibGUgCnBvcnRpb24gb2YgdGhlIGJpbiBpcyBoaWdoZXIgdGhhbiB0aGUgdGhyZXNob2xkLCBgZnJhY191bmNhbGxhYmxlYC4KCiMjIyBVc2FnZQoKVGhpcyBub3RlYm9vayBjYW4gYmUgcnVuIHZpYSB0aGUgY29tbWFuZCBsaW5lIGZyb20gdGhlIHRvcCBkaXJlY3Rvcnkgb2YgdGhlIApyZXBvc2l0b3J5IGFzIGZvbGxvd3M6CgpgYGAKUnNjcmlwdCAtZSAicm1hcmtkb3duOjpyZW5kZXIoJ2FuYWx5c2VzL2Nudi1jaHJvbS1wbG90L2NuX3N0YXR1c19oZWF0bWFwLlJtZCcsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjbGVhbiA9IFRSVUUpIgpgYGAKCiMjIyBDdXRvZmZzOiAKCmBgYHtyfQojIFRoZSBtYXggbGVuZ3RoIG9mIGEgc2VnbWVudCB0byB1c2UgdGhlIGRhdGEuCiMgc2VnbWVudHMgdGhhdCBhcmUgdG9vIGxvbmcgbWF5IGRvbWluYXRlIHRoZSBoZWF0bWFwIGFuZC9vciBiZSBpbmRpY2F0b3JzIG9mCiMgYnJvYWRlciBzdHJ1Y3R1cmFsIGNoYW5nZXMKbGVuZ3RoX21heCA8LSAxZTcKCiMgU2V0IG1pbmltdW0gcGVyY2VudGFnZSBvZiBhIGJpbiB0aGF0IHNob3VsZCBiZSBjYWxsYWJsZSB0byByZXBvcnQgZGF0YS4KZnJhY191bmNhbGxhYmxlIDwtIDAuNzUKCiMgQWJzb2x1dGUgZnJhY3Rpb24gbmVlZGVkIGZvciBhIGJpbiB0byBiZSBjYWxsZWQgYSBwYXJ0aWN1bGFyIHN0YXR1cwpmcmFjX3RocmVzaG9sZCA8LSAwLjc1CmBgYAoKIyMjIFNldCBVcAoKYGBge3J9CiMgTWFncml0dHIgcGlwZQpgJT4lYCA8LSBkcGx5cjo6YCU+JWAKYGBgCgojIyMgRGlyZWN0b3JpZXMgYW5kIEZpbGVzCgpgYGB7cn0KIyBQYXRoIHRvIGlucHV0IGRpcmVjdG9yeQppbnB1dF9kaXIgPC0gZmlsZS5wYXRoKCIuLiIsICIuLiIsICJkYXRhIikKZmlndXJlX2RpciA8LSBmaWxlLnBhdGgoIi4uIiwgIi4uIiwgImZpZ3VyZXMiKQpzY3JhdGNoX2RpciA8LSBmaWxlLnBhdGgoIi4uIiwgIi4uIiwgInNjcmF0Y2giKQoKIyBQYXRoIHRvIG91dHB1dCBkaXJlY3RvcnkKcGxvdHNfZGlyIDwtICJwbG90cyIKCiMgQ3JlYXRlIHRoZSBwbG90c19kaXIgaWYgaXQgZG9lcyBub3QgZXhpc3QKaWYgKCFkaXIuZXhpc3RzKHBsb3RzX2RpcikpIHsKICBkaXIuY3JlYXRlKHBsb3RzX2RpciwgcmVjdXJzaXZlID0gVFJVRSkKfQpgYGAKClJlYWQgaW4gY3VzdG9tIGZ1bmN0aW9ucy4KCmBgYHtyfQpzb3VyY2UoZmlsZS5wYXRoKCJ1dGlsIiwgImJpbi1jb3ZlcmFnZS5SIikpCmBgYAoKSW1wb3J0IGNvbG9yIHBhbGV0dGVzLiAKCmBgYHtyfQojIEltcG9ydCBzdGFuZGFyZCBjb2xvciBwYWxldHRlcyBmb3IgcHJvamVjdApoaXN0b2xvZ3lfY29sX3BhbGV0dGUgPC0gcmVhZHI6OnJlYWRfdHN2KAogIGZpbGUucGF0aChmaWd1cmVfZGlyLCAicGFsZXR0ZXMiLCAiaGlzdG9sb2d5X2NvbG9yX3BhbGV0dGUudHN2IikKKSAlPiUKICAjIFdlJ2xsIHVzZSBkZWZyYW1lIHNvIHdlIGNhbiB1c2UgaXQgYXMgYSByZWNvZGluZyBsaXN0CiAgdGliYmxlOjpkZWZyYW1lKCkKYGBgCgpSZWFkIGluIHRoZSBkaXZlcmdlbnQgY29sb3IgcGFsZXR0ZSBhbmQgc2V0IGl0IHVwIHdpdGggdGhyZWUgY29sb3JzLiAKSW4gdGhpcyBpbnN0YW5jZSwgd2Ugb25seSBuZWVkIHRocmVlIHZhbHVlcyBmb3IgYGdhaW5gLCBgbmV1dHJhbGAsIGFuZCBgbG9zc2AuCgpgYGB7cn0KZGl2ZXJnZW50X2NvbF9wYWxldHRlIDwtIHJlYWRyOjpyZWFkX3RzdigKICBmaWxlLnBhdGgoZmlndXJlX2RpciwgInBhbGV0dGVzIiwgImRpdmVyZ2VudF9jb2xvcl9wYWxldHRlLnRzdiIpCikgJT4lCiAgIyBPbmx5IGtlZXAgb25seSB0aGVzZSB0aHJlZSBjb2xvcnMKICBkcGx5cjo6ZmlsdGVyKAogICAgY29sb3JfbmFtZXMgJWluJSBjKCJkaXZlcmdlbnRfbG93XzQiLCAiZGl2ZXJnZW50X25ldXRyYWwiLCAiZGl2ZXJnZW50X2hpZ2hfNCIpCiAgKSAlPiUKICBkcGx5cjo6cHVsbCgiaGV4X2NvZGVzIikKYGBgCgojIyMgUmVhZCBpbiBkYXRhIAoKYGBge3J9CiMgUmVhZCBpbiBtZXRhZGF0YQptZXRhZGF0YSA8LQogIHJlYWRyOjpyZWFkX3RzdihmaWxlLnBhdGgoaW5wdXRfZGlyLCAicGJ0YS1oaXN0b2xvZ2llcy50c3YiKSkgJT4lCiAgIyBFYXNpZXIgdG8gZGVhbCB3aXRoIE5BIHNob3J0IGhpc3RvbG9naWVzIGlmIHRoZXkgYXJlIGxhYmVsZWQgc29tZXRoaW5nIGRpZmZlcmVudAogIGRwbHlyOjptdXRhdGUoc2hvcnRfaGlzdG9sb2d5ID0gYXMuY2hhcmFjdGVyKHRpZHlyOjpyZXBsYWNlX25hKHNob3J0X2hpc3RvbG9neSwgIm5vbmUiKSkpICU+JQogICMgVGFjayBvbiB0aGUgc2FtcGxlIGNvbG9yIHVzaW5nIHRoZSBzaG9ydF9oaXN0b2xvZ3kgY29sdW1uIGFuZCBhIHJlY29kZQogIGRwbHlyOjptdXRhdGUoc2FtcGxlX2NvbG9yID0gZHBseXI6OnJlY29kZSgKICAgIHNob3J0X2hpc3RvbG9neSwKICAgICEhIWhpc3RvbG9neV9jb2xfcGFsZXR0ZQogICkpCmBgYAoKIyMjIFNldCB1cCBjb25zZW5zdXMgY29weSBudW1iZXIgZGF0YQoKYGBge3J9CiMgUmVhZCBpbiB0aGUgc2VnbWVudCBjb3B5IG51bWJlciBkYXRhCnNlZ19kYXRhIDwtIGRhdGEudGFibGU6OmZyZWFkKAogIGZpbGUucGF0aCgKICAgIGlucHV0X2RpciwKICAgICJwYnRhLWNudi1jb25zZW5zdXMuc2VnLmd6IgogICksCiAgZGF0YS50YWJsZSA9IEZBTFNFCikKYGBgCgpTZXQgdXAgdGhlIHN0YXR1cyBmb3IgZWFjaCBjb25zZW5zdXMgc2VnbWVudC4gCgpgYGB7cn0Kc2VnX2RhdGEgPC0gc2VnX2RhdGEgJT4lCiAgIyBKb2luIHRoZSBoaXN0b2xvZ3kgY29sdW1uIHRvIHRoaXMgZGF0YQogIGRwbHlyOjppbm5lcl9qb2luKAogICAgZHBseXI6OnNlbGVjdCgKICAgICAgbWV0YWRhdGEsCiAgICAgICJLaWRzX0ZpcnN0X0Jpb3NwZWNpbWVuX0lEIiwKICAgICAgInNob3J0X2hpc3RvbG9neSIsCiAgICAgICJ0dW1vcl9wbG9pZHkiCiAgICApLAogICAgYnkgPSBjKCJJRCIgPSAiS2lkc19GaXJzdF9CaW9zcGVjaW1lbl9JRCIpCiAgKSAlPiUKICAjIFJlZm9ybWF0IHRoZSBjaHJvbW9zb21lIHZhcmlhYmxlIHRvIGRyb3AgdGhlICJjaHIiCiAgZHBseXI6Om11dGF0ZShjaHJvbSA9IGZhY3Rvcihnc3ViKCJjaHIiLCAiIiwgY2hyb20pLAogICAgbGV2ZWxzID0gYygxOjIyLCAiWCIsICJZIikKICApKSAlPiUKICAjIFJlY29kZSB0aGUgY29weSBudW1iZXIgc3RhdHVzIGJhc2VkIG9uIHBsb2lkeQogIGRwbHlyOjptdXRhdGUoc3RhdHVzID0gZHBseXI6OmNhc2Vfd2hlbigKICAgICMgd2hlbiB0aGUgY29weSBudW1iZXIgaXMgbGVzcyB0aGFuIGluZmVycmVkIHBsb2lkeSwgbWFyayB0aGlzIGFzIGEgbG9zcwogICAgY29weS5udW0gPCB0dW1vcl9wbG9pZHkgfiAibG9zcyIsCiAgICAjIGlmIGNvcHkgbnVtYmVyIGlzIGhpZ2hlciB0aGFuIHBsb2lkeSwgbWFyayBhcyBhIGdhaW4KICAgIGNvcHkubnVtID4gdHVtb3JfcGxvaWR5IH4gImdhaW4iLAogICAgY29weS5udW0gPT0gdHVtb3JfcGxvaWR5IH4gIm5ldXRyYWwiCiAgKSkgJT4lCiAgIyBSZW1vdmUgc2V4IGNocm9tb3NvbWVzCiAgZHBseXI6OmZpbHRlcigKICAgICEoY2hyb20gJWluJSBjKCJYIiwgIlkiLCAiTSIpKSwKICAgICFpcy5uYShzdGF0dXMpCiAgKQpgYGAKClNldCB1cCBzZWcgZGF0YSBhcyBHZW5vbWljUmFuZ2VzLiAKCmBgYHtyfQpzZWdfcmFuZ2VzIDwtIEdlbm9taWNSYW5nZXM6OkdSYW5nZXMoCiAgc2VxbmFtZXMgPSBzZWdfZGF0YSRjaHJvbSwKICByYW5nZXMgPSBJUmFuZ2VzOjpJUmFuZ2VzKAogICAgc3RhcnQgPSBzZWdfZGF0YSRsb2Muc3RhcnQsCiAgICBlbmQgPSBzZWdfZGF0YSRsb2MuZW5kCiAgKSwKICBzdGF0dXMgPSBzZWdfZGF0YSRzdGF0dXMsCiAgaGlzdG9sb2d5ID0gc2VnX2RhdGEkc2hvcnRfaGlzdG9sb2d5LAogIGJpb3NwZWNpbWVuID0gc2VnX2RhdGEkSUQKKQpgYGAKCkV4cGxvcmUgdGhlIGRpc3RyaWJ1dGlvbiBvZiBzZWdtZW50IGxlbmd0aHMuIAoKYGBge3J9CmdncGxvdDI6OnFwbG90KHNlZ19yYW5nZXNAcmFuZ2VzQHdpZHRoLCBnZW9tID0gImRlbnNpdHkiKSArCiAgZ2dwbG90Mjo6dGhlbWVfY2xhc3NpYygpICsKICBnZ3Bsb3QyOjp5bGFiKCJkZW5zaXR5IikgKwogIGdncGxvdDI6OnhsYWIoIlNlZ21lbnQgbGVuZ3RoIGluIGJwIikgKwogICMgTGV0J3MgcHV0IGEgdmVydGljYWwgbGluZSB3aGVyZSB3ZSB3aWxsIG1ha2UgYSBmaWx0ZXIgY3V0b2ZmCiAgZ2dwbG90Mjo6Z2VvbV92bGluZSh4aW50ZXJjZXB0ID0gbGVuZ3RoX21heCwgY29sb3IgPSAicmVkIikKYGBgCgpGaWx0ZXIgb3V0IHNlZ21lbnRzIHRoYXQgYXJlIGxvbmdlciB0aGFuIG91ciBjdXRvZmYuIAoKYGBge3J9CmZpbHRlcmVkX3NlZ19yYW5nZXMgPC0gc2VnX3Jhbmdlc1t3aGljaChzZWdfcmFuZ2VzQHJhbmdlc0B3aWR0aCA8IGxlbmd0aF9tYXgpXQpgYGAKCiMjIyBTZXQgdXAgY2hyb21vc29tYWwgc2l6ZXMgZm9yIG1ha2luZyBiaW5zLiAKCihUaGlzIGhhcyBub3RoaW5nIHRvIGRvIHdpdGggU3RyZWxrYSwgYnV0IGl0IGp1c3Qgc28gaGFwcGVucyB0aGlzIGlzIGEgZmlsZSAKd2l0aCB0aGUgc2l6ZXMgb2YgdGhlIGNocm9tb3NvbWVzIGluIHRoaXMgZ2Vub21lIGJ1aWxkLCBoZzM4KS4KCmBgYHtyfQpjaHJfc2l6ZXMgPC0gcmVhZHI6OnJlYWRfdHN2KGZpbGUucGF0aChpbnB1dF9kaXIsICJXR1MuaGczOC5zdHJlbGthMi51bnBhZGRlZC5iZWQiKSwKICBjb2xfbmFtZXMgPSBjKCJjaHJvbSIsICJzdGFydCIsICJlbmQiKQopICU+JQogICMgUmVmb3JtYXQgdGhlIGNocm9tb3NvbWUgdmFyaWFibGUgdG8gZHJvcCB0aGUgImNociIKICBkcGx5cjo6bXV0YXRlKGNocm9tID0gZmFjdG9yKGdzdWIoImNociIsICIiLCBjaHJvbSksCiAgICBsZXZlbHMgPSBjKDE6MjIsICJYIiwgIlkiLCAiTSIpCiAgKSkgJT4lCiAgIyBSZW1vdmUgc2V4IGNocm9tb3NvbWVzCiAgZHBseXI6OmZpbHRlcighKGNocm9tICVpbiUgYygiWCIsICJZIiwgIk0iKSkpCgoKIyBNYWtlIGNocm9tb3NvbWUgc2l6ZSBuYW1lZCB2ZWN0b3IgZm9yIEhlYXRtYXAgYW5ub3RhdGlvbgpjaHJfc2l6ZXNfdmVjdG9yIDwtIGNocl9zaXplcyRlbmQKbmFtZXMoY2hyX3NpemVzX3ZlY3RvcikgPC0gY2hyX3NpemVzJGNocm9tCmBgYAoKIyMjIFNldCB1cCB1bmNhbGxhYmxlIHJlZ2lvbnMgZGF0YSAKClJlZ2lvbnMgdGhhdCB3ZXJlIG5vdCBhYmxlIHRvIGJlIGFjY3VyYXRlbHkgY2FsbGVkIHdpbGwgbmVlZCB0byBiZSBjb2xvciBjb2RlZCBncmF5IGxhdGVyLiAKSGVyZSwgd2UgYXJlIHNldHRpbmcgdXAgdGhlIHVuY2FsbGFibGUgcmVnaW9ucyBsaWtlIHdlIGRpZCB3aXRoIHRoZSBjYWxsYWJsZSByZWdpb25zLgoKYGBge3J9CnVuY2FsbGFibGVfYmVkIDwtIHJlYWRyOjpyZWFkX3RzdigKICBmaWxlLnBhdGgoCiAgICAiLi4iLAogICAgImNvcHlfbnVtYmVyX2NvbnNlbnN1c19jYWxsIiwKICAgICJyZWYiLAogICAgImNudl9leGNsdWRlZF9yZWdpb25zLmJlZCIKICApLAogIGNvbF9uYW1lcyA9IGMoImNocm9tIiwgInN0YXJ0IiwgImVuZCIpCikgJT4lCiAgIyBSZWZvcm1hdCB0aGUgY2hyb21vc29tZSB2YXJpYWJsZSB0byBkcm9wIHRoZSAiY2hyIgogIGRwbHlyOjptdXRhdGUoY2hyb20gPSBmYWN0b3IoZ3N1YigiY2hyIiwgIiIsIGNocm9tKSwKICAgIGxldmVscyA9IGMoMToyMiwgIlgiLCAiWSIpCiAgKSkgJT4lCiAgZHBseXI6OmZpbHRlcigKICAgICMgRHJvcCBDTlZzIHRoYXQgZG9uJ3QgaGF2ZSBjaHJvbW9zb21lIGxhYmVscwogICAgIWlzLm5hKGNocm9tKSwKICAgICMgRHJvcCBzZXggY2hyb21vc29tZXMKICAgICEoY2hyb20gJWluJSBjKCJYIiwgIlkiLCAiTSIpKQogICkKYGBgCgpTZXQgdXAgdW5jYWxsYWJsZSByZWdpb25zIGFzIEdlbm9taWNSYW5nZXMuIAoKYGBge3J9CnVuY2FsbGFibGVfcmFuZ2VzIDwtIEdlbm9taWNSYW5nZXM6OkdSYW5nZXMoCiAgc2VxbmFtZXMgPSB1bmNhbGxhYmxlX2JlZCRjaHJvbSwKICByYW5nZXMgPSBJUmFuZ2VzOjpJUmFuZ2VzKAogICAgc3RhcnQgPSB1bmNhbGxhYmxlX2JlZCRzdGFydCwKICAgIGVuZCA9IHVuY2FsbGFibGVfYmVkJGVuZAogICkKKQpgYGAKCiMjIENhbGwgYmluIENOIHN0YXR1c2VzIGZvciBlYWNoIHNhbXBsZQoKU2V0IHVwIGJpbm5lZCBnZW5vbWUgcmFuZ2VzLiAKCmBgYHtyfQojIFNldCB1cCBiaW5zIG9mIH4xTWIgc2l6ZQpiaW5zIDwtIEdlbm9taWNSYW5nZXM6OnRpbGVHZW5vbWUoCiAgY2hyX3NpemVzX3ZlY3RvciwKICB0aWxld2lkdGggPSAxZTYKKQojIFVuY29tcHJlc3MgdGhlc2UgcmFuZ2VzCmJpbnMgPC0gdW5saXN0KGJpbnMpCmBgYAoKUnVuIHRoZSBiaW4gc3RhdHVzIGNhbGxpbmcgb24gZWFjaCBzYW1wbGUuIAoKYGBge3IgZWNobz1GQUxTRX0KIyBHZXQgYSB2ZWN0b3Igb2YgdGhlIGJpb3NwZWNpbWVuIElEcwpzYW1wbGVfaWRzIDwtIHVuaXF1ZShzZWdfZGF0YSRJRCkKCiMgUnVuIGNhbGxfYmluX3N0YXR1cyBmb3IgZWFjaCBiaW9zcGVjaW1lbidzIHNlZ21lbnRzLgpiaW5fY2FsbHNfbGlzdCA8LSBsYXBwbHkoc2FtcGxlX2lkcywKICBjYWxsX2Jpbl9zdGF0dXMsCiAgYmluX3JhbmdlcyA9IGJpbnMsCiAgc2VnX3JhbmdlcyA9IGZpbHRlcmVkX3NlZ19yYW5nZXMsCiAgdW5jYWxsYWJsZV9yYW5nZXMgPSB1bmNhbGxhYmxlX3JhbmdlcywKICBmcmFjX3RocmVzaG9sZF92YWwgPSBmcmFjX3RocmVzaG9sZCwKICBmcmFjX3VuY2FsbGFibGVfdmFsID0gZnJhY191bmNhbGxhYmxlCikKCiMgQnJpbmcgYWxvbmcgc2FtcGxlIElEcwpuYW1lcyhiaW5fY2FsbHNfbGlzdCkgPC0gc2FtcGxlX2lkcwoKIyBGb3JtYXQgaW50byBkYXRhLmZyYW1lCmJpbl9jYWxsc19kZiA8LSBkcGx5cjo6YmluZF9yb3dzKGJpbl9jYWxsc19saXN0LAogIC5pZCA9ICJiaW9zcGVjaW1lbl9pZCIKKQpgYGAKCiMjIFNldCB1cCBoZWF0bWFwIGFubm90YXRpb24gb2JqZWN0cwoKTWFrZSBjb2xvciBrZXkuIAoKYGBge3J9CmNvbG9yX2tleSA8LSBzdHJ1Y3R1cmUoYyhkaXZlcmdlbnRfY29sX3BhbGV0dGUsICIjOTkzMkNDIiwgIiNjY2NjY2MiKSwKICBuYW1lcyA9IGMoImxvc3MiLCAibmV1dHJhbCIsICJnYWluIiwgInVuc3RhYmxlIiwgInVuY2FsbGFibGUiKQopCmBgYAoKIyMjIE1ha2UgY29sdW1uIGFubm90YXRpb24gb2JqZWN0CgpFeHRyYWN0IGNocm9tb3NvbWUgbGFiZWxzIGFuZCBtYWtlIGFuIGFsdGVybmF0aW5nIGNvbG9yIGtleSBmb3IgdGhlbS4gClRoaXMgYW5ub3RhdGlvbiBvYmplY3Qgc3RyYXRlZ3kgd2FzIG9yaWdpbmFsbHkgZnJvbSBbY2hyb21vc29tYWwtaW5zdGFiaWxpdHldKGh0dHBzOi8vZ2l0aHViLmNvbS9BbGV4c0xlbW9uYWRlL09wZW5QQlRBLWFuYWx5c2lzL2Jsb2IvYjVhMzM4MzhkMWU5YmQ3ZTc5MTNhODkyMDFlYzI2MTI1YzE2Yzk0Yy9hbmFseXNlcy9jaHJvbW9zb21hbC1pbnN0YWJpbGl0eS8wMmEtcGxvdC1jaHItaW5zdGFiaWxpdHktaGVhdG1hcHMuUm1kI0w3MykuCgpgYGB7cn0KIyBTZXQgdXAgY2hyb21vc29tZSBsYWJlbHMgZnJvbSBiaW5zIGFzIGEgZmFjdG9yIHZlY3RvcgpjaHJzIDwtIHBhc3RlMCgiY2hyIiwgUzRWZWN0b3JzOjpkZWNvZGUoYmluc0BzZXFuYW1lcykpCmNocnMgPC0gZmFjdG9yKGNocnMsIGxldmVscyA9IHBhc3RlMCgiY2hyIiwgMToyMikpCgojIE1ha2UgYSBrZXkgZm9yIGFzc2lnbmluZyBhbHRlcm5hdGluZyBjb2xvcnMgdG8gdGhlIGNocm9tb3NvbWVzCmNocl9jb2xvcnMgPC0gcmVwKGMoImdyYXkiLCAibGlnaHRibHVlIiksCiAgbGVuZ3RoLm91dCA9IGxlbmd0aCh1bmlxdWUoY2hycykpCikKbmFtZXMoY2hyX2NvbG9ycykgPC0gdW5pcXVlKGNocnMpCgojIEdldCBjb29yZGluYXRlIHN0YXJ0IHBvc2l0aW9ucwpjaHJfcG9zIDwtIG1hdGNoKHVuaXF1ZShjaHJzKSwgY2hycykKCiMgR2V0IG1pZCBwb2ludHMgb2YgY2hyb21vc29tZSBsYWJlbHMKbWlkX3BvaW50cyA8LSBjZWlsaW5nKChjKGNocl9wb3NbMjpsZW5ndGgoY2hyX3BvcyldLCBsZW5ndGgoY2hycykpICsgY2hyX3Bvc1sxOmxlbmd0aChjaHJfcG9zKV0pIC8gMikKYGBgCgpNYWtlIGNocm9tb3NvbWFsIGxhYmVsaW5nIGBIZWF0bWFwQW5ub3RhdGlvbmAgb2JqZWN0LgoKYGBge3J9CiMgTWFrZSB0ZXh0IGxhYmVscyBmb3IgY2hyb21vc29tZSB0ZXh0CmNocl90ZXh0IDwtIENvbXBsZXhIZWF0bWFwOjphbm5vX21hcmsoCiAgYXQgPSBtaWRfcG9pbnRzLAogIGxhYmVscyA9IGxldmVscyhjaHJzKSwKICB3aGljaCA9ICJjb2x1bW4iLAogIHNpZGUgPSAiYm90dG9tIiwKICBsYWJlbHNfcm90ID0gNDUsCiAgbGFiZWxzX2dwID0gZ3JpZDo6Z3BhcihjZXggPSAwLjY1KQopCgojIENyZWF0ZSB0aGUgSGVhdG1hcCBhbm5vdGF0aW9uIG9iamVjdApjaHJfYW5ub3QgPC0gQ29tcGxleEhlYXRtYXA6OkhlYXRtYXBBbm5vdGF0aW9uKAogIGRmID0gZGF0YS5mcmFtZShjaHJzKSwKICBjb2wgPSBsaXN0KGNocnMgPSBjaHJfY29sb3JzKSwKICBuYW1lID0gIiIsCiAgc2hvd19sZWdlbmQgPSBGQUxTRSwKICBzaG93X2Fubm90YXRpb25fbmFtZSA9IEZBTFNFLAogIG1hcmsgPSBjaHJfdGV4dCAjIFB1dCB0aGUgdGV4dCBpbgopCmBgYAoKIyMjIE1ha2Ugcm93IGFubm90YXRpb24gb2JqZWN0CgpNYWtlIGhpc3RvbG9neSBsYWJlbGluZyBgSGVhdG1hcEFubm90YXRpb25gIG9iamVjdC4KVGhpcyBhbm5vdGF0aW9uIG9iamVjdCBzdHJhdGVneSB3YXMgb3JpZ2luYWxseSBmcm9tIFtjaHJvbW9zb21hbC1pbnN0YWJpbGl0eV0oaHR0cHM6Ly9naXRodWIuY29tL0FsZXhzTGVtb25hZGUvT3BlblBCVEEtYW5hbHlzaXMvYmxvYi9iNWEzMzgzOGQxZTliZDdlNzkxM2E4OTIwMWVjMjYxMjVjMTZjOTRjL2FuYWx5c2VzL2Nocm9tb3NvbWFsLWluc3RhYmlsaXR5LzAyYS1wbG90LWNoci1pbnN0YWJpbGl0eS1oZWF0bWFwcy5SbWQjTDczKS4KCmBgYHtyfQojIEdldCB0aGUgaGlzdG9sb2dpZXMgZm9yIHRoZSBzYW1wbGVzIGluIHRoaXMgc2V0IGFuZCBvcmRlciB0aGVtIGJ5IGhpc3RvbG9neQpoaXN0b2xvZ2llcyA8LQogIGRhdGEuZnJhbWUoS2lkc19GaXJzdF9CaW9zcGVjaW1lbl9JRCA9IGJpbl9jYWxsc19kZiRiaW9zcGVjaW1lbl9pZCkgJT4lCiAgZHBseXI6OmlubmVyX2pvaW4obWV0YWRhdGEgJT4lCiAgICBkcGx5cjo6c2VsZWN0KEtpZHNfRmlyc3RfQmlvc3BlY2ltZW5fSUQsIHNob3J0X2hpc3RvbG9neSwgc2FtcGxlX2NvbG9yKSkgJT4lCiAgZHBseXI6Om11dGF0ZShzaG9ydF9oaXN0b2xvZ3kgPSBmYWN0b3Ioc2hvcnRfaGlzdG9sb2d5KSkgJT4lCiAgZHBseXI6OmFycmFuZ2Uoc2hvcnRfaGlzdG9sb2d5KSAlPiUKICB0aWJibGU6OmNvbHVtbl90b19yb3duYW1lcygiS2lkc19GaXJzdF9CaW9zcGVjaW1lbl9JRCIpCgojIE1ha2UgY29sb3Iga2V5IHNwZWNpZmljIHRvIHRoZXNlIHNhbXBsZXMKaGlzdG9sb2dpZXNfY29sb3Jfa2V5X2ZpbHRlcmVkIDwtIHVuaXF1ZShoaXN0b2xvZ2llcyRzYW1wbGVfY29sb3IpCm5hbWVzKGhpc3RvbG9naWVzX2NvbG9yX2tleV9maWx0ZXJlZCkgPC0gdW5pcXVlKGhpc3RvbG9naWVzJHNob3J0X2hpc3RvbG9neSkKCiMgRHJvcCB0aGlzIGNvbHVtbiBzbyBDb21wbGV4SGVhdG1hcCBpc24ndCB0ZW1wdGVkIHRvIHBsb3QgaXQKaGlzdG9sb2dpZXMgPC0gZHBseXI6OnNlbGVjdChoaXN0b2xvZ2llcywgLXNhbXBsZV9jb2xvcikKCiMgR2V0IGNvb3JkaW5hdGUgc3RhcnQgcG9zaXRpb25zCmhpc3RfcG9zIDwtIG1hdGNoKG5hbWVzKGhpc3RvbG9naWVzX2NvbG9yX2tleV9maWx0ZXJlZCksIGhpc3RvbG9naWVzJHNob3J0X2hpc3RvbG9neSkKCiMgR2V0IG1pZCBwb2ludHMgb2YgY2hyb21vc29tZSBsYWJlbHMKbWlkX3BvaW50cyA8LSBjZWlsaW5nKChjKGhpc3RfcG9zWzI6bGVuZ3RoKGhpc3RfcG9zKV0sIGxlbmd0aChoaXN0b2xvZ2llcyRzaG9ydF9oaXN0b2xvZ3kpKSArIGhpc3RfcG9zWzE6bGVuZ3RoKGhpc3RfcG9zKV0pIC8gMikKYGBgCgpgYGB7cn0KIyBNYWtlIHRleHQgbGFiZWxzIGZvciBjaHJvbW9zb21lIHRleHQKaGlzdF90ZXh0IDwtIENvbXBsZXhIZWF0bWFwOjphbm5vX21hcmsoCiAgYXQgPSBtaWRfcG9pbnRzLAogIGxhYmVscyA9IGxldmVscyhoaXN0b2xvZ2llcyRzaG9ydF9oaXN0b2xvZ3kpLAogIHdoaWNoID0gInJvdyIsCiAgc2lkZSA9ICJyaWdodCIsCiAgbGFiZWxzX2dwID0gZ3JpZDo6Z3BhcihjZXggPSAwLjY1KSwKICBsaW5rX3dpZHRoID0gZ3JpZDo6dW5pdCgxNSwgIm1tIikKKQoKIyBDcmVhdGUgdGhlIEhlYXRtYXAgYW5ub3RhdGlvbiBvYmplY3QKaGlzdF9hbm5vdCA8LSBDb21wbGV4SGVhdG1hcDo6SGVhdG1hcEFubm90YXRpb24oCiAgZGYgPSBkYXRhLmZyYW1lKGhpc3RvbG9naWVzKSwKICBjb2wgPSBsaXN0KHNob3J0X2hpc3RvbG9neSA9IGhpc3RvbG9naWVzX2NvbG9yX2tleV9maWx0ZXJlZCksCiAgd2hpY2ggPSAicm93IiwKICBzaG93X2Fubm90YXRpb25fbmFtZSA9IEZBTFNFLAogIHNob3dfbGVnZW5kID0gRkFMU0UsCiAgbWFyayA9IGhpc3RfdGV4dCAjIFB1dCB0aGUgdGV4dCBpbgopCmBgYAoKRm9ybWF0IGBiaW5fY2FsbHNfZGZgIGFzIGEgbWF0cml4IHdpdGggcm93bmFtZXMgZm9yIGBDb21wbGV4SGVhdG1hcGAgdG8gdXNlLiAKCmBgYHtyfQpiaW5fY2FsbHNfbWF0IDwtIGJpbl9jYWxsc19kZiAlPiUKICB0aWJibGU6OmNvbHVtbl90b19yb3duYW1lcygiYmlvc3BlY2ltZW5faWQiKSAlPiUKICBhcy5tYXRyaXgoKQoKIyBFbnN1cmUgdGhhdCB0aGlzIG1hdHJpeCBpcyBpbiB0aGUgc2FtZSBvcmRlciBhcyB0aGUgYW5ub3RhdGlvbgpiaW5fY2FsbHNfbWF0IDwtIGJpbl9jYWxsc19tYXRbcm93bmFtZXMoaGlzdG9sb2dpZXMpLCBdCgojIERvdWJsZSBjaGVjayBpdHMgaW4gdGh0ZSBzYW1lIG9yZGVyCmFsbC5lcXVhbChyb3duYW1lcyhiaW5fY2FsbHNfbWF0KSwgcm93bmFtZXMoaGlzdG9sb2dpZXMpKQpgYGAKCiMjIEFzc2VtYmxlIENOIHN0YXR1cyBoZWF0bWFwCgpgYGB7cn0KIyBQbG90IG9uIGEgaGVhdG1hcApoZWF0bWFwIDwtIENvbXBsZXhIZWF0bWFwOjpIZWF0bWFwKAogIGJpbl9jYWxsc19tYXQsCiAgbmFtZSA9ICJDTiBzdGF0dXMiLAogIGNvbCA9IGNvbG9yX2tleSwKICBjbHVzdGVyX2NvbHVtbnMgPSBGQUxTRSwKICBjbHVzdGVyX3Jvd3MgPSBGQUxTRSwKICBzaG93X2NvbHVtbl9uYW1lcyA9IEZBTFNFLAogIHNob3dfcm93X25hbWVzID0gRkFMU0UsCiAgYm90dG9tX2Fubm90YXRpb24gPSBjaHJfYW5ub3QsCiAgcmlnaHRfYW5ub3RhdGlvbiA9IGhpc3RfYW5ub3QsCiAgaGVhdG1hcF9sZWdlbmRfcGFyYW0gPSBsaXN0KG5yb3cgPSAxKSwKICByYXN0ZXJfcXVhbGl0eSA9IDgKKQpgYGAKClByaW50IG91dCBoZWF0bWFwLiAKCmBgYHtyfQpDb21wbGV4SGVhdG1hcDo6ZHJhdyhoZWF0bWFwLCBoZWF0bWFwX2xlZ2VuZF9zaWRlID0gImJvdHRvbSIpCmBgYAoKU2F2ZSB0byBQREYuIAoKYGBge3J9CiMgU2F2ZSBwbG90IGFzIFBERgpwZGYoZmlsZS5wYXRoKHBsb3RzX2RpciwgImNuX3N0YXR1c19oZWF0bWFwLnBkZiIpKQpDb21wbGV4SGVhdG1hcDo6ZHJhdyhoZWF0bWFwLCBoZWF0bWFwX2xlZ2VuZF9zaWRlID0gImJvdHRvbSIpCmRldi5vZmYoKQpgYGAKCiMgU2Vzc2lvbiBJbmZvCgpgYGB7cn0Kc2Vzc2lvbkluZm8oKQpgYGAK
- - -
-
- -
- - - - - - - - diff --git a/analyses/cnv-chrom-plot/plots/cn_status_heatmap.pdf b/analyses/cnv-chrom-plot/plots/cn_status_heatmap.pdf deleted file mode 100644 index 9e31c025cf..0000000000 Binary files a/analyses/cnv-chrom-plot/plots/cn_status_heatmap.pdf and /dev/null differ diff --git a/analyses/cnv-chrom-plot/util/bin-coverage.R b/analyses/cnv-chrom-plot/util/bin-coverage.R index 427bc0a0e3..083edd07ba 100644 --- a/analyses/cnv-chrom-plot/util/bin-coverage.R +++ b/analyses/cnv-chrom-plot/util/bin-coverage.R @@ -5,17 +5,17 @@ # 2020 bp_per_bin <- function(bin_ranges, status_ranges) { - # Given a binned genome ranges object and another GenomicRanges object, + # Given a binned genome ranges object and another GenomicRanges object, # Return the number of bp covered per bin. # # Args: - # bin_ranges: A binned GenomicRanges made from tileGenome. + # bin_ranges: A binned GenomicRanges made from tileGenome. # status_ranges:A GenomicRanges object to calculate what percent coverage of - # each bin. + # each bin. # # Returns: - # a data.frame with bins x number of bp - + # a data.frame with bins x number of bp + # Find the portions of each copy number segment that overlap with each bin. bin_overlaps <- GenomicRanges::pintersect( IRanges::findOverlapPairs( @@ -23,26 +23,26 @@ bp_per_bin <- function(bin_ranges, status_ranges) { status_ranges ) ) - + # Which bins do the segs in `bin_overlaps` overlap with? bin_indices <- GenomicRanges::findOverlaps( bin_ranges, bin_overlaps ) - + # Get the sum of the length of all seg portions for each bin. bp_per_bin <- tapply( bin_overlaps@ranges@width, # Get length of each sequence within the bin bin_indices@from, # Index of which bin it overlaps sum ) # Add up length per bin - + # Format as data.frame with rows = bins per_bin_df <- data.frame( bin = as.numeric(names(bp_per_bin)), bp_per_bin = as.numeric(bp_per_bin) ) - + # Store dummy counts if there are no ranges that are in the bins if (nrow(per_bin_df) == 0) { per_bin_df <- data.frame( @@ -56,71 +56,71 @@ bp_per_bin <- function(bin_ranges, status_ranges) { call_bin_status <- function(sample_id, seg_ranges, bin_ranges, - uncallable_ranges, - frac_threshold_val = .75, + uncallable_ranges, + frac_threshold_val = .75, frac_uncallable_val = .75) { - # Given a sample_id, CN segment ranges, and binned genome ranges object, - # make a call for each bin on what CN copy status has the most coverage in the bin. - # Uses bp_per_bin function. + # Given a sample_id, CN segment ranges, and binned genome ranges object, + # make a call for each bin on what CN copy status has the most coverage in the bin. + # Uses bp_per_bin function. # # Args: # sample_id: A string that corresponds to a single biospecimen id - # seg_ranges: A GenomicRanges object that contains a `status` and a `biospecimen` slot. - # The `biospecimen slot will be used to split out the `sample_id`'s corresponding ranges. - # The `status` slot should have gain/loss/neutral. - # bin_ranges: A binned GenomicRanges made from tileGenome that has been uncompressed with `unlist`. + # seg_ranges: A GenomicRanges object that contains a `status` and a `biospecimen` slot. + # The `biospecimen slot will be used to split out the `sample_id`'s corresponding ranges. + # The `status` slot should have gain/loss/neutral. + # bin_ranges: A binned GenomicRanges made from tileGenome that has been uncompressed with `unlist`. # frac_threshold: What coverage fraction do we need to make the call? # uncallable_threshold: What fraction of a bin needs to be callable for us # us to make a status call? # # Returns: - # a small data.frame that contains the status call of the sample for each bin. + # a small data.frame that contains the status call of the sample for each bin. # # Extract the ranges for this sample sample_seg_ranges <- seg_ranges[which(seg_ranges$biospecimen == sample_id)] - + # Split ranges into their respective statuses gain_ranges <- sample_seg_ranges[sample_seg_ranges$status == "gain"] loss_ranges <- sample_seg_ranges[sample_seg_ranges$status == "loss"] neutral_ranges <- sample_seg_ranges[sample_seg_ranges$status == "neutral"] - + # Calculate length of each type of status per bin gain_per_bin <- bp_per_bin(bin_ranges, gain_ranges) loss_per_bin <- bp_per_bin(bin_ranges, loss_ranges) neutral_per_bin <- bp_per_bin(bin_ranges, neutral_ranges) uncallable_per_bin <- bp_per_bin(bin_ranges, uncallable_ranges) - + # Format this data into one data.frame where each row is a bin bin_bp_status <- data.frame( bin = as.numeric(1:length(bin_ranges)), # Keep bin width bin_width = bin_ranges@ranges@width ) %>% - # Join loss coverage data - dplyr::left_join(gain_per_bin, - by = "bin" - ) %>% - # Rename as .gain - dplyr::rename(bp_per_bin.gain = bp_per_bin) %>% - # Join loss coverage data - dplyr::left_join(loss_per_bin, - by = "bin" - ) %>% - # Rename as .loss - dplyr::rename(bp_per_bin.loss = bp_per_bin) %>% - # Join neutral coverage data - dplyr::left_join(neutral_per_bin, - by = "bin" - ) %>% - # Rename as .neutral - dplyr::rename(bp_per_bin.neutral = bp_per_bin) %>% - # Join uncallable loss coverage data - dplyr::left_join(uncallable_per_bin, - by = "bin" - ) %>% - # Rename as .uncallable - dplyr::rename(bp_per_bin.uncallable = bp_per_bin) %>% + # Join loss coverage data + dplyr::left_join(gain_per_bin, + by = "bin" + ) %>% + # Rename as .gain + dplyr::rename(bp_per_bin.gain = bp_per_bin) %>% + # Join loss coverage data + dplyr::left_join(loss_per_bin, + by = "bin" + ) %>% + # Rename as .loss + dplyr::rename(bp_per_bin.loss = bp_per_bin) %>% + # Join neutral coverage data + dplyr::left_join(neutral_per_bin, + by = "bin" + ) %>% + # Rename as .neutral + dplyr::rename(bp_per_bin.neutral = bp_per_bin) %>% + # Join uncallable loss coverage data + dplyr::left_join(uncallable_per_bin, + by = "bin" + ) %>% + # Rename as .uncallable + dplyr::rename(bp_per_bin.uncallable = bp_per_bin) %>% # If there is an NA, at this point we can assume it means 0 dplyr::mutate_at( dplyr::vars( @@ -136,17 +136,17 @@ call_bin_status <- function(sample_id, frac_uncallable = bp_per_bin.uncallable / bin_width ) %>% # Use these percentages for declaring final call per bin based on - # the frac_threshold_val + # the frac_delta_threshold dplyr::mutate( - status = dplyr::case_when( - frac_uncallable > frac_uncallable_val ~ "uncallable", - frac_gain > frac_threshold_val ~ "gain", - frac_loss > frac_threshold_val ~ "loss", - frac_gain + frac_loss > frac_threshold_val ~ "unstable", - TRUE ~ "neutral" + status = dplyr::case_when( + frac_uncallable > uncallable_threshold ~ "uncallable" + frac_gain > threshold ~ "gain", + frac_loss > threshold ~ "loss", + frac_neutral > threshold ~ "neutral", + TRUE ~ "unstable" ) ) - + # Format this data as a status status_df <- bin_bp_status %>% # Only keep the bin and status columns @@ -154,7 +154,7 @@ call_bin_status <- function(sample_id, # Arrange the bins in order dplyr::arrange(bin) %>% # Spread this data so we can make it a sample x bin matrix later - tidyr::spread(bin, status) - + tidyr::spread(bin, status) + return(status_df) }