From 501734215df0fa208f588d560847b0fbdde7920f Mon Sep 17 00:00:00 2001 From: Ashastry2 Date: Thu, 24 Oct 2024 12:14:41 -0400 Subject: [PATCH] Updates documentation and vignette to show changes to plotSCEHeatmap function Fixes notes about namespace --- NAMESPACE | 4 ++ R/plotSCEHeatmap.R | 17 +++++-- man/plotSCEHeatmap.Rd | 13 +++++- vignettes/articles/heatmap.Rmd | 81 +++++++++++++++++++++++----------- 4 files changed, 84 insertions(+), 31 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e34b6d0f9..c069ce18e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -271,6 +271,7 @@ import(GSVAdata) import(SingleCellExperiment) import(eds) importFrom(BiocParallel,SerialParam) +importFrom(ComplexHeatmap,anno_barplot) importFrom(S4Vectors,"metadata<-") importFrom(S4Vectors,metadata) importFrom(SingleCellExperiment,"counts<-") @@ -317,8 +318,11 @@ importFrom(stats,prcomp) importFrom(stats,quantile) importFrom(stringr,str_c) importFrom(stringr,str_replace_all) +importFrom(tibble,column_to_rownames) +importFrom(tibble,remove_rownames) importFrom(tibble,tibble) importFrom(tidyr,spread) +importFrom(tidyr,unite) importFrom(tools,file_ext) importFrom(utils,head) importFrom(utils,packageVersion) diff --git a/R/plotSCEHeatmap.R b/R/plotSCEHeatmap.R index 05358967a..ee78d12ff 100644 --- a/R/plotSCEHeatmap.R +++ b/R/plotSCEHeatmap.R @@ -25,6 +25,11 @@ #' @param cellIndexBy A single character specifying a column name of #' \code{colData(inSCE)}, or a vector of the same length as \code{ncol(inSCE)}, #' where we search for the non-rowname cell indices. Default \code{"rownames"}. +#' @param cluster_columns A logical scalar that turns on/off +#' clustering of columns. Default \code{FALSE}. Clustering columns should be turned off when using reduced dim +#' for plotting as it will be sorted by PCs +#' @param cluster_rows A logical scalar that turns on/off clustering of rows. +#' Default \code{FALSE}. #' @param rowDataName character. 
The column name(s) in \code{rowData} that need #' to be added to the annotation. Not applicable for #' \code{plotSCEDimReduceHeatmap}. Default \code{NULL}. @@ -103,7 +108,8 @@ #' @importFrom stringr str_replace_all str_c #' @importFrom stats prcomp quantile #' @importFrom dplyr select arrange group_by count ungroup mutate one_of desc -#' @importFrom tidyr spread unite column_to_rownames remove_rownames +#' @importFrom tidyr spread unite +#' @importFrom tibble column_to_rownames remove_rownames #' @importFrom grid gpar #' @importFrom ComplexHeatmap anno_barplot #' @importFrom rlang .data @@ -113,6 +119,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL, scale = TRUE, trim = c(-2,2), featureIndexBy = 'rownames', cellIndexBy = 'rownames', + cluster_columns = FALSE, + cluster_rows = FALSE, rowDataName = NULL, colDataName = NULL, aggregateRow = NULL, aggregateCol = NULL, featureAnnotations = NULL, cellAnnotations = NULL, @@ -282,8 +290,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL, temp_df<-as.data.frame(colData(SCE)[,c(aggregateCol),drop=FALSE]) %>% unite("new_colnames",1:ncol(.),sep = "_",remove = FALSE) %>% remove_rownames() %>% - mutate(aggregated_column = new_colnames) %>% - dplyr::select(new_colnames, aggregated_column) %>% + # mutate(aggregated_column = new_colnames) %>% + # dplyr::select(new_colnames, aggregated_column) %>% column_to_rownames("new_colnames") colData(SCE)<-DataFrame(temp_df) @@ -446,7 +454,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL, show_row_dend = rowDend, show_column_dend = colDend, row_dend_reorder = TRUE, - cluster_columns = FALSE, + cluster_columns = cluster_columns, + cluster_rows = cluster_rows, show_column_names = colLabel, column_names_gp = grid::gpar(fontsize = colLabelSize), row_gap = rowGap, column_gap = colGap, diff --git a/man/plotSCEHeatmap.Rd b/man/plotSCEHeatmap.Rd index ea46597db..f62dba285 100644 --- 
a/man/plotSCEHeatmap.Rd +++ b/man/plotSCEHeatmap.Rd @@ -15,6 +15,8 @@ plotSCEHeatmap( trim = c(-2, 2), featureIndexBy = "rownames", cellIndexBy = "rownames", + cluster_columns = FALSE, + cluster_rows = FALSE, rowDataName = NULL, colDataName = NULL, aggregateRow = NULL, @@ -65,8 +67,8 @@ another feature list indicated by \code{featureIndexBy}. Default \code{NULL}.} (cells). Alternatively, it can be a vector identifying cells in another cell list indicated by \code{featureIndexBy}. Default \code{NULL}.} -\item{scale}{Whether to perform z-score scaling on each row. Default -\code{TRUE}.} +\item{scale}{Whether to perform z-score or min-max scaling on each row.Choose from \code{"zscore"}, \code{"min-max"} or default +\code{TRUE} or \code{FALSE}} \item{trim}{A 2-element numeric vector. Values outside of this range will be trimmed to their nearst bound. Default \code{c(-2, 2)}} @@ -80,6 +82,13 @@ where we search for the non-rowname feature indices. Not applicable for \code{colData(inSCE)}, or a vector of the same length as \code{ncol(inSCE)}, where we search for the non-rowname cell indices. Default \code{"rownames"}.} +\item{cluster_columns}{A logical scalar that turns on/off +clustering of columns. Default \code{FALSE}. Clustering columns should be turned off when using reduced dim +for plotting as it will be sorted by PCs} + +\item{cluster_rows}{A logical scalar that turns on/off clustering of rows. +Default \code{FALSE}.} + \item{rowDataName}{character. The column name(s) in \code{rowData} that need to be added to the annotation. Not applicable for \code{plotSCEDimReduceHeatmap}. Default \code{NULL}.} diff --git a/vignettes/articles/heatmap.Rmd b/vignettes/articles/heatmap.Rmd index 8eed173c5..799f88861 100644 --- a/vignettes/articles/heatmap.Rmd +++ b/vignettes/articles/heatmap.Rmd @@ -207,34 +207,56 @@ Other heatmap settings will also be automatically filled for a DE specific heatm
```` -To present the usage of `plotSCEHeatmap()`, we would like to use a small example provided with SCTK. +To present the usage of `plotSCEHeatmap()`, we would like to use a small example provided with SCTK. + +**"Raw" plotting** + +The minimum setting for `plotSCEHeatmap()` is the input SCE object and the data matrix to plot (default `"logcounts"`). In this way, all cells and features will be presented while no annotation or legend (except the main color scheme) will be shown. ```{R setup, eval=TRUE, message=FALSE, cache=TRUE} library(singleCellTK) data("scExample") # This imports SCE object "sce" sce -``` -**"Raw" plotting** +# QC - Remove empty droplets +sce2<-subsetSCECols(sce, colData = c("type != 'EmptyDroplet'")) -The minimum setting for `plotSCEHeatmap()` is the input SCE object and the data matrix to plot (default `"logcounts"`). In this way, all cells and features will be presented while no annotation or legend (except the main color scheme) will be shown. +# Normalize the counts +sce2<-runNormalization(sce2, useAssay = "counts", outAssayName = "logcounts", + normalizationMethod = "logNormCounts",scale = TRUE) -```{R hmFull, eval=TRUE, cache=TRUE} -plotSCEHeatmap(sce, useAssay = "counts") +# plot the data +plotSCEHeatmap(sce2,useAssay = "logcounts",cluster_rows = TRUE, cluster_columns = TRUE) ``` **Subsetting** SCTK allows relatively flexible approaches to select the cells/features to plot. -The basic way to subset the heatmap is to directly use an index vector that can subset the input SCE object to `featureIndex` and `cellIndex`, including `numeric`, and `logical` vectors, which are widely used, and `character` vector containing the row/col names. Of course, user can directly use a subsetted SCE object as input. 
+The basic way to subset the heatmap is to directly use an index vector that can subset the input SCE object to `featureIndex` and `cellIndex`, including `numeric`, and `logical` vectors, which are widely used, and `character` vector containing the row/col names. Of course, users can directly use a subsetted SCE object as input. First let's run a simple clustering workflow to identify clusters and find DE genes for each cluster. We can subset the heatmap using this list of DE genes. + +```{R idxSubset, eval=TRUE, cache=TRUE, message=FALSE,warning=FALSE, echo=FALSE} + +# Run Clustering workflow +set.seed(348389) +sce2 <- runFeatureSelection(sce2, useAssay = "counts") +sce2 <- setTopHVG(sce2, featureSubsetName = "hvf") +sce2 <- runDimReduce(sce2, useAssay = "logcounts", useFeatureSubset = "hvf", scale = TRUE, reducedDimName = "PCA") +sce2 <- runDimReduce(sce2, method = "scaterUMAP", useReducedDim = "PCA", reducedDimName = "UMAP", nComponents = 10) +sce2 <- runScranSNN(inSCE = sce2, useReducedDim = "PCA", nComp = 10, clusterName = "scranSNN_PCA") + +# set gene ID as rownames +sce2<-setRowNames(sce2,"feature_name") -```{R idxSubset, eval=TRUE, cache=TRUE} -# Make up random downsampling numeric vector -featureSubset <- sample(nrow(sce), 50) -cellSubset <- sample(ncol(sce), 50) -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset) +# Find markers for each cluster +sce2 <- runFindMarker(sce2, useAssay = "logcounts", method = "wilcox", cluster = "scranSNN_PCA") +topMarkers <- getFindMarkerTopTable(sce2, topN = 5, log2fcThreshold = 0.5, + fdrThreshold = 0.05, minClustExprPerc = 0.5, + maxCtrlExprPerc = 0.5, minMeanExpr = 0) + +# Using feature index to select for genes in topMarkers list +plotSCEHeatmap(sce2,useAssay = "logcounts",rowLabel = TRUE,featureIndex = topMarkers$Gene,cluster_columns = TRUE) ``` ````{=html} @@ -246,9 +268,11 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c In a more 
complex situation, where users might only have a set of identifiers which are not inside the row/col names (i.e. unable to directly subset the SCE object), we provide another approach. The subset, in this situation, can be accessed via specifying a vector that contains the identifiers users have, to `featureIndexBy` or `cellIndexBy`. This specification allows directly giving one column name of `rowData` or `colData`. ```{R indexBy, eval=TRUE, cache=TRUE} -subsetFeatureName <- sample(rowData(sce)$feature_name, 50) -subsetCellBarcode <- sample(sce$cell_barcode, 50) -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = subsetFeatureName, featureIndexBy = "feature_name", cellIndex = subsetCellBarcode, cellIndexBy = "cell_barcode") + +list_of_FIDs<-c("ENSG00000251562","ENSG00000205542","ENSG00000177954","ENSG00000166710") + +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndexBy = "feature_ID", featureIndex = list_of_FIDs, cluster_rows = TRUE, cluster_columns = TRUE, rowLabel = TRUE) + ``` ````{=html} @@ -260,12 +284,8 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = subsetFeatureNam As introduced before, we allow directly using column names of `rowData` or `colData` to attach color bar annotations. To make use of this functionality, pass a `character` vector to `rowDataName` or `colDataName`. 
```{R colRowAnn, eval=TRUE, cache=TRUE} -# Make up arbitrary annotation, -rowRandLabel <- c(rep('aa', 100), rep('bb', 100)) -rowData(sce)$randLabel <- rowRandLabel -colRandLabel <- c(rep('cc', 195), rep('dd', 195)) -colData(sce)$randLabel <- colRandLabel -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowDataName = "randLabel", colDataName = c("type", "randLabel")) +# Annotate the columns (cells) with the cluster labels stored in colData +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c( "scranSNN_PCA"),rowLabel = TRUE, cluster_rows = TRUE, cluster_columns = TRUE) ``` ````{=html} @@ -273,12 +293,12 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c Customized Annotation ```` -Fully customized annotation is also supported, though it can be complexed for users. For the labeling, it is more recommanded to insert the information into `rowData` or `colData` and then make use. For coloring, information should be passed to `featureAnnotationColor` or `cellAnnotationColor`. The argument must be a `list` object with names matching the annotation classes (such as `"randLabel"` and `"type"`); each inner object under a name must be a named vector, with colors as the values and existing categories as the names. The working instance looks like this: +Fully customized annotation is also supported, though it can be complex for users. For the labeling, it is recommended to insert the information into `rowData` or `colData` and then make use of it. For coloring, information should be passed to `featureAnnotationColor` or `cellAnnotationColor`. The argument must be a `list` object with names matching the annotation classes (such as `"sample"` and `"type"`); each inner object under a name must be a named vector, with colors as the values and existing categories as the names. 
The working instance looks like this: ```{R colorEG, eval=FALSE, echo=FALSE} colAnnotattionColor <- list( sample = c(pbmc_4k = "FF4D4D"), - type = c(Singlet = "#4DFFFF", Doublet = "#FFC04D", EmptyDroplet = "#4D4DFF") + type = c(Singlet = "#4DFFFF", Doublet = "#FFC04D") ) ``` @@ -291,7 +311,18 @@ colAnnotattionColor <- list( **1. Grouping/Splitting** In some cases, it might be better to do a "semi-heatmap" (i.e. split the rows/columns first and cluster them within each group) to visualize some expression pattern, such as evaluating the differential expression. For this need, use `rowSplitBy` or `colSplitBy`, and the arguments must be a `character` vector that is a subset of the specified annotation. ```{R split, eval=TRUE, cache=TRUE} -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowDataName = "randLabel", colDataName = c("type", "randLabel"), rowSplitBy = "randLabel", colSplitBy = "type") + +# Create a new label in the rowData using the cluster markers + +data.frame(rowData(sce2)) %>% + left_join(topMarkers, by = c("feature_name" = "Gene")) %>% + rename("cluster_markers" = "scranSNN_PCA") -> new_row_data + +rownames(new_row_data)<-new_row_data$feature_name + +rowData(sce2)<-new_row_data + +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c("type"), aggregateCol = "scranSNN_PCA", rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowDataName = "cluster_markers", rowSplitBy = "cluster_markers" ) ``` **2. Cell/Feature Labeling** Text labels of features or cells can be added via `rowLabel` or `colLabel`. Use `TRUE` or `FALSE` to specify whether to show the `rownames` or `colnames` of the subsetted SCE object. Additionally, giving a single string of a column name of `rowData` or `colData` can enable the labeling of the annotation. 
Furthermore, users can directly throw a character vector to the parameter, with the same length of either the full SCE object or the subsetted. @@ -301,7 +332,7 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c **4. Row/Column titles** The row title (`"Genes"`) and column title (`"Cells"`) can be changed or removed by passing a string or `NULL` to `rowTitle` or `colTitle`, respectively. ```{R label, eval=TRUE, cache=TRUE} -plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowLabel = "feature_name", colLabel = seq(ncol(sce)), colDend = FALSE, rowTitle = "Downsampled features") +plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowTitle = "Markers",colTitle = "Clusters", cluster_columns = TRUE, cluster_rows = TRUE) ``` There are still some parameters not mentioned here, but they are not frequently used. Please refer to `?plotSCEHeatmap` as well as `?ComplexHeatmap::Heatmap`.