Merge pull request #31 from compbiomed/devel

Devel
mingl1997 · Oct 28, 2024 · 87a7fb0 · 87a7fb0
2 parents 9665aef + e4051af
commit 87a7fb0
Show file tree

Hide file tree

Showing 4 changed files with 93 additions and 31 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -271,6 +271,7 @@ import(GSVAdata)
 import(SingleCellExperiment)
 import(eds)
 importFrom(BiocParallel,SerialParam)
+importFrom(ComplexHeatmap,anno_barplot)
 importFrom(S4Vectors,"metadata<-")
 importFrom(S4Vectors,metadata)
 importFrom(SingleCellExperiment,"counts<-")
@@ -317,8 +318,11 @@ importFrom(stats,prcomp)
 importFrom(stats,quantile)
 importFrom(stringr,str_c)
 importFrom(stringr,str_replace_all)
+importFrom(tibble,column_to_rownames)
+importFrom(tibble,remove_rownames)
 importFrom(tibble,tibble)
 importFrom(tidyr,spread)
+importFrom(tidyr,unite)
 importFrom(tools,file_ext)
 importFrom(utils,head)
 importFrom(utils,packageVersion)
diff --git a/R/plotSCEHeatmap.R b/R/plotSCEHeatmap.R
@@ -25,6 +25,11 @@
 #' @param cellIndexBy A single character specifying a column name of
 #' \code{colData(inSCE)}, or a vector of the same length as \code{ncol(inSCE)},
 #' where we search for the non-rowname cell indices. Default \code{"rownames"}.
+#' @param cluster_columns A logical scalar that turns on/off 
+#' clustering of columns. Default \code{FALSE}. Column clustering should be turned off when plotting a reduced
+#' dimension, because the columns are already sorted by PCs.
+#' @param cluster_rows A logical scalar that turns on/off clustering of rows. 
+#' Default \code{FALSE}.
 #' @param rowDataName character. The column name(s) in \code{rowData} that need
 #' to be added to the annotation. Not applicable for
 #' \code{plotSCEDimReduceHeatmap}. Default \code{NULL}.
@@ -103,7 +108,8 @@
 #' @importFrom stringr str_replace_all str_c
 #' @importFrom stats prcomp quantile
 #' @importFrom dplyr select arrange group_by count ungroup mutate one_of desc
-#' @importFrom tidyr spread unite column_to_rownames remove_rownames
+#' @importFrom tidyr spread unite 
+#' @importFrom tibble column_to_rownames remove_rownames
 #' @importFrom grid gpar
 #' @importFrom ComplexHeatmap anno_barplot
 #' @importFrom rlang .data
@@ -113,6 +119,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL,
                            scale = TRUE, trim = c(-2,2),
                            featureIndexBy = 'rownames',
                            cellIndexBy = 'rownames',
+                           cluster_columns = FALSE,
+                           cluster_rows = FALSE,
                            rowDataName = NULL, colDataName = NULL,
                            aggregateRow = NULL, aggregateCol = NULL,
                            featureAnnotations = NULL, cellAnnotations = NULL,
@@ -282,8 +290,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL,
     temp_df<-as.data.frame(colData(SCE)[,c(aggregateCol),drop=FALSE]) %>% 
       unite("new_colnames",1:ncol(.),sep = "_",remove = FALSE) %>% 
       remove_rownames() %>% 
-      mutate(aggregated_column = new_colnames) %>%
-      dplyr::select(new_colnames, aggregated_column) %>%
+    #  mutate(aggregated_column = new_colnames) %>%
+    #  dplyr::select(new_colnames, aggregated_column) %>%
       column_to_rownames("new_colnames")
 
     colData(SCE)<-DataFrame(temp_df)
@@ -446,7 +454,8 @@ plotSCEHeatmap <- function(inSCE, useAssay = 'logcounts', useReducedDim = NULL,
                                 show_row_dend = rowDend,
                                 show_column_dend = colDend,
                                 row_dend_reorder = TRUE,
-                                cluster_columns = FALSE,
+                                cluster_columns = cluster_columns,
+                                cluster_rows = cluster_rows,
                                 show_column_names = colLabel,
                                 column_names_gp = grid::gpar(fontsize = colLabelSize),
                                 row_gap = rowGap, column_gap = colGap,

diff --git a/man/plotSCEHeatmap.Rd b/man/plotSCEHeatmap.Rd
diff --git a/vignettes/articles/heatmap.Rmd b/vignettes/articles/heatmap.Rmd
@@ -207,34 +207,56 @@ Other heatmap settings will also be automatically filled for a DE specific heatm
 <div id="console" class="tabcontent">
 ````
 
-To present the usage of `plotSCEHeatmap()`, we would like to use a small example provided with SCTK.  
+To present the usage of `plotSCEHeatmap()`, we would like to use a small example provided with SCTK. 
+
+**"Raw" plotting**
+
+The minimum setting for `plotSCEHeatmap()` is the input SCE object and the data matrix to plot (default `"logcounts"`). In this way, all cells and features will be presented while no annotation or legend (except the main color scheme) will be shown.  
 
 ```{R setup, eval=TRUE, message=FALSE, cache=TRUE}
 library(singleCellTK)
 data("scExample") # This imports SCE object "sce"
 sce
-```
 
-**"Raw" plotting**
+# QC - Remove empty droplets
+sce2<-subsetSCECols(sce, colData = c("type != 'EmptyDroplet'"))
 
-The minimum setting for `plotSCEHeatmap()` is the input SCE object and the data matrix to plot (default `"logcounts"`). In this way, all cells and features will be presented while no annotation or legend (except the main color scheme) will be shown.  
+# Normalize the counts 
+sce2<-runNormalization(sce2, useAssay = "counts", outAssayName = "logcounts",
+                        normalizationMethod = "logNormCounts",scale = TRUE)
 
-```{R hmFull, eval=TRUE, cache=TRUE}
-plotSCEHeatmap(sce, useAssay = "counts")
+# plot the data
+plotSCEHeatmap(sce2,useAssay = "logcounts",cluster_rows = TRUE, cluster_columns = TRUE)
 ```
 
 **Subsetting**
 
 SCTK allows relatively flexible approaches to select the cells/features to plot.  
 
-The basic way to subset the heatmap is to directly use an index vector that can subset the input SCE object to `featureIndex` and `cellIndex`, including `numeric`, and `logical` vectors, which are widely used, and `character` vector containing the row/col names. Of course, user can directly use a subsetted SCE object as input.  
+The basic way to subset the heatmap is to directly pass an index vector that can subset the input SCE object to `featureIndex` and `cellIndex`. This includes `numeric` and `logical` vectors, which are widely used, and `character` vectors containing the row/col names. Of course, users can also directly use a subsetted SCE object as input. First, let's run a simple clustering workflow to identify clusters and find DE genes for each cluster. We can then subset the heatmap using this list of DE genes.
+
+```{R idxSubset, eval=TRUE, cache=TRUE, message=FALSE, warning=FALSE, echo=FALSE}
+
+# Run Clustering workflow
+set.seed(348389)
+sce2 <- runFeatureSelection(sce2, useAssay = "counts")
+sce2 <- setTopHVG(sce2, featureSubsetName = "hvf")
+sce2 <- runDimReduce(sce2, useAssay = "logcounts", useFeatureSubset = "hvf", scale = TRUE, reducedDimName = "PCA")
+sce2 <- runDimReduce(sce2, method = "scaterUMAP", useReducedDim = "PCA", reducedDimName = "UMAP", nComponents = 10)
+sce2 <- runScranSNN(inSCE = sce2, useReducedDim = "PCA", nComp = 10, clusterName = "scranSNN_PCA")
 
-```{R idxSubset, eval=TRUE, cache=TRUE}
-# Make up random downsampling numeric vector
-featureSubset <- sample(nrow(sce), 50)
-cellSubset <- sample(ncol(sce), 50)
+# set gene ID as rownames
+sce2<-setRowNames(sce2,"feature_name")
 
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset)
+
+# Find markers for each cluster
+sce2 <- runFindMarker(sce2, useAssay = "logcounts", method = "wilcox", cluster = "scranSNN_PCA")
+topMarkers <- getFindMarkerTopTable(sce2, topN = 5, log2fcThreshold = 0.5, 
+                                    fdrThreshold = 0.05, minClustExprPerc = 0.5, 
+                                    maxCtrlExprPerc = 0.5, minMeanExpr = 0)
+
+# Using feature index to select for genes in topMarkers list 
+plotSCEHeatmap(sce2,useAssay = "logcounts",rowLabel = TRUE,featureIndex = topMarkers$Gene,cluster_columns = TRUE)
 ```
 
 ````{=html}
@@ -246,9 +268,11 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c
 In a more complex situation, where users might only have a set of identifiers which are not inside the row/col names (i.e. unable to directly subset the SCE object), we provide another approach. The subset, in this situation, can be accessed via specifying a vector that contains the identifiers users have, to `featureIndexBy` or `cellIndexBy`. This specification allows directly giving one column name of `rowData` or `colData`.  
 
 ```{R indexBy, eval=TRUE, cache=TRUE}
-subsetFeatureName <- sample(rowData(sce)$feature_name, 50)
-subsetCellBarcode <- sample(sce$cell_barcode, 50)
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = subsetFeatureName, featureIndexBy = "feature_name", cellIndex = subsetCellBarcode, cellIndexBy = "cell_barcode")
+
+list_of_FIDs<-c("ENSG00000251562","ENSG00000205542","ENSG00000177954","ENSG00000166710")
+
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndexBy = "feature_ID",  featureIndex = list_of_FIDs, cluster_rows = TRUE, cluster_columns = TRUE, rowLabel = TRUE)
+
 ```
 
 ````{=html}
@@ -260,25 +284,21 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = subsetFeatureNam
 As introduced before, we allow directly using column names of `rowData` or `colData` to attach color bar annotations. To make use of this functionality, pass a `character` vector to `rowDataName` or `colDataName`. 
 
 ```{R colRowAnn, eval=TRUE, cache=TRUE}
-# Make up arbitrary annotation, 
-rowRandLabel <- c(rep('aa', 100), rep('bb', 100))
-rowData(sce)$randLabel <- rowRandLabel
-colRandLabel <- c(rep('cc', 195), rep('dd', 195))
-colData(sce)$randLabel <- colRandLabel
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowDataName = "randLabel", colDataName = c("type", "randLabel"))
+# Create new annotation for markers
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c( "scranSNN_PCA"),rowLabel = TRUE, cluster_rows = TRUE, cluster_columns = TRUE)
 ```
 
 ````{=html}
 <details>
   <summary><b>Customized Annotation</b></summary>
 ```` 
 
-Fully customized annotation is also supported, though it can be complexed for users. For the labeling, it is more recommanded to insert the information into `rowData` or `colData` and then make use. For coloring, information should be passed to `featureAnnotationColor` or `cellAnnotationColor`. The argument must be a `list` object with names matching the annotation classes (such as `"randLabel"` and `"type"`); each inner object under a name must be a named vector, with colors as the values and existing categories as the names. The working instance looks like this:
+Fully customized annotation is also supported, though it can be complex for users. For labeling, it is recommended to insert the information into `rowData` or `colData` and then refer to it by column name. For coloring, information should be passed to `featureAnnotationColor` or `cellAnnotationColor`. The argument must be a `list` object with names matching the annotation classes (such as `"sample"` and `"type"`); each inner object under a name must be a named vector, with colors as the values and existing categories as the names. A working example looks like this:
 
 ```{R colorEG, eval=FALSE, echo=FALSE}
 colAnnotattionColor <- list(
   sample = c(pbmc_4k = "FF4D4D"),
-  type = c(Singlet = "#4DFFFF", Doublet = "#FFC04D", EmptyDroplet = "#4D4DFF")
+  type = c(Singlet = "#4DFFFF", Doublet = "#FFC04D")
 )
 ```
 
@@ -291,7 +311,27 @@ colAnnotattionColor <- list(
 **1. Grouping/Splitting** In some cases, it might be better to do a "semi-heatmap" (i.e. split the rows/columns first and cluster them within each group) to visualize some expression pattern, such as evaluating the differential expression. For this need, use `rowSplitBy` or `colSplitBy`, and the arguments must be a `character` vector that is a subset of the specified annotation.  
 
 ```{R split, eval=TRUE, cache=TRUE}
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowDataName = "randLabel", colDataName = c("type", "randLabel"), rowSplitBy = "randLabel", colSplitBy = "type")
+
+# Create a new label in the rowData using the cluster markers
+
+data.frame(rowData(sce2)) %>% 
+  left_join(topMarkers, by = c("feature_name" = "Gene")) %>%
+  rename("cluster_markers" = "scranSNN_PCA") -> new_row_data
+
+rownames(new_row_data)<-new_row_data$feature_name
+
+rowData(sce2)<-new_row_data
+
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c("type"), aggregateCol = "scranSNN_PCA", rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowDataName = "cluster_markers", rowSplitBy = "cluster_markers")
+
+# Adding a summary 
+
+data.frame(colData(sce2)) %>% 
+  mutate(summary_col = sample(5,n(), replace = TRUE)) -> new_col_data
+
+colData(sce2)<-DataFrame(new_col_data)
+
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, colDataName = c("type"), aggregateCol = "scranSNN_PCA", rowGap = grid::unit(2, 'mm'),rowLabel = TRUE, rowDataName = "cluster_markers", rowSplitBy = "cluster_markers", addCellSummary = "summary_col" )
 ```
 
 **2. Cell/Feature Labeling** Text labels of features or cells can be added via `rowLabel` or `colLabel`. Use `TRUE` or `FALSE` to specify whether to show the `rownames` or `colnames` of the subsetted SCE object. Additionally, giving a single string of a column name of `rowData` or `colData` can enable the labeling of the annotation. Furthermore, users can directly pass a character vector to the parameter, with the same length as either the full SCE object or the subsetted one.  
@@ -301,7 +341,7 @@ plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, c
 **4. Row/Column titles** The row title (`"Genes"`) and column title (`"Cells"`) can be changed or removed by passing a string or `NULL` to `rowTitle` or `colTitle`, respectively.  
 
 ```{R label, eval=TRUE, cache=TRUE}
-plotSCEHeatmap(inSCE = sce, useAssay = "counts", featureIndex = featureSubset, cellIndex = cellSubset, rowLabel = "feature_name", colLabel = seq(ncol(sce)), colDend = FALSE, rowTitle = "Downsampled features")
+plotSCEHeatmap(inSCE = sce2, useAssay = "logcounts", featureIndex = topMarkers$Gene, rowGap = grid::unit(2, 'mm'),rowLabel = TRUE,  rowTitle = "Markers",colTitle = "Clusters", cluster_columns = TRUE, cluster_rows = TRUE)
 ```
 
 There are still some parameters not mentioned here, but they are not frequently used. Please refer to `?plotSCEHeatmap` as well as `?ComplexHeatmap::Heatmap`.