From d6c4f2921481409ebd9afa374db7f5cf466a7361 Mon Sep 17 00:00:00 2001
From: Natasha Gurevich <ngurevich@dot1x-nat-10-222-36-127.bumc.bu.edu>
Date: Fri, 16 Aug 2024 15:17:33 -0400
Subject: [PATCH] Violin plotting for multiple modules

---
 DESCRIPTION                     |  3 ++-
 R/dropletUtils_emptyDrops.R     |  2 +-
 R/ggPlotting.R                  | 28 ++++++++++++++++++++++------
 R/miscFunctions.R               |  8 ++++----
 R/plotBubble.R                  |  2 +-
 R/runBatchCorrection.R          |  2 +-
 R/runClusterSummaryMetrics.R    |  2 +-
 man/dedupRowNames.Rd            |  8 ++++----
 man/plotBubble.Rd               |  2 +-
 man/plotSCEViolin.Rd            |  6 +++---
 man/runClusterSummaryMetrics.Rd |  2 +-
 man/runEmptyDrops.Rd            |  2 +-
 man/runSCMerge.Rd               |  2 +-
 13 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8a39a7942..caf8f6c0d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -118,7 +118,8 @@ Imports:
     scuttle,
     utils,
     stats,
-    zellkonverter
+    zellkonverter,
+    tidyr
 RoxygenNote: 7.3.1
 Suggests:
     testthat,
diff --git a/R/dropletUtils_emptyDrops.R b/R/dropletUtils_emptyDrops.R
index e774dcb53..3a020868b 100644
--- a/R/dropletUtils_emptyDrops.R
+++ b/R/dropletUtils_emptyDrops.R
@@ -26,7 +26,7 @@
 
 #' @title Identify empty droplets using \link[DropletUtils]{emptyDrops}.
 #' @description Run \link[DropletUtils]{emptyDrops} on the count matrix in the
-#' provided \\linkS4class{SingleCellExperiment} object.
+#' provided \linkS4class{SingleCellExperiment} object.
 #' Distinguish between droplets containing cells and ambient RNA in a
 #' droplet-based single-cell RNA sequencing experiment.
 #' @param inSCE A \linkS4class{SingleCellExperiment} object. Must contain a raw 
diff --git a/R/ggPlotting.R b/R/ggPlotting.R
index a2e9afb3c..4b327d3a7 100644
--- a/R/ggPlotting.R
+++ b/R/ggPlotting.R
@@ -852,11 +852,19 @@ plotSCEScatter <- function(inSCE,
                       vcolor = "red",
                       vsize = 1,
                       vlinetype = 1) {
+  
+  mult_modules <- FALSE
+  
   if (is.null(groupBy)) {
-    groupBy <- rep("Sample", length(y))
+    if (length(colnames(y)) > 1){
+      mult_modules <- TRUE
+      groupBy <- rep(colnames(y), each = dim(y)[1])
+      y <- tidyr::pivot_longer(as.data.frame(y), cols = 1:dim(y)[2], cols_vary = "slowest")$value#
+    }else{
+      groupBy <- rep("Sample", length(y))
+    }
   }
   
-  
   if(!is.factor(groupBy)){
     if(is.null(plotOrder)){
       plotOrder = unique(groupBy)
@@ -920,6 +928,10 @@ plotSCEScatter <- function(inSCE,
                             axis.title.x = ggplot2::element_blank())
   }
   
+  if (mult_modules){
+    p <- p + xlab("Modules")
+  }
+  
   if (gridLine == TRUE){
     p <- p + ggplot2::theme(panel.grid.major.y = ggplot2::element_line("grey"))
   }
@@ -1417,9 +1429,9 @@ plotSCEViolinAssayData <- function(inSCE,
 #' @param feature Desired name of feature stored in assay of SingleCellExperiment
 #'  object. Only used when "assays" slotName is selected. Default NULL.
 #' @param sample Character vector. Indicates which sample each cell belongs to.
-#' @param dimension Desired dimension stored in the specified reducedDims.
-#'  Either an integer which indicates the column or a character vector specifies
-#'  column name. By default, the 1st dimension/column will be used.
+#' @param dimension Desired dimension(s) stored in the specified reducedDims.
+#'  Either an integer vector indicating the column(s) or a character vector
+#'  specifying the column name(s). By default, the 1st dimension/column is used.
 #'  Only used when "reducedDims" slotName is selected. Default NULL.
 #' @param groupBy Groupings for each numeric value. A user may input a vector
 #' equal length to the number of the samples in the SingleCellExperiment
@@ -1568,7 +1580,11 @@ plotSCEViolin <- function(inSCE,
   samples <- unique(sample)
   plotlist <- lapply(samples, function(x) {
     sampleInd <- which(sample == x)
-    countSub <- counts[sampleInd]
+    if (length(colnames(counts)) > 1){
+      countSub <- counts[sampleInd,]
+    }else{
+      countSub <- counts[sampleInd]
+    }
     if(!is.null(groupBy)){
       groupbySub <- groupBy[sampleInd]
     }else{
diff --git a/R/miscFunctions.R b/R/miscFunctions.R
index 3e3ca09f1..8826b52d7 100644
--- a/R/miscFunctions.R
+++ b/R/miscFunctions.R
@@ -191,18 +191,18 @@ discreteColorPalette <- function(n, palette = c("random", "ggplot", "celda"),
 #' Adds '-1', '-2', ... '-i' to multiple duplicated rownames, and in place
 #' replace the unique rownames, store unique rownames in \code{rowData}, or
 #' return the unique rownames as character vecetor.
-#' @param x A matrix like or /linkS4class{SingleCellExperiment} object, on which
+#' @param x A matrix like or \linkS4class{SingleCellExperiment} object, on which
 #' we can apply \code{rownames()} to and has duplicated rownames.
 #' @param as.rowData Only applicable when \code{x} is a
-#' /linkS4class{SingleCellExperiment} object. When set to \code{TRUE}, will
+#' \linkS4class{SingleCellExperiment} object. When set to \code{TRUE}, will
 #' insert a new column called \code{"rownames.uniq"} to \code{rowData(x)}, with
 #' the deduplicated rownames.
 #' @param return.list When set to \code{TRUE}, will return a character vector
 #' of the deduplicated rownames.
 #' @export
-#' @return By default, a matrix or /linkS4class{SingleCellExperiment} object
+#' @return By default, a matrix or \linkS4class{SingleCellExperiment} object
 #' with rownames deduplicated.
-#' When \code{x} is a /linkS4class{SingleCellExperiment} and \code{as.rowData}
+#' When \code{x} is a \linkS4class{SingleCellExperiment} and \code{as.rowData}
 #' is set to \code{TRUE}, will return \code{x} with \code{rowData} updated.
 #' When \code{return.list} is set to \code{TRUE}, will return a character vector
 #' with the deduplicated rownames.
diff --git a/R/plotBubble.R b/R/plotBubble.R
index 242eda3ff..44dc7f625 100644
--- a/R/plotBubble.R
+++ b/R/plotBubble.R
@@ -12,7 +12,7 @@
 #' @param ylab The y-axis label
 #' @param colorLow The color to be used for lowest value of mean expression
 #' @param colorHigh The color to be used for highest value of mean expression
-#' @param scale Option to scale the data. Default: /code{FALSE}. Selected assay will not be scaled. 
+#' @param scale Option to scale the data. Default: \code{FALSE}. Selected assay will not be scaled. 
 #' @return A ggplot of the bubble plot.
 #' @importFrom rlang .data
 #' @importFrom reshape2 melt
diff --git a/R/runBatchCorrection.R b/R/runBatchCorrection.R
index f782e283b..bf6328bbf 100644
--- a/R/runBatchCorrection.R
+++ b/R/runBatchCorrection.R
@@ -670,7 +670,7 @@ integrated = integrated[:, orderIdx]
 #' variable genes identification. Default \code{"counts"}.
 #' @param kmeansK An integer vector. Indicating the kmeans' K-value for each
 #' batch (i.e. how many subclusters in each batch should exist), in order to
-#' construct pseudo-replicates. The length of code{kmeansK} needs to be the same
+#' construct pseudo-replicates. The length of \code{kmeansK} needs to be the same
 #' as the number of batches. Default \code{NULL}, and this value will be
 #' auto-detected by default, depending on \code{cellType}.
 #' @param cellType A single character. A string indicating a field in
diff --git a/R/runClusterSummaryMetrics.R b/R/runClusterSummaryMetrics.R
index 5cbb9c062..a57567d2f 100644
--- a/R/runClusterSummaryMetrics.R
+++ b/R/runClusterSummaryMetrics.R
@@ -7,7 +7,7 @@
 #' @param featureNames A string or vector of strings with each gene to aggregate.
 #' @param displayName A string that is the name of the column used for genes.
 #' @param groupNames The name of a colData entry that can be used as groupNames.
-#' @param scale Option to scale the data. Default: /code{FALSE}. Selected assay will not be scaled. 
+#' @param scale Option to scale the data. Default: \code{FALSE}. Selected assay will not be scaled. 
 #' @return A dataframe with mean expression and percent of cells in cluster that 
 #' express for each cluster.
 #' @examples
diff --git a/man/dedupRowNames.Rd b/man/dedupRowNames.Rd
index 38e358985..3e4997e20 100644
--- a/man/dedupRowNames.Rd
+++ b/man/dedupRowNames.Rd
@@ -7,11 +7,11 @@
 dedupRowNames(x, as.rowData = FALSE, return.list = FALSE)
 }
 \arguments{
-\item{x}{A matrix like or /linkS4class{SingleCellExperiment} object, on which
+\item{x}{A matrix like or \linkS4class{SingleCellExperiment} object, on which
 we can apply \code{rownames()} to and has duplicated rownames.}
 
 \item{as.rowData}{Only applicable when \code{x} is a
-/linkS4class{SingleCellExperiment} object. When set to \code{TRUE}, will
+\linkS4class{SingleCellExperiment} object. When set to \code{TRUE}, will
 insert a new column called \code{"rownames.uniq"} to \code{rowData(x)}, with
 the deduplicated rownames.}
 
@@ -19,9 +19,9 @@ the deduplicated rownames.}
 of the deduplicated rownames.}
 }
 \value{
-By default, a matrix or /linkS4class{SingleCellExperiment} object
+By default, a matrix or \linkS4class{SingleCellExperiment} object
 with rownames deduplicated.
-When \code{x} is a /linkS4class{SingleCellExperiment} and \code{as.rowData}
+When \code{x} is a \linkS4class{SingleCellExperiment} and \code{as.rowData}
 is set to \code{TRUE}, will return \code{x} with \code{rowData} updated.
 When \code{return.list} is set to \code{TRUE}, will return a character vector
 with the deduplicated rownames.
diff --git a/man/plotBubble.Rd b/man/plotBubble.Rd
index 7d2d3caf7..70ef5cf5b 100644
--- a/man/plotBubble.Rd
+++ b/man/plotBubble.Rd
@@ -39,7 +39,7 @@ plotBubble(
 
 \item{colorHigh}{The color to be used for highest value of mean expression}
 
-\item{scale}{Option to scale the data. Default: /code{FALSE}. Selected assay will not be scaled.}
+\item{scale}{Option to scale the data. Default: \code{FALSE}. Selected assay will not be scaled.}
 }
 \value{
 A ggplot of the bubble plot.
diff --git a/man/plotSCEViolin.Rd b/man/plotSCEViolin.Rd
index d717a2dd2..402470a63 100644
--- a/man/plotSCEViolin.Rd
+++ b/man/plotSCEViolin.Rd
@@ -53,9 +53,9 @@ object. Only used when "assays" slotName is selected. Default NULL.}
 
 \item{sample}{Character vector. Indicates which sample each cell belongs to.}
 
-\item{dimension}{Desired dimension stored in the specified reducedDims.
-Either an integer which indicates the column or a character vector specifies
-column name. By default, the 1st dimension/column will be used.
+\item{dimension}{Desired dimension(s) stored in the specified reducedDims.
+Either an integer vector indicating the column(s) or a character vector
+specifying the column name(s). By default, the 1st dimension/column is used.
 Only used when "reducedDims" slotName is selected. Default NULL.}
 
 \item{groupBy}{Groupings for each numeric value. A user may input a vector
diff --git a/man/runClusterSummaryMetrics.Rd b/man/runClusterSummaryMetrics.Rd
index f9ffd75f8..1aa297bc2 100644
--- a/man/runClusterSummaryMetrics.Rd
+++ b/man/runClusterSummaryMetrics.Rd
@@ -24,7 +24,7 @@ runClusterSummaryMetrics(
 
 \item{groupNames}{The name of a colData entry that can be used as groupNames.}
 
-\item{scale}{Option to scale the data. Default: /code{FALSE}. Selected assay will not be scaled.}
+\item{scale}{Option to scale the data. Default: \code{FALSE}. Selected assay will not be scaled.}
 }
 \value{
 A dataframe with mean expression and percent of cells in cluster that 
diff --git a/man/runEmptyDrops.Rd b/man/runEmptyDrops.Rd
index f707f108e..a8aca5323 100644
--- a/man/runEmptyDrops.Rd
+++ b/man/runEmptyDrops.Rd
@@ -63,7 +63,7 @@ details.
 }
 \description{
 Run \link[DropletUtils]{emptyDrops} on the count matrix in the
-provided \\linkS4class{SingleCellExperiment} object.
+provided \linkS4class{SingleCellExperiment} object.
 Distinguish between droplets containing cells and ambient RNA in a
 droplet-based single-cell RNA sequencing experiment.
 }
diff --git a/man/runSCMerge.Rd b/man/runSCMerge.Rd
index 598889ea7..02e819409 100644
--- a/man/runSCMerge.Rd
+++ b/man/runSCMerge.Rd
@@ -41,7 +41,7 @@ mouse SEG lists is available with \code{\link[scMerge]{segList}} or
 
 \item{kmeansK}{An integer vector. Indicating the kmeans' K-value for each
 batch (i.e. how many subclusters in each batch should exist), in order to
-construct pseudo-replicates. The length of code{kmeansK} needs to be the same
+construct pseudo-replicates. The length of \code{kmeansK} needs to be the same
 as the number of batches. Default \code{NULL}, and this value will be
 auto-detected by default, depending on \code{cellType}.}