updated version

KJeynesCupper · Sep 5, 2023 · edd4c00 · edd4c00
1 parent b1063da
commit edd4c00
Show file tree

Hide file tree

Showing 11 changed files with 75 additions and 88 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mobileRNA
 Type: Package
 Title: Identify mobile RNA molecules in plant graft systems 
-Version: 0.99.7
+Version: 0.99.8
 Authors@R: c(person("Katie", "Jeynes-Cupper", 
     email = "kej031@student.bham.ac.uk", role = c("aut", "cre")),
     person("Marco", "Catoni", email = "m.catoni@bham.ac.uk", role = "aut"))
@@ -26,9 +26,7 @@ RoxygenNote: 7.2.3
 Suggests: 
     knitr,
     rmarkdown,
-    BiocStyle,
-    dplyr,
-    magrittr
+    BiocStyle
 Imports: 
     dplyr,
     tidyr, 

diff --git a/NEWS.md b/NEWS.md
@@ -40,4 +40,10 @@ Updated to version  0.99.6 (2023-08-15)
 + broadened use of RNAattributes function. 
 
 Updated to version  0.99.7 (2023-08-25)
-+ updated vignette
++ updated vignette
+
+Updated to version  0.99.8 (2023-09-05)
++ updated vignette
++ updated RNAdicercall to allow any dicer-classification (not restricted to 20-24)
++ Amended RNAdistribution to handle any dicer-classification.
++ Amended plotSamplePCA table and plot
diff --git a/R/RNAdicercall.R b/R/RNAdicercall.R
@@ -72,7 +72,7 @@
 #' 
 #'
 #'The second, labeled `DicerConsensus` states the consensus sRNA class between 
-#'20-24 nucleotides in length or "N" if unclassified. 
+#'the nucleotide lengths present in the data, or "N" if unclassified.
 #'
 #'
 #' @examples
@@ -90,9 +90,7 @@
 #' @export
 #' @importFrom dplyr "%>%"
 #' @importFrom dplyr "mutate"
-#' @importFrom dplyr "select"
 #' @importFrom dplyr "filter"
-#' @importFrom stringr "str_detect"
 #' @importFrom tidyr "replace_na"
 RNAdicercall <- function(data, conditions = NULL, ties.method = NULL, 
                          tidy = FALSE, chimeric = FALSE, controls = NULL, 
@@ -115,39 +113,31 @@ RNAdicercall <- function(data, conditions = NULL, ties.method = NULL,
                                       genome.ID = genome.ID)
   }
 
-  class_colnames <- c()
-  for (i in colnames(data)) {
-    if (stringr::str_detect(i, "DicerCall_")) {
-      class_colnames <- c(class_colnames, i)
-    }
-  }
+  class_colnames <- colnames(data)[grep("DicerCall_", colnames(data))]
+
   if (!is.null(conditions)) {
-    cat("Calculating consensus dicercall based on information from select replicates \n")
+    cat("Calculating consensus dicercall based on information from select replicates... \n")
     onlyconditions <- base::unique(grep(paste(conditions, collapse = "|"), 
                                         class_colnames, value = TRUE))
   }
   else if (is.null(conditions)) {
-    cat("Calculating consensus dicercall based on information from all replicates \n")
+    cat("Calculating consensus dicercall based on information from all replicates... \n")
     onlyconditions <- class_colnames
   }
-
-  other_exclude <- c("20", "21", "22", "23", "24", "N", "NA")
-  data <- data %>% 
-    dplyr::mutate(nt_20 = rowSums(.[onlyconditions] =="20")) %>% 
-    dplyr::mutate(nt_21 = rowSums(.[onlyconditions] == "21")) %>% 
-    dplyr::mutate(nt_22 = rowSums(.[onlyconditions] ==  "22")) %>% 
-    dplyr::mutate(nt_23 = rowSums(.[onlyconditions] == "23")) %>% 
-    dplyr::mutate(nt_24 = rowSums(.[onlyconditions] == "24"))%>% 
-    dplyr::mutate(other = rowSums(!sapply(dplyr::select(.,onlyconditions), 
-                                          `%in%`, other_exclude)))
+  cat("\n")
+  # unique values across the dicer call columns 
+  unique_vals <- unique(unlist(data[onlyconditions]))
+  # per-row count of each unique value across the selected columns
+  for (value in unique_vals) {
+    add_col <- paste0("nt_", value)
+    data[add_col] <- rowSums(data[onlyconditions] == value)
+  }
 
   # search columns based on location 
-  col_q <- grep("^nt", base::names(data))
-  col_qp <- grep("^other", base::names(data))
-  t <-c(col_q,col_qp)
+  t <- grep("^nt", base::names(data))
 
   if (ties.method == "random"){
-    cat("The consensus dicercall will be choose at random in the case of a tie \n")
+    cat("The consensus dicercall will be choose at random in the case of a tie... \n")
     new_df <- data 
     new_df$DicerCounts <- apply(new_df[t], 1, max)
     new_df <- new_df %>% 
@@ -158,7 +148,7 @@ RNAdicercall <- function(data, conditions = NULL, ties.method = NULL,
 
   } else 
     if(ties.method == "exclude"){
-      cat("The consensus dicercall will be excluded in the case of a tie \n") 
+      cat("The consensus dicercall will be excluded in the case of a tie... \n") 
       new_df <- data
 
       # Initialize result vector
@@ -201,14 +191,13 @@ RNAdicercall <- function(data, conditions = NULL, ties.method = NULL,
     }
 
   # remove calculation columns
-  new_df <- new_df %>% dplyr::select(-nt_20, -nt_21, -nt_22, 
-                                     -nt_23, -nt_24, -other)
+   new_df <- new_df[, !grepl("^nt_", colnames(new_df))]
   # remove nt from output values
   new_df$DicerConsensus <- gsub("^nt_", "", new_df$DicerConsensus)
 
   if (tidy) {
     cat("\n")
-    cat("Removing sRNA clusters with no consensus dicercall... \n")
+    cat("Removing small RNA clusters with no consensus dicercall... \n")
     new_df_tidy <- new_df %>% dplyr::filter(DicerConsensus != "N")
     return(new_df_tidy)
   }

diff --git a/R/RNAdistribution.R b/R/RNAdistribution.R
@@ -1,7 +1,7 @@
 #' Plot the distribution of sRNA lengths
 #'
 #' @description \code{RNAdistribution} plots the distribution of dicer-derived
-#' sRNA classes (20-24nt) across samples or across the sRNA consensus
+#' sRNA classes across samples or across the sRNA consensus
 #' determined by the function [mobileRNA::RNAdicercall()].
 #'
 #' @param data a dataframe, on which one of the following functions has already
@@ -164,10 +164,11 @@ RNAdistribution  <- function (data, samples = NULL, style,
     # if a replicate only has unclassified sRNAs (N), then we need to alter
     # beware that any tables which do not have the required columns
     if (base::inherits(counts.df, c("list"))) {
-      required_columns <- c("20", "21", "22", "23", "24", "N")
+      class_colnames <- colnames(data)[grep("DicerCall_", colnames(data))]
+      required_columns <- unique(unlist(data[class_colnames]))
       for (i in seq_along(counts.df)) {
         table_i <- counts.df[[i]]  # current table
-        if (length(names(table_i)) < 6) {
+        if (length(names(table_i)) < length(required_columns)) {
           # columns missing from the table
           missing_columns <- setdiff(required_columns, names(table_i))
           # add missing columns to table, and assign a value of 0

diff --git a/R/RNAsubset.R b/R/RNAsubset.R
@@ -22,13 +22,8 @@
 #' defined consensus sRNA class/type for each sRNA dicer-derived cluster
 #' (see [mobileRNA::RNAdicercall()].
 #'
-#' @param type A number to represent the type of small RNA population to subset
-#' for.
-#' @param ... Related to number in the `type` argument
-#' This can be a value from 20-24. To select, 24-nt sRNA, state 24.
-#' Multiple values can be inputted, for instance both 21 and 22 can be
-#' stated to select both.
-#'
+#' @param type numeric; small RNA class(es) to select.
+#' 
 #' @param sig Parameter to filter and select significant sRNA. If
 #'  \code{sig=TRUE}, data will be filtered based on p-adjusted < 0.05
 #'  significance threshold.
@@ -52,7 +47,7 @@
 #' @importFrom magrittr "%>%"
 #' @importFrom dplyr "filter"
 
-RNAsubset <- function(data, type,  sig=FALSE, ...){
+RNAsubset <- function(data, type,  sig=FALSE){
     x <- data %>% dplyr::filter(DicerConsensus %in% type)
     if(sig){
       x<- x %>%

diff --git a/R/invisible.functions.mobileRNA.R b/R/invisible.functions.mobileRNA.R
@@ -148,6 +148,7 @@ utils::globalVariables(c("ID", "DicerConsensus", "nt_20", "nt_21", "nt_22",
                          "value" , "variable" , "repeats_info" , "Genome" ,
                          "Dataset" ,"setNames" , "DicerCall" , "Reads" , "RPM" ,
                          "MajorRNA", "i", "other", "report", "DicerCounts", 
-                         "Sequence", "new_df",  "PC1", "PC2", "conditions"))
+                         "Sequence", "new_df",  "PC1", "PC2", "Conditions",
+                         "name"))
 
 
diff --git a/R/plotSamplePCA.R b/R/plotSamplePCA.R
@@ -73,6 +73,8 @@
 #' @importFrom ggrepel "geom_label_repel"
 #' @importFrom ggplot2 "aes"
 #' @importFrom ggplot2 "labs"
+#' @importFrom ggplot2 "xlab"
+#' @importFrom ggplot2 "ylab"
 #' @importFrom ggplot2 "coord_fixed"
 #' @importFrom ggrepel "geom_text_repel"
 #' @importFrom ggplot2 "ggplot"
@@ -98,6 +100,7 @@ plotSamplePCA <- function(data, group, vst = FALSE, labels = TRUE, boxed = TRUE,
     countData=data,colData=column.data,design= ~conditions))
   count.data.set$conditions <- stats::relevel(count.data.set$conditions,
                                               group[1])
+
   dds <- DESeq2::estimateSizeFactors(count.data.set)
 
   # log transform the data.
@@ -114,54 +117,54 @@ plotSamplePCA <- function(data, group, vst = FALSE, labels = TRUE, boxed = TRUE,
 
   # use the DEseq plot pca function, store in an object.
   pca <- DESeq2::plotPCA(rld1, returnData = TRUE, intgroup = "conditions")
-  ## change position
-  sample_names <- sub("Count_", "", colnames(data))
-  pca["ID"] <- sample_names # create new column with sample names
+  colnames(pca)[colnames(pca) == 'conditions'] <- 'Conditions'
+  rownames(pca) <- gsub("^Count_", "", rownames(pca))
+  pca$name <- gsub("^Count_", "",  pca$name)
   percentVar <- round(100 * attr(pca, "percentVar"))
 
   cat("Organising principal component analysis \n")
   if(labels == TRUE){
     if(boxed == TRUE){
-      X <- ggplot2::ggplot(pca, ggplot2::aes(PC1, PC2, color=conditions)) +
+      X <- ggplot2::ggplot(pca, ggplot2::aes(PC1, PC2, color=Conditions)) +
         {if(point.shape) ggplot2::geom_point(size=3, ggplot2::aes(
-          shape = conditions))}+
+          shape = Conditions))}+
         {if(point.shape == FALSE) ggplot2::geom_point(size=3)}+
-        xlab(paste0("PC1: ",percentVar[1],"% variance")) +
-        ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
+        ggplot2::xlab(paste0("PC1: ",percentVar[1],"% variance")) +
+        ggplot2::ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
         {if(!is.null(colours)) ggplot2::scale_color_manual(values=colours)}+ 
         ggplot2::coord_fixed()+
-        ggrepel::geom_label_repel(data = pca, ggplot2::aes(label = ID), 
+        ggrepel::geom_label_repel(data = pca, ggplot2::aes(label = name), 
                                   show.legend = FALSE, box.padding = 1)+
         ggplot2::labs(color = legend.title) + 
         ggplot2::coord_fixed(ratio = size.ratio)+
         {if(!is.null(ggplot.theme)) ggplot.theme() }
 
     } else
-      X <- ggplot2::ggplot(pca, ggplot2::aes(PC1, PC2, color=conditions)) +
+      X <- ggplot2::ggplot(pca, ggplot2::aes(PC1, PC2, color=Conditions)) +
         {if(point.shape) ggplot2::geom_point(size=3, ggplot2::aes(
-          shape =conditions))}+
+          shape =Conditions))}+
         {if(point.shape == FALSE) ggplot2::geom_point(size=3)}+
-        xlab(paste0("PC1: ",percentVar[1],"% variance")) +
-        ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
+        ggplot2::xlab(paste0("PC1: ",percentVar[1],"% variance")) +
+        ggplot2::ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
         {if(!is.null(colours)) ggplot2::scale_color_manual(values=colours)}+ 
-        ggrepel::geom_label_repel(data = pca, ggplot2::aes(label = ID), 
+        ggrepel::geom_label_repel(data = pca, ggplot2::aes(label = name), 
                                   show.legend = FALSE, box.padding = 1)+
         ggplot2::labs(color = legend.title) + 
         suppressMessages(ggplot2::coord_fixed(ratio = size.ratio))+
         {if(!is.null(ggplot.theme)) ggplot.theme() }
 
 
   } else {
-    X <- ggplot2::ggplot(pca, ggplot2::aes(PC1, PC2, color=conditions)) +
+    X <- ggplot2::ggplot(pca, ggplot2::aes(PC1, PC2, color=Conditions)) +
       {if(point.shape) ggplot2::geom_point(size=3, ggplot2::aes(
-        shape = conditions))}+
+        shape = Conditions))}+
       {if(point.shape == FALSE) ggplot2::geom_point(size=3)}+
-      xlab(paste0("PC1: ",percentVar[1],"% variance")) +
-      ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
+      ggplot2::xlab(paste0("PC1: ",percentVar[1],"% variance")) +
+      ggplot2::ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
       {if(!is.null(colours)) ggplot2::scale_color_manual(values=colours)}+ 
       ggplot2::labs(color = legend.title) + 
       ggplot2::coord_fixed(ratio = size.ratio)+
       {if(!is.null(ggplot.theme)) ggplot.theme() }
   }
   return(X)
-}
+}
diff --git a/man/RNAdicercall.Rd b/man/RNAdicercall.Rd
diff --git a/man/RNAdistribution.Rd b/man/RNAdistribution.Rd
diff --git a/man/RNAsubset.Rd b/man/RNAsubset.Rd