fixed merged SCE object export to AnnData issue

mingl1997 · Sep 6, 2023 · 4d81e15 · 4d81e15
1 parent 0bb8cb1
commit 4d81e15
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 22 deletions.
diff --git a/R/exportSCEtoAnndata.R b/R/exportSCEtoAnndata.R
@@ -34,6 +34,7 @@ exportSCEtoAnnData <- function(sce,
                                 compressionOpts = NULL,
                                 forceDense = FALSE){
   compression <- match.arg(compression)
+  # currently not needed
   #forceDense <- match.arg(forceDense)
   if (compression == 'None'){
     compression <- NULL
@@ -68,18 +69,28 @@ exportSCEtoAnnData <- function(sce,
       SummarizedExperiment::assay(sce, assay) <- .convertToMatrix(SummarizedExperiment::assay(sce, assay))
     }
   }
+  # convert to AnnData
   annData <- .sce2adata(sce, useAssay)
-  if (is.null(forceDense)) {
-    anndata::write_h5ad(annData, 
-                      filePath,
-                      compression = compression,
-                      compression_opts = compressionOpts)
-  }
-  else {
-    anndata::write_h5ad(annData, 
-                      filePath,
-                      compression = compression,
-                      compression_opts = compressionOpts,
-                      as.dense = forceDense)
-  }
+  # use zellkonverter to output h5ad, but first we have to turn it back into SCE for it to process
+  # and zellkonverter won't take the output SCE as it stands
+  out <- zellkonverter::AnnData2SCE(annData)
+  zellkonverter::writeH5AD(out, file = filePath, compression = compression)
+
+  # commented out until a permanent fix can be found with zellkonverter
+  # in the future, sce2adata may be deprecated altogether in favor of just this function, since zellkonverter::writeH5AD takes an SCE object and writes it directly
+  # and we can drop forceDense from the options, because sparse is default
+
+  #if (is.null(forceDense)) {
+  #  anndata::write_h5ad(annData, 
+  #                    filePath,
+  #                     compression = compression,
+  #                     compression_opts = compressionOpts)
+  # }
+  # else {
+  #   anndata::write_h5ad(annData, 
+  #                     filePath,
+  #                     compression = compression,
+  #                     compression_opts = compressionOpts,
+  #                     as.dense = forceDense)
+  # }
 }
diff --git a/R/sce2adata.R b/R/sce2adata.R
@@ -13,13 +13,51 @@
 #' @return A Python anndata.AnnData object
 #' @noRd
 .sce2adata <- function(SCE, useAssay = 'counts') {
-    # Transfer SCE object back to AnnData
-    # Argument check first
-    stopifnot(inherits(SCE, "SingleCellExperiment"))
 
-    # Extract information that correspond to AnnData structure
-    #X <- as.matrix(t(SummarizedExperiment::assay(SCE, useAssay)))
-    # Sparse matrix conversion supported now, commenting the line above.
+    # TODO: use zellkonverter in the future, temporary fix for now
+    # this is how we used to do it until we started running into problems with the getters and setters
+    # in the future, this function might be deprecated altogether since it is only called internally
+    # and we will use zellkonverter::writeH5AD directly from an SCE object
+
+    # # Transfer SCE object back to AnnData
+    # # Argument check first
+    # stopifnot(inherits(SCE, "SingleCellExperiment"))
+
+    # # Extract information that correspond to AnnData structure
+    # #X <- as.matrix(t(SummarizedExperiment::assay(SCE, useAssay)))
+    # # Sparse matrix conversion supported now, commenting the line above.
+    # X <- t(SummarizedExperiment::assay(SCE, useAssay))
+    # AnnData <- sc$AnnData(X = X)
+    # obs <- as.data.frame(SummarizedExperiment::colData(SCE))
+    # if(length(obs) > 0){
+    #     AnnData$obs = obs
+    # } else {
+    #     AnnData$obs_names <- colnames(SCE)
+    # }
+    # var <- as.data.frame(SummarizedExperiment::rowData(SCE))
+    # if(length(var) > 0){
+    #     AnnData$var = var
+    # } else {
+    #     AnnData$var_names <- rownames(SCE)
+    # }
+    # # uns  <- S4Vectors::metadata(SCE)
+    # # if(length(uns) > 0){ AnnData$uns <- uns }
+    # obsmNames <- SingleCellExperiment::reducedDimNames(SCE)
+    # if(length(obsmNames) > 0){
+    #     for (i in seq_along(obsmNames)) {
+    #         AnnData$obsm$'__setitem__'(obsmNames[i], SingleCellExperiment::reducedDim(SCE, obsmNames[i]))
+    #     }
+    # }
+
+    # # Furthermore, the other assays will for now also be saved to .layers
+    # allAssayNames <- SummarizedExperiment::assayNames(SCE)
+    # for (i in seq_along(allAssayNames)) {
+    #     oneName <- allAssayNames[i]
+    #     if (!oneName == useAssay) {
+    #         AnnData$layers$'__setitem__'(oneName, as.matrix(t(SummarizedExperiment::assay(SCE, oneName))))
+    #     }
+    # }
+
     X <- t(SummarizedExperiment::assay(SCE, useAssay))
     AnnData <- sc$AnnData(X = X)
     obs <- as.data.frame(SummarizedExperiment::colData(SCE))
@@ -34,22 +72,35 @@
     } else {
         AnnData$var_names <- rownames(SCE)
     }
+    # previously commented out by someone else
     # uns  <- S4Vectors::metadata(SCE)
     # if(length(uns) > 0){ AnnData$uns <- uns }
     obsmNames <- SingleCellExperiment::reducedDimNames(SCE)
+    # new method: build a named list of data frames, which Python can process for H5AD construction
+    # initialize an empty list for obsm here; layers gets its own empty list further down
+    obsm <- list()
+
     if(length(obsmNames) > 0){
         for (i in seq_along(obsmNames)) {
-            reticulate::py_set_item(AnnData$obsm, obsmNames[i], SingleCellExperiment::reducedDim(SCE, obsmNames[i]))
+            obsm[[obsmNames[i]]] <- data.frame(SingleCellExperiment::reducedDim(SCE, obsmNames[i]))
+            #previously using the dunder setter, which doesn't always work
+            #AnnData$obsm$'__setitem__'(obsmNames[i], SingleCellExperiment::reducedDim(SCE, obsmNames[i]))
         }
     }
+
+    AnnData$obsm <- obsm
 
     # Furthermore, the other assays will for now also be saved to .layers
     allAssayNames <- SummarizedExperiment::assayNames(SCE)
+    layers <- list()
     for (i in seq_along(allAssayNames)) {
         oneName <- allAssayNames[i]
         if (!oneName == useAssay) {
-            reticulate::py_set_item(AnnData$layers, oneName, as.matrix(t(SummarizedExperiment::assay(SCE, oneName))))
+            layers[[allAssayNames[i]]] <- data.frame(as.matrix(t(SummarizedExperiment::assay(SCE, oneName))))
+            # this is the way we used to do it, through a Pythonic dunder setter
+            #AnnData$layers$'__setitem__'(oneName, as.matrix(t(SummarizedExperiment::assay(SCE, oneName))))
         }
     }
+    AnnData$layers <- layers
     return(AnnData)
 }
diff --git a/exec/SCTK_runQC.R b/exec/SCTK_runQC.R
@@ -435,6 +435,8 @@ for(i in seq_along(process)) {
     if (dataType == "Both") {
         mergedDropletSCE <- mergeSCEColData(dropletSCE, cellSCE)
         mergedFilteredSCE <- mergeSCEColData(cellSCE, dropletSCE)
+        #mergedDropletSCE <- dropletSCE
+        #mergedCellSCE <- cellSCE
     }
 
     if (dataType == "Cell") {
@@ -445,7 +447,7 @@ for(i in seq_along(process)) {
         if (isTRUE(detectCell)) {
             mergedDropletSCE <- mergeSCEColData(dropletSCE, cellSCE)
             mergedFilteredSCE <- mergeSCEColData(cellSCE, dropletSCE)
-        } else{
+        } else {
             mergedDropletSCE <- dropletSCE
         }
     }