From 6cafee7a45dfba9f50e78455d9c46ea9231de5b3 Mon Sep 17 00:00:00 2001
From: nfancy <n.naharfancy@yahoo.com>
Date: Sat, 18 Sep 2021 09:42:26 +0100
Subject: [PATCH 1/7] Fix scFlow R scripts (DGE, IPA, integration, finalize) and sync pipeline params

---
 bin/scflow_dge.r                       |  52 ++-
 bin/scflow_finalize_sce.r              | 111 ++++--
 bin/scflow_integrate.r                 | 124 ++----
 bin/scflow_ipa.r                       |  94 +++--
 bin/scflow_qc.r                        |   5 +
 conf/modules.config                    |  10 +-
 conf/scflow_analysis.config            |  59 ++-
 modules/local/get_software_versions.nf |  11 +-
 modules/local/process/scflow/dge.nf    |   2 +-
 modules/local/process/scflow/ipa.nf    |   2 +-
 nextflow_schema.json                   | 497 ++++++++++---------------
 workflows/scflow.nf                    |  23 +-
 12 files changed, 451 insertions(+), 539 deletions(-)

diff --git a/bin/scflow_dge.r b/bin/scflow_dge.r
index 1893c7a..ef4d27e 100755
--- a/bin/scflow_dge.r
+++ b/bin/scflow_dge.r
@@ -146,6 +146,14 @@ required$add_argument(
   help = "p-value cutoff for DE [default %(default)s]"
 )
 
+required$add_argument(
+  "--n_label",
+  type = "integer",
+  default = 5,
+  metavar = "number",
+  help = "Number of genes to be highlighted on volcano plot"
+)
+
 required$add_argument(
   "--ensembl_mappings",
   help = "path to ensembl mappings file",
@@ -177,9 +185,9 @@ options("scflow_species" = args$species)
 args$rescale_numerics <- as.logical(args$rescale_numerics)
 args$pseudobulk <- as.logical(args$pseudobulk)
 args$force_run <- as.logical(args$force_run)
-if (tolower(args$random_effects_var) == "null") args$random_effects_var <- NULL
+if(tolower(args$random_effects_var) == "null") args$random_effects_var <- NULL
 
-args$max_cores <- if (toupper(args$max_cores) == "NULL") NULL else {
+args$max_cores <- if(toupper(args$max_cores) == "NULL") NULL else { 
   as.numeric(as.character(args$max_cores))
 }
 
@@ -202,6 +210,8 @@ cli::cli_alert(sprintf(
   n_cores
 ))
 
+# RhpcBLASctl::omp_set_num_threads(1L)
+
 library(scFlow)
 
 #   ____________________________________________________________________________
@@ -220,9 +230,7 @@ if (args$pseudobulk) {
   pb_str <- "_pb"
   sce_subset <- pseudobulk_sce(
     sce_subset,
-    keep_vars = c(
-      args$dependent_var, args$confounding_vars, args$random_effects_var
-      ),
+    keep_vars = c(args$dependent_var, args$confounding_vars, args$random_effects_var),
     assay_name = "counts",
     celltype_var = args$celltype_var,
     sample_var = args$sample_var
@@ -255,20 +263,36 @@ file_name <- paste0(args$celltype, "_",
 for (result in names(de_results)) {
   if (dim(de_results[[result]])[[1]] > 0) {
     write.table(de_results[[result]],
-                file = file.path(getwd(),
-                                paste0(file_name, result, "_DE.tsv")),
+                file = file.path(getwd(), 
+                                 paste0(file_name, result, "_DE.tsv")),
                 quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
+    
     report_de(de_results[[result]],
+              fc_threshold = args$fc_threshold,
+              pval_cutoff = args$pval_cutoff,
+              n_label = args$n_label,
               report_folder_path = file.path(getwd()),
               report_file = paste0(file_name, result, "_scflow_de_report"))
+    
     print("report generated")
-    png(file.path(getwd(),
-                  paste0(file_name, result, "_volcano_plot.png")),
-        width = 247, height = 170, units = "mm", res = 600)
-    print(attr(de_results[[result]], "plot"))
-    dev.off()
-
+    
+    p <- scFlow::volcano_plot(
+      dt = de_results[[result]],
+      fc_threshold = args$fc_threshold,
+      pval_cutoff = args$pval_cutoff,
+      n_label = args$n_label
+    )
+    # Save the plot as a 7 x 5 inch PNG at 600 dpi in the working directory.
+    ggplot2::ggsave(
+      filename = file.path(
+        getwd(), paste0(file_name, result, "_volcano_plot.png")
+      ),
+      plot = p, width = 7, height = 5, units = "in", dpi = 600
+    )
+    print("Volcano plot generated")
   } else {
     print(sprintf("No DE genes found for %s", result))
-    }
+  }
 }
+
+
diff --git a/bin/scflow_finalize_sce.r b/bin/scflow_finalize_sce.r
index 58813af..ffa1743 100755
--- a/bin/scflow_finalize_sce.r
+++ b/bin/scflow_finalize_sce.r
@@ -5,12 +5,11 @@
 #   ____________________________________________________________________________
 #   Initialization                                                          ####
 
+options(mc.cores = future::availableCores())
+
 ##  ............................................................................
 ##  Load packages                                                           ####
 library(argparse)
-library(scFlow)
-library(magrittr)
-library(SingleCellExperiment)
 
 ##  ............................................................................
 ##  Parse command-line arguments                                            ####
@@ -25,42 +24,42 @@ optional <- parser$add_argument_group("Optional", "required arguments")
 required$add_argument(
   "--sce_path",
   help = "-path to the SingleCellExperiment",
-  metavar = "dir",
+  metavar = "dir", 
   required = TRUE
 )
 
 required$add_argument(
   "--celltype_mappings",
   help = "path to a tsv file with revised celltype mappings",
-  metavar = "foo/bar",
+  metavar = "foo/bar", 
   required = TRUE
 )
 
 required$add_argument(
   "--clusters_colname",
   help = "name of the column with cluster numbers",
-  metavar = "foo/bar",
+  metavar = "foo/bar", 
   required = TRUE
 )
 
 required$add_argument(
   "--celltype_var",
   help = "name of the column with celltype names",
-  metavar = "foo/bar",
+  metavar = "foo/bar", 
   required = TRUE
 )
 
 required$add_argument(
   "--unique_id_var",
   help = "name of the column with unique sample ids",
-  metavar = "foo/bar",
+  metavar = "foo/bar", 
   required = TRUE
 )
 
 required$add_argument(
   "--facet_vars",
   help = "names of variables to examine in the celltype metrics report",
-  metavar = "foo/bar",
+  metavar = "foo/bar", 
   required = TRUE
 )
 
@@ -68,14 +67,14 @@ required$add_argument(
 required$add_argument(
   "--input_reduced_dim",
   help = "name of the reduced dimension slot to use for plots in the report",
-  metavar = "foo/bar",
+  metavar = "foo/bar", 
   required = TRUE
 )
 
 required$add_argument(
   "--metric_vars",
   help = "names of variables to examine in the celltype metrics report",
-  metavar = "foo/bar",
+  metavar = "foo/bar", 
   required = TRUE
 )
 
@@ -84,7 +83,7 @@ required$add_argument(
   default = 5,
   type = "integer",
   required = TRUE,
-  help = "The number of top marker genes",
+  help ="The number of top marker genes",
   metavar = "N"
 )
 
@@ -106,6 +105,13 @@ required$add_argument(
   metavar = "N"
 )
 
+required$add_argument(
+  "--max_cores",
+  default = NULL,
+  help = "override for lower cpu core usage",
+  metavar = "N",
+  required = TRUE
+)
 
 ### . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ..
 ### Pre-process args                                                        ####
@@ -117,6 +123,31 @@ args$metric_vars <- strsplit(args$metric_vars, ",")[[1]]
 options("scflow_reddimplot_pointsize" = args$reddimplot_pointsize)
 options("scflow_reddimplot_alpha" = args$reddimplot_alpha)
 
+# A literal "null" (any case) means no override; else coerce to numeric.
+args$max_cores <- if (toupper(args$max_cores) == "NULL") NULL else {
+  as.numeric(as.character(args$max_cores))
+}
+#   ____________________________________________________________________________
+#   Delay Package Loading for Optional Max Cores Override
+
+n_cores <- future::availableCores(methods = "mc.cores")
+
+if (is.null(args$max_cores)) {
+  options(mc.cores = n_cores)
+} else {
+  options(mc.cores = min(args$max_cores, n_cores))
+}
+
+cli::cli_alert(sprintf(
+  "Using %s cores on system with %s available cores.",
+  getOption("mc.cores"),
+  n_cores
+))
+
+library(scFlow)
+library(magrittr)
+library(SingleCellExperiment)
+
 ##  ............................................................................
 ##  Start                                                                   ####
 
@@ -161,8 +192,8 @@ celltypes <- as.data.frame(SummarizedExperiment::colData(sce)) %>%
 colnames(celltypes) <- c("celltype", "n_cells")
 
 write.table(
-  data.frame(celltypes),
-  file = "celltypes.tsv",
+  data.frame(celltypes), 
+  file = "celltypes.tsv", 
   row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
 
 ### Save Marker Gene Plots
@@ -170,58 +201,58 @@ folder_path <- file.path(getwd(), "celltype_marker_plots")
 dir.create(folder_path)
 
 for (group in names(sce@metadata$markers)) {
+  
   pwidth <- max(10,
-                length(
-                  unique(sce@metadata$markers[[group]]$marker_plot$data$Group)
-                  )
+                length(unique(sce@metadata$markers[[group]]$marker_plot$data$Group))
   )
-  pheight <- length(
-    unique(sce@metadata$markers[[group]]$marker_plot$data$Gene)
-    )
+  pheight <- length(unique(sce@metadata$markers[[group]]$marker_plot$data$Gene))
+  
   p <- sce@metadata$markers[[group]]$marker_plot
+  
   plot_file_name <- paste0(group, "_markers")
+  
   # save PNG
-  png(file.path(folder_path, paste0(plot_file_name, ".png")),
-      width = pwidth * 12, height = pheight * 5, units = "mm", res = 600)
+  png(file.path(folder_path, paste0(plot_file_name, ".png")), 
+      width = pwidth * 12, height = pheight*5, units = "mm", res = 600)
   print(p)
   dev.off()
-
+  
   # save PDF
   ggsave(
     file.path(folder_path, paste0(group, ".pdf")),
-    p,
-    width = pwidth * 12,
-    height = pheight * 5,
-    units = "mm",
+    p, 
+    width = pwidth * 12, 
+    height = pheight * 5, 
+    units = "mm", 
     scale = 1
   )
-
+  
 }
 
 ### Save Marker Gene Tables
 folder_path <- file.path(getwd(), "celltype_marker_tables")
 dir.create(folder_path)
 for (group in names(sce@metadata$markers)) {
-
+  
   marker_test_file_name <- paste0(group, "_markers_test.tsv")
   top_markers_file_name <- paste0(group, "_top_markers.tsv")
-
+  
   write.table(
-    sce@metadata$markers[[group]]$marker_test_res,
-    file = file.path(folder_path, marker_test_file_name),
-    row.names = FALSE,
-    col.names = TRUE,
+    sce@metadata$markers[[group]]$marker_test_res, 
+    file = file.path(folder_path, marker_test_file_name), 
+    row.names = FALSE, 
+    col.names = TRUE, 
     sep = "\t"
   )
-
+  
   write.table(
-    sce@metadata$markers[[group]]$top_specific_markers,
-    file = file.path(folder_path, top_markers_file_name),
-    row.names = FALSE,
-    col.names = TRUE,
+    sce@metadata$markers[[group]]$top_specific_markers, 
+    file = file.path(folder_path, top_markers_file_name), 
+    row.names = FALSE, 
+    col.names = TRUE, 
     sep = "\t"
   )
-
+  
 }
 
 
diff --git a/bin/scflow_integrate.r b/bin/scflow_integrate.r
index b9879e0..48ec53a 100755
--- a/bin/scflow_integrate.r
+++ b/bin/scflow_integrate.r
@@ -5,13 +5,13 @@
 # ____________________________________________________________________________
 # Initialization ####
 
-options(mc.cores = future::availableCores())
+options(mc.cores = future::availableCores())
 
 ## ............................................................................
 ## Load packages ####
-library(argparse)
 library(scFlow)
-library(parallel)
+library(argparse)
+#library(parallel)
 
 ## ............................................................................
 ## Parse command-line arguments ####
@@ -33,14 +33,14 @@ required$add_argument(
 required$add_argument(
   "--method",
   required = TRUE,
-  help = "The integration method to use",
+  help ="The integration method to use",
   metavar = "Liger"
 )
 
 required$add_argument(
   "--unique_id_var",
   required = TRUE,
-  help = "Unique id variable",
+  help ="Unique id variable",
   metavar = "manifest"
 )
 
@@ -48,7 +48,7 @@ required$add_argument(
   "--take_gene_union",
   default = FALSE,
   required = TRUE,
-  help = "Whether to fill out raw.data matrices with union of genes",
+  help ="Whether to fill out raw.data matrices with union of genes across all datasets (filling in 0 for missing data)",
   metavar = "Boolean"
 )
 
@@ -56,7 +56,7 @@ required$add_argument(
   "--remove_missing",
   default = TRUE,
   required = TRUE,
-  help = "Remove non-expressive genes and cells",
+  help ="Whether to remove cells not expressing any measured genes, and genes not expressed in any cells",
   metavar = "Boolean"
 )
 
@@ -65,7 +65,7 @@ required$add_argument(
   default = 3000,
   type = "integer",
   required = TRUE,
-  help = "Number of genes to find for each dataset",
+  help ="Number of genes to find for each dataset",
   metavar = "N"
 )
 
@@ -73,23 +73,15 @@ required$add_argument(
   "--combine",
   default = "union",
   required = TRUE,
-  help = "How to combine variable genes across experiments",
+  help ="How to combine variable genes across experiments",
   metavar = "union,intersect"
 )
 
-required$add_argument(
-  "--keep_unique",
-  default = FALSE,
-  required = TRUE,
-  help = "Keep genes that occur only in one dataset",
-  metavar = "Boolean"
-)
-
 required$add_argument(
   "--capitalize",
   default = FALSE,
   required = TRUE,
-  help = "Capitalize gene names to match homologous genes(i.e. across species)",
+  help = "Capitalize gene names to match homologous genes (i.e. across species)",
   metavar = "Boolean"
 )
 
@@ -97,7 +89,7 @@ required$add_argument(
   "--use_cols",
   default = TRUE,
   required = TRUE,
-  help = "Treat each column as a cell",
+  help ="Treat each column as a cell",
   metavar = "Boolean"
 )
 
@@ -106,7 +98,7 @@ required$add_argument(
   default = 30,
   type = "integer",
   required = TRUE,
-  help = "Inner dimension of factorization (number of factors)",
+  help ="Inner dimension of factorization (number of factors)",
   metavar = "N"
 )
 
@@ -115,7 +107,7 @@ required$add_argument(
   default = 5.0,
   type = "double",
   required = TRUE,
-  help = "Regularization parameter",
+  help = "Regularization parameter. Larger values penalize dataset-specific effects more strongly (i.e. alignment should increase as lambda increases)",
   metavar = "N"
 )
 
@@ -124,7 +116,7 @@ required$add_argument(
   default = 0.0001,
   type = "double",
   required = TRUE,
-  help = "Convergence threshold.",
+  help ="Convergence threshold. Convergence occurs when |obj0-obj|/(mean(obj0,obj)) < thresh",
   metavar = "N"
 )
 
@@ -133,7 +125,7 @@ required$add_argument(
   default = 100,
   type = "integer",
   required = TRUE,
-  help = "Maximum number of block coordinate descent iterations to perform",
+  help ="Maximum number of block coordinate descent iterations to perform",
   metavar = "N"
 )
 
@@ -142,7 +134,7 @@ required$add_argument(
   default = 1,
   type = "integer",
   required = TRUE,
-  help = "Number of restarts to perform",
+  help ="Number of restarts to perform",
   metavar = "N"
 )
 
@@ -151,7 +143,7 @@ required$add_argument(
   default = 1,
   type = "integer",
   required = TRUE,
-  help = "Random seed to allow reproducible results",
+  help ="Random seed to allow reproducible results",
   metavar = "N"
 )
 
@@ -160,33 +152,15 @@ required$add_argument(
   default = 20,
   type = "integer",
   required = TRUE,
-  help = "Number of nearest neighbors for within-dataset knn graph",
-  metavar = "N"
-)
-
-required$add_argument(
-  "--k2",
-  default = 500,
-  type = "integer",
-  required = TRUE,
-  help = "Horizon parameter for shared nearest factor graph",
-  metavar = "N"
-)
-
-required$add_argument(
-  "--prune_thresh",
-  default = 0.2,
-  type = "double",
-  required = TRUE,
-  help = "Minimum allowed edge weight. Any edges below this are removed",
+  help ="Number of nearest neighbors for within-dataset knn graph",
   metavar = "N"
 )
 
 required$add_argument(
   "--ref_dataset",
-  default = "",
+  default = '',
   required = TRUE,
-  help = "Name of dataset to use as a reference for normalization",
+  help ="Name of dataset to use as a reference for normalization",
   metavar = "ref"
 )
 
@@ -195,7 +169,7 @@ required$add_argument(
   default = 2,
   type = "integer",
   required = TRUE,
-  help = "Minimum number of cells to consider a cluster shared across datasets",
+  help ="Minimum number of cells to consider a cluster shared across datasets",
   metavar = "N"
 )
 
@@ -204,16 +178,7 @@ required$add_argument(
   default = 50,
   type = "integer",
   required = TRUE,
-  help = "Number of quantiles to use for quantile normalization",
-  metavar = "N"
-)
-
-required$add_argument(
-  "--nstart",
-  default = 10,
-  type = "integer",
-  required = TRUE,
-  help = "Number of times to perform Louvain community detection",
+  help ="Number of quantiles to use for quantile normalization",
   metavar = "N"
 )
 
@@ -222,43 +187,18 @@ required$add_argument(
   default = 1,
   type = "double",
   required = TRUE,
-  help = "Controls the number of communities detected",
+  help ="Controls the number of communities detected (Higher resolution -> more communities)",
   metavar = "N"
 )
 
-required$add_argument(
-  "--dims_use",
-  default = "null",
-  required = TRUE,
-  help = "Indices of factors to use for shared nearest factor determination",
-  metavar = "Indices"
-)
-
-required$add_argument(
-  "--dist_use",
-  default = "CR",
-  required = TRUE,
-  help = "Distance metric to use in calculating nearest neighbors",
-  metavar = "CR"
-)
-
 required$add_argument(
   "--center",
   default = FALSE,
   required = TRUE,
-  help = "Centers the data when scaling factors",
+  help ="Centers the data when scaling factors (useful for less sparse modalities like methylation data)",
   metavar = "Boolean"
 )
 
-required$add_argument(
-  "--small_clust_thresh",
-  default = 0,
-  type = "double",
-  required = TRUE,
-  help = "Extracts small clusters loading highly on single factor",
-  metavar = "N"
-)
-
 ### . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ..
 ### Pre-process args ####
 
@@ -290,12 +230,11 @@ sce <- integrate_sce(
   unique_id_var = args$unique_id_var,
   take_gene_union = args$take_gene_union,
   remove.missing = args$remove_missing,
-  make.sparse = T,
   num_genes = args$num_genes,
   combine = args$combine,
-  keep_unique = args$keep_unique,
   capitalize = args$capitalize,
   use_cols = args$use_cols,
+  num_cores = future::availableCores(methods = "mc.cores"),
   k = args$k,
   lambda = args$lambda,
   thresh = args$thresh,
@@ -306,24 +245,15 @@ sce <- integrate_sce(
   V_init = NULL,
   rand_seed = args$rand_seed,
   knn_k = args$knn_k,
-  k2 = args$k2,
-  prune_thresh = args$prune_thresh,
   ref_dataset = args$ref_dataset,
   min_cells = args$min_cells,
   quantiles = args$quantiles,
-  nstart = args$nstart,
   resolution = args$resolution,
-  dims_use = args$dims_use,
-  dist_use = args$dist_use,
   center = args$center,
-  small_clust_thresh = args$small_clust_thresh,
-  do_plot = FALSE,
-  id_number = NULL,
-  print_obj = FALSE,
-  print_mod = FALSE,
-  print_align_summary = FALSE
+  print_obj = FALSE
 )
 
+
 ## ............................................................................
 ## Save Outputs ####
 
diff --git a/bin/scflow_ipa.r b/bin/scflow_ipa.r
index 0cca3a8..10f6335 100755
--- a/bin/scflow_ipa.r
+++ b/bin/scflow_ipa.r
@@ -12,6 +12,7 @@ options(mc.cores = parallel::detectCores())
 library(argparse)
 library(scFlow)
 library(cli)
+library(dplyr)
 
 ##  ............................................................................
 ##  Parse command-line arguments                                            ####
@@ -50,9 +51,32 @@ required$add_argument(
 required$add_argument(
   "--enrichment_database",
   help = "name of the enrichment databases",
-  metavar = "GO_Biological_Process,GO_Cellular_Component,GO_Molecular_Function",
+  metavar = "GO_Biological_Process,Reactome,Wikipathway",
   required = TRUE,
-  default = "KEGG"
+  default = "GO_Biological_Process"
+)
+
+required$add_argument(
+  "--species",
+  help = "the biological species (e.g. mouse, human)",
+  default = "human",
+  required = TRUE
+)
+
+required$add_argument(
+  "--fc_threshold",
+  type = "double",
+  default = 1.1,
+  metavar = "number",
+  help = "Absolute fold-change cutoff for DE [default %(default)s]"
+)
+
+required$add_argument(
+  "--pval_cutoff",
+  type = "double",
+  default = 0.05,
+  metavar = "number",
+  help = "p-value cutoff for DE [default %(default)s]"
 )
 
 
@@ -62,6 +86,8 @@ required$add_argument(
 
 args <- parser$parse_args()
 
+options("scflow_species" = args$species)
+
 args$enrichment_method <- strsplit(args$enrichment_method, ",")[[1]]
 args$enrichment_tool <- strsplit(args$enrichment_tool, ",")[[1]]
 args$enrichment_database <- strsplit(args$enrichment_database, ",")[[1]]
@@ -91,23 +117,47 @@ dir.create(output_dir)
 dir.create(report_dir)
 
 for (gene_file in args$gene_file) {
-  enrichment_result <- find_impacted_pathways(
-    gene_file = gene_file,
-    enrichment_tool = args$enrichment_tool,
-    enrichment_method = args$enrichment_method,
-    enrichment_database = args$enrichment_database,
-    is_output = TRUE,
-    output_dir = output_dir
-  )
-  report_name <-  tools::file_path_sans_ext(gene_file)
-  report_fp <- paste0(report_name, "_scflow_ipa_report")
-  report_impacted_pathway(
-    res = enrichment_result,
-    report_folder_path = report_dir,
-    report_file = report_fp
-  )
-  cli::cli_text(c(
-    "{cli::col_green(symbol$tick)} Analysis complete, output is found at: ",
-    "{.file {output_dir}}"
-  ))
-}
+  
+  # Keep only genes passing the adjusted p-value and fold-change cutoffs.
+  dt <- read.delim(gene_file) %>%
+    dplyr::filter(
+      padj <= args$pval_cutoff,
+      abs(logFC) >= log2(args$fc_threshold)
+    )
+  if (nrow(dt) < 5) {
+    # Too few genes left: skip enrichment and reporting for this file.
+    cli::cli_alert_danger("Gene list is very short!")
+  } else {
+    enrichment_result <- find_impacted_pathways(
+      gene_file = dt,
+      reference_file = NULL,
+      organism = getOption("scflow_species"),
+      enrichment_tool = args$enrichment_tool,
+      enrichment_method = args$enrichment_method,
+      enrichment_database = args$enrichment_database,
+      is_output = TRUE,
+      output_dir = output_dir
+    )
+    # TRUE when every result entry has metadata$result set to FALSE.
+    no_pathways <- all(vapply(
+      enrichment_result,
+      function(res) isFALSE(res$metadata$result),
+      logical(1)
+    ))
+    if (no_pathways) {
+      cli::cli_alert_danger("No significant pathway was found at FDR 0.05")
+    } else {
+      report_name <- tools::file_path_sans_ext(gene_file)
+      report_fp <- paste0(report_name, "_scflow_ipa_report")
+      report_impacted_pathway(
+        res = enrichment_result,
+        report_folder_path = report_dir,
+        report_file = report_fp
+      )
+      cli::cli_text(c(
+        "{cli::col_green(symbol$tick)} Analysis complete, output is found at: ",
+        "{.file {output_dir}}"
+      ))
+    }
+  }
+}
\ No newline at end of file
diff --git a/bin/scflow_qc.r b/bin/scflow_qc.r
index 726a29a..6fff875 100755
--- a/bin/scflow_qc.r
+++ b/bin/scflow_qc.r
@@ -422,6 +422,11 @@ if (args$find_singlets) {
   )
 }
 
+
+sce <- sce[ , sce$total_counts >= args$min_library_size]
+sce <- sce[ , sce$total_features_by_counts >= args$min_features]
+
+
 dir.create(file.path(getwd(), "qc_report"))
 
 report_qc_sce(
diff --git a/conf/modules.config b/conf/modules.config
index 92362c1..ce287c0 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -70,8 +70,10 @@ params {
         }
 
         'scflow_reportintegrated' {
-            publish_dir = 'integration'
-            publish_files = ['integration_report':'../reports']
+            publish_dir = 'reports'
+            publish_files = [
+            'integration_report':''
+            ]
         }
 
         'scflow_mapcelltypes' {
@@ -112,9 +114,9 @@ params {
         }
 
         'scflow_dirichlet' {
-            publish_dir   = 'dirichlet'
+            publish_dir   = 'reports'
             publish_files = [
-                'dirichlet_report':'../reports'
+                'dirichlet_report':''
             ]
         }
 
diff --git a/conf/scflow_analysis.config b/conf/scflow_analysis.config
index 88ac706..6077605 100644
--- a/conf/scflow_analysis.config
+++ b/conf/scflow_analysis.config
@@ -14,13 +14,13 @@ params {
     qc_max_ribo = 1
     qc_min_counts = 2
     qc_min_cells = 2
-    qc_drop_unmapped = true
-    qc_drop_mito = true
-    qc_drop_ribo = true
+    qc_drop_unmapped = 'true'
+    qc_drop_mito = 'true'
+    qc_drop_ribo = 'true'
     qc_nmads = 4.0
 
     // Options: Ambient RNA Profiling
-    amb_find_cells = false
+    amb_find_cells = 'false'
     amb_lower = 100
     amb_retain = 'auto' // if numeric, pass as string
     amb_alpha_cutoff = 0.001
@@ -28,7 +28,7 @@ params {
     amb_expect_cells = 3000
 
     // Options: Multiplet Identification
-    mult_find_singlets = false
+    mult_find_singlets = 'false'
     mult_singlets_method = 'doubletfinder'
     mult_vars_to_regress_out = 'nCount_RNA,pc_mito' // *
     mult_pca_dims = 10
@@ -39,32 +39,25 @@ params {
 
     // Options: Integration
     integ_method = 'Liger'
+    integ_k = 30
     integ_unique_id_var = 'manifest'
-    integ_take_gene_union = false
-    integ_remove_missing = true
+    integ_take_gene_union = 'false'
+    integ_remove_missing = 'true'
     integ_num_genes = 3000
     integ_combine = 'union'
-    integ_keep_unique = false
-    integ_capitalize = false
-    integ_use_cols = true
-    integ_k = 30
+    integ_capitalize = 'false'
+    integ_use_cols = 'true'
     integ_lambda = 5.0
     integ_thresh = 0.0001
     integ_max_iters = 100
     integ_nrep = 1
     integ_rand_seed = 1
-    integ_knn_k = 20
-    integ_k2 = 500
-    integ_prune_thresh = 0.2
-    integ_ref_dataset = null
-    integ_min_cells = 2
     integ_quantiles = 50
-    integ_nstart = 10
+    integ_ref_dataset = 'NULL'
+    integ_min_cells = 2
+    integ_knn_k = 20
+    integ_center = 'false'
     integ_resolution = 1
-    integ_dims_use = null
-    integ_dist_use = 'CR'
-    integ_center = false
-    integ_small_clust_thresh = 0
 
     // Options: Integration report
     integ_categorical_covariates = 'manifest,diagnosis,sex' // *
@@ -72,7 +65,7 @@ params {
 
     // Options: Merge
     merge_plot_vars = 'total_features_by_counts,total_counts,pc_mito,pc_ribo'
-    merge_facet_vars = null // *
+    merge_facet_vars = 'null' // *
     merge_outlier_vars = 'total_features_by_counts,total_counts' // *
 
     // Options: Dimensionality Reduction
@@ -93,7 +86,7 @@ params {
     reddim_umap_local_connectivity = 1
     reddim_umap_repulsion_strength = 1
     reddim_umap_negative_sample_rate = 5
-    reddim_umap_fast_sgd = false
+    reddim_umap_fast_sgd = 'false'
     // tsne
     reddim_tsne_dims = 2
     reddim_tsne_initial_dims = 50
@@ -102,9 +95,9 @@ params {
     reddim_tsne_stop_lying_iter = 250
     reddim_tsne_mom_switch_iter = 250
     reddim_tsne_max_iter = 1000
-    reddim_tsne_pca_center = true
-    reddim_tsne_pca_scale = false
-    reddim_tsne_normalize = true
+    reddim_tsne_pca_center = 'true'
+    reddim_tsne_pca_scale = 'false'
+    reddim_tsne_normalize = 'true'
     reddim_tsne_momentum = 0.5
     reddim_tsne_final_momentum = 0.8
     reddim_tsne_eta = 1000
@@ -133,18 +126,19 @@ params {
     dge_mast_method = 'bayesglm'
     dge_min_counts = 1
     dge_min_cells_pc = 0.1
-    dge_rescale_numerics = true
-    dge_pseudobulk = false
+    dge_rescale_numerics = 'true'
+    dge_pseudobulk = 'false'
     dge_celltype_var = 'cluster_celltype'
     dge_sample_var = 'manifest'
     dge_dependent_var = 'diagnosis'
     dge_ref_class = 'Control'
     dge_confounding_vars = 'cngeneson' // *
-    dge_random_effects_var = null
+    dge_random_effects_var = 'null'
     dge_fc_threshold = 1.1
     dge_pval_cutoff = 0.05
-    dge_force_run = false
-    dge_max_cores = null
+    dge_n_label = 5
+    dge_force_run = 'false'
+    dge_max_cores = 'null'
 
     // Options: Integrated Pathway Analysis
     ipa_enrichment_tool = 'WebGestaltR'
@@ -156,7 +150,7 @@ params {
     dirich_celltype_var = 'cluster_celltype'
     dirich_dependent_var = 'diagnosis'
     dirich_ref_class = 'Control'
-    dirich_var_order = null // *
+    dirich_var_order = 'null' // *
 
     // Options: Plots (Reduced Dim)
     plotreddim_reduction_methods = 'UMAP_Liger' // *
@@ -165,4 +159,5 @@ params {
 
     // Misc
     species = 'human'
+    max_cores = 'null'
 }
diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf
index 7c83440..1e70bdc 100644
--- a/modules/local/get_software_versions.nf
+++ b/modules/local/get_software_versions.nf
@@ -7,13 +7,14 @@ process GET_SOFTWARE_VERSIONS {
     publishDir "${params.outdir}",
         mode: params.publish_dir_mode,
         saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'pipeline_info', meta:[:], publish_by_meta:[]) }
-
-    tag 'Version Info'
-    label 'process_tiny'
+    
+    tag "Version Info"
+    label 'process_low'
     //cache false
+    
 
     output:
-    path 'software_versions.tsv'     , emit: tsv
+    path "software_versions.tsv"     , emit: tsv
 
     script: // This script is bundled with the pipeline, in nf-core/scflow/bin/
     """
@@ -21,4 +22,4 @@ process GET_SOFTWARE_VERSIONS {
     echo $workflow.nextflow.version > nextflow.version.txt
     scrape_software_versions.r software_versions.tsv
     """
-}
+}
\ No newline at end of file
diff --git a/modules/local/process/scflow/dge.nf b/modules/local/process/scflow/dge.nf
index 7459e3b..c866516 100644
--- a/modules/local/process/scflow/dge.nf
+++ b/modules/local/process/scflow/dge.nf
@@ -1,5 +1,5 @@
 /*
- * Generate 2D reduced dimension plots of gene expression
+ * Run differential gene expression analysis
  */
 
 // Import generic module functions
diff --git a/modules/local/process/scflow/ipa.nf b/modules/local/process/scflow/ipa.nf
index 79c1169..42791b2 100644
--- a/modules/local/process/scflow/ipa.nf
+++ b/modules/local/process/scflow/ipa.nf
@@ -1,5 +1,5 @@
 /*
- * Integrated pathway analysis of differentially expressed genes
+ * Impacted pathway analysis of differentially expressed genes
  */
 
 // Import generic module functions
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 35c68a1..37e35c9 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -55,11 +55,16 @@
                     "description": "Input sample species.",
                     "help_text": "Currently, \"human\" and \"mouse\" are supported."
                 },
+                "max_cores": {
+                    "type": "string",
+                    "default": "null",
+                    "description": "Maximum CPU cores.",
+                    "help_text": "The default value of 'null' utilizes all available CPU cores.  Manually overriding this parameter can reduce the memory demands of parallelization across multiple cores."
+                },
                 "outdir": {
                     "type": "string",
                     "default": "./results",
-                    "description": "Outputs directory.",
-                    "fa_icon": "fas fa-folder-open"
+                    "description": "Outputs directory."
                 }
             },
             "required": [
@@ -67,7 +72,8 @@
                 "input",
                 "ensembl_mappings",
                 "ctd_path",
-                "species"
+                "species",
+                "max_cores"
             ],
             "help_text": ""
         },
@@ -88,7 +94,7 @@
                     "default": "seqdate",
                     "description": "The sample sheet variables to treat as factors.",
                     "help_text": "All sample sheet columns with numbers which should be treated as factors should be specified here separated by commas.   Examples include columns with dates, numeric sample identifiers, etc.",
-                    "fa_icon": "fas fa-layer-group"
+                    "fa_icon": "fas fa-quote-left"
                 },
                 "qc_min_library_size": {
                     "type": "integer",
@@ -123,7 +129,7 @@
                 },
                 "qc_max_ribo": {
                     "type": "number",
-                    "default": 1.0,
+                    "default": 1,
                     "description": "Maximum proportion of counts mapping to ribosomal genes.",
                     "fa_icon": "fas fa-less-than-equal",
                     "minimum": 0,
@@ -150,25 +156,26 @@
                     "fa_icon": "fas fa-greater-than-equal"
                 },
                 "qc_drop_unmapped": {
-                    "type": "boolean",
-                    "default": true,
+                    "type": "string",
+                    "default": "True",
                     "description": "Option to drop unmapped genes.",
                     "fa_icon": "fas fa-cut"
                 },
                 "qc_drop_mito": {
-                    "type": "boolean",
-                    "default": true,
+                    "type": "string",
+                    "default": "True",
                     "description": "Option to drop mitochondrial genes.",
                     "fa_icon": "fas fa-cut"
                 },
                 "qc_drop_ribo": {
-                    "type": "boolean",
+                    "type": "string",
                     "description": "Option to drop ribosomal genes.",
-                    "fa_icon": "fas fa-cut"
+                    "fa_icon": "fas fa-cut",
+                    "default": "false"
                 },
                 "qc_nmads": {
                     "type": "number",
-                    "default": 4.0,
+                    "default": 4,
                     "description": "The number of MADs for outlier detection.",
                     "help_text": "The number of median absolute deviations (MADs) used to define outliers for adaptive thresholding.",
                     "fa_icon": "fas fa-mountain"
@@ -199,8 +206,8 @@
             "default": "",
             "properties": {
                 "amb_find_cells": {
-                    "type": "boolean",
-                    "default": true,
+                    "type": "string",
+                    "default": "true",
                     "description": "Enable ambient RNA / empty droplet profiling.",
                     "fa_icon": "fas fa-cut"
                 },
@@ -257,40 +264,34 @@
             "default": "",
             "properties": {
                 "mult_find_singlets": {
-                    "type": "boolean",
-                    "default": true,
-                    "description": "Enable doublet/multiplet identification.",
-                    "fa_icon": "fas fa-cut"
+                    "type": "string",
+                    "default": "true",
+                    "description": "Enable doublet/multiplet identification."
                 },
                 "mult_singlets_method": {
                     "type": "string",
                     "default": "doubletfinder",
-                    "description": "Algorithm to use for doublet/multiplet identification.",
-                    "fa_icon": "fas fa-toolbox"
+                    "description": "Algorithm to use for doublet/multiplet identification."
                 },
                 "mult_vars_to_regress_out": {
                     "type": "string",
                     "default": "nCount_RNA,pc_mito",
-                    "description": "Variables to regress out for dimensionality reduction.",
-                    "fa_icon": "fas fa-layer-group"
+                    "description": "Variables to regress out for dimensionality reduction."
                 },
                 "mult_pca_dims": {
                     "type": "integer",
                     "default": 10,
-                    "description": "Number of PCA dimensions to use.",
-                    "fa_icon": "fas fa-calculator"
+                    "description": "Number of PCA dimensions to use."
                 },
                 "mult_var_features": {
                     "type": "integer",
                     "default": 2000,
-                    "description": "The top n most variable features to use.",
-                    "fa_icon": "fas fa-calculator"
+                    "description": "The top n most variable features to use."
                 },
                 "mult_doublet_rate": {
                     "type": "number",
                     "description": "A fixed doublet rate.",
-                    "help_text": "Use a fixed default rate (e.g. 0.075 to specify that 7.5% of all cells should be marked as doublets), or set to 0 to use the \"dpk\" method (recommended).",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "Use a fixed default rate (e.g. 0.075 to specify that 7.5% of all cells should be marked as doublets), or set to 0 to use the \"dpk\" method (recommended)."
                 },
                 "mult_dpk": {
                     "type": "integer",
@@ -298,15 +299,13 @@
                     "description": "Doublets per thousand cells increment.",
                     "help_text": "The doublets per thousand cell increment specifies the expected doublet rate based on the number of cells, i.e. with a dpk of 8 (recommended by 10X), a dataset with 1000 cells is expected to contain 8 doublets per thousand cells, a dataset with 2000 cells is expected to contain 16 doublets per thousand cells, and a dataset with 10000 cells is expected to contain 80 cells per thousand cells (or 800 doublets in total).  If the \"doublet_rate\" parameter is manually specified this recommended incremental behaviour is overridden.",
                     "minimum": 0,
-                    "maximum": 1000,
-                    "fa_icon": "fas fa-calculator"
+                    "maximum": 1000
                 },
                 "mult_pK": {
                     "type": "number",
                     "default": 0.02,
                     "description": "Specify a pK value instead of parameter sweep.",
-                    "help_text": "The optimal pK value used by the doubletFinder algorithm is determined following a compute-intensive parameter sweep.  The parameter sweep can be overridden by manually specifying a pK value.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "The optimal pK value used by the doubletFinder algorithm is determined following a compute-intensive parameter sweep.  The parameter sweep can be overridden by manually specifying a pK value."
                 }
             },
             "fa_icon": "fas fa-adjust",
@@ -330,26 +329,25 @@
                     "type": "string",
                     "default": "total_features_by_counts,total_counts,pc_mito,pc_ribo",
                     "description": "Numeric variables for inter-sample metrics.",
-                    "help_text": "A comma-separated list of numeric variables which differ between individual cells of each sample.  The merged sample report will include plots facilitating between-sample comparisons for each of these numeric variables.",
-                    "fa_icon": "fas fa-layer-group"
+                    "help_text": "A comma-separated list of numeric variables which differ between individual cells of each sample.  The merged sample report will include plots facilitating between-sample comparisons for each of these numeric variables."
                 },
                 "merge_facet_vars": {
                     "type": "string",
+                    "default": "NULL",
                     "description": "Categorical variables for further sub-setting of plots",
-                    "help_text": "A comma-separated list of categorical variables.  The merged sample report will include additional plots of sample metrics subset by each of these variables (e.g. sex, diagnosis).",
-                    "fa_icon": "fas fa-layer-group"
+                    "help_text": "A comma-separated list of categorical variables.  The merged sample report will include additional plots of sample metrics subset by each of these variables (e.g. sex, diagnosis)."
                 },
                 "merge_outlier_vars": {
                     "type": "string",
                     "default": "total_features_by_counts,total_counts",
                     "description": "Numeric variables for outlier identification.",
-                    "help_text": "The merged report will include tables highlighting samples that are putative outliers for each of these numeric variables.",
-                    "fa_icon": "fas fa-layer-group"
+                    "help_text": "The merged report will include tables highlighting samples that are putative outliers for each of these numeric variables."
                 }
             },
             "fa_icon": "fas fa-object-ungroup",
             "required": [
                 "merge_plot_vars",
+                "merge_facet_vars",
                 "merge_outlier_vars"
             ]
         },
@@ -362,223 +360,155 @@
                 "integ_method": {
                     "type": "string",
                     "default": "Liger",
-                    "description": "Choice of integration method.",
-                    "fa_icon": "fas fa-toolbox"
+                    "description": "Choice of integration method."
+                },
+                "integ_k": {
+                    "type": "integer",
+                    "default": 30,
+                    "description": "Inner dimension of factorization (n factors).",
+                    "help_text": "See rliger::optimizeALS().  Inner dimension of factorization (number of factors). Run suggestK to determine appropriate value; a general rule of thumb is that a higher k will be needed for datasets with more sub-structure."
                 },
                 "integ_unique_id_var": {
                     "type": "string",
                     "default": "manifest",
-                    "description": "Unique sample identifier variable.",
-                    "fa_icon": "fas fa-key"
+                    "description": "Unique sample identifier variable."
                 },
                 "integ_take_gene_union": {
-                    "type": "boolean",
+                    "type": "string",
+                    "default": "false",
                     "description": "Fill out matrices with union of genes.",
-                    "help_text": "See rliger::createLiger().  Whether to fill out raw.data matrices with union of genes across all datasets (filling in 0 for missing data) (requires make.sparse = TRUE) (default FALSE).",
-                    "fa_icon": "fas fa-cut"
+                    "help_text": "See rliger::createLiger().  Whether to fill out raw.data matrices with union of genes across all datasets (filling in 0 for missing data) (requires make.sparse = TRUE) (default FALSE)."
                 },
                 "integ_remove_missing": {
-                    "type": "boolean",
-                    "default": true,
+                    "type": "string",
+                    "default": "true",
                     "description": "Remove non-expressing cells/genes.",
-                    "help_text": "See rliger::createLiger().  Whether to remove cells not expressing any measured genes, and genes not expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any dataset) (default TRUE).",
-                    "fa_icon": "fas fa-cut"
+                    "help_text": "See rliger::createLiger().  Whether to remove cells not expressing any measured genes, and genes not expressed in any cells (if take.gene.union = TRUE, removes only genes not expressed in any dataset) (default TRUE)."
                 },
                 "integ_num_genes": {
                     "type": "integer",
                     "default": 3000,
                     "description": "Number of genes to find for each dataset.",
-                    "help_text": "See rliger::selectGenes(). Number of genes to find for each dataset. Optimises the value of var.thresh for each dataset to get this number of genes.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See rliger::selectGenes(). Number of genes to find for each dataset. Optimises the value of var.thresh for each dataset to get this number of genes."
                 },
                 "integ_combine": {
                     "type": "string",
                     "default": "union",
                     "description": "How to combine variable genes across experiments.",
-                    "help_text": "See rliger::selectGenes().  Either \"union\" or \"intersection\".",
-                    "fa_icon": "fas fa-calculator"
-                },
-                "integ_keep_unique": {
-                    "type": "boolean",
-                    "description": "Keep unique genes.",
-                    "help_text": "See rliger::selectGenes().",
-                    "fa_icon": "fas fa-cut"
+                    "help_text": "See rliger::selectGenes().  Either \"union\" or \"intersection\"."
                 },
                 "integ_capitalize": {
-                    "type": "boolean",
+                    "type": "string",
+                    "default": "false",
                     "description": "Capitalize gene names to match homologous genes.",
-                    "help_text": "See rliger::selectGenes().",
-                    "fa_icon": "fab fa-adn"
+                    "help_text": "See rliger::selectGenes()."
                 },
                 "integ_use_cols": {
-                    "type": "boolean",
-                    "default": true,
+                    "type": "string",
+                    "default": "true",
                     "description": "Treat each column as a cell.",
-                    "help_text": "See rliger::removeMissingObs().",
-                    "fa_icon": "fas fa-columns"
-                },
-                "integ_k": {
-                    "type": "integer",
-                    "default": 30,
-                    "description": "Inner dimension of factorization (n factors).",
-                    "help_text": "See rliger::optimizeALS().  Inner dimension of factorization (number of factors). Run suggestK to determine appropriate value; a general rule of thumb is that a higher k will be needed for datasets with more sub-structure.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See rliger::removeMissingObs()."
                 },
                 "integ_lambda": {
                     "type": "number",
-                    "default": 5.0,
+                    "default": 5,
                     "description": "Regularization parameter.",
-                    "help_text": "See rliger::optimizeALS(). Regularization parameter. Larger values penalize dataset-specific effects more strongly (ie. alignment should increase as lambda increases). Run suggestLambda to determine most appropriate value for balancing dataset alignment and agreement (default 5.0).",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See rliger::optimizeALS(). Regularization parameter. Larger values penalize dataset-specific effects more strongly (ie. alignment should increase as lambda increases). Run suggestLambda to determine most appropriate value for balancing dataset alignment and agreement (default 5.0)."
                 },
                 "integ_thresh": {
                     "type": "number",
                     "default": 0.0001,
                     "description": "Convergence threshold.",
-                    "help_text": "See rliger::optimizeALS().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See rliger::optimizeALS()."
                 },
                 "integ_max_iters": {
                     "type": "integer",
                     "default": 100,
                     "description": "Maximum number of block coordinate descent iterations.",
-                    "help_text": "See rliger::optimizeALS().",
-                    "fa_icon": "fas fa-less-than-equal"
+                    "help_text": "See rliger::optimizeALS()."
                 },
                 "integ_nrep": {
                     "type": "integer",
                     "default": 1,
                     "description": "Number of restarts to perform.",
-                    "help_text": "See rliger::optimizeALS().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See rliger::optimizeALS()."
                 },
                 "integ_rand_seed": {
                     "type": "integer",
                     "default": 1,
-                    "description": "Random seed for reproducible results.",
-                    "fa_icon": "fas fa-calculator"
+                    "description": "Random seed for reproducible results."
                 },
-                "integ_knn_k": {
-                    "type": "integer",
-                    "default": 20,
-                    "description": "Number of neearest neighbours for within-dataset knn graph.",
-                    "help_text": "See rliger::quantile_norm().",
-                    "fa_icon": "fas fa-calculator"
-                },
-                "integ_k2": {
+                "integ_quantiles": {
                     "type": "integer",
-                    "default": 500,
-                    "description": "Horizon parameter for shared nearest factor graph.",
-                    "help_text": "See rliger::quantileAlignSNF().  Distances to all but the k2 nearest neighbors are set to 0 (cuts down on memory usage for very large graphs).",
-                    "fa_icon": "fas fa-calculator"
-                },
-                "integ_prune_thresh": {
-                    "type": "number",
-                    "default": 0.2,
-                    "description": "Minimum allowed edge weight.",
-                    "help_text": "See rliger::quantileAlignSNF().",
-                    "fa_icon": "fas fa-greater-than-equal"
+                    "default": 50,
+                    "description": "Number of quantiles to use for normalization.",
+                    "help_text": "See rliger::quantile_norm()."
                 },
                 "integ_ref_dataset": {
                     "type": "string",
+                    "default": "NULL",
                     "description": "Name of dataset to use as a reference.",
-                    "help_text": "See rliger::quantile_norm().  Name of dataset to use as a \"reference\" for normalization. By default, the dataset with the largest number of cells is used.",
-                    "fa_icon": "fas fa-quote-left"
+                    "help_text": "See rliger::quantile_norm().  Name of dataset to use as a \"reference\" for normalization. By default, the dataset with the largest number of cells is used."
                 },
                 "integ_min_cells": {
                     "type": "integer",
                     "default": 2,
                     "description": "Minimum number of cells to consider a cluster shared across datasets.",
-                    "help_text": "See rliger::quantile_norm().",
-                    "fa_icon": "fas fa-greater-than-equal"
+                    "help_text": "See rliger::quantile_norm()."
                 },
-                "integ_quantiles": {
+                "integ_knn_k": {
                     "type": "integer",
-                    "default": 50,
-                    "description": "Number of quantiles to use for normalization.",
-                    "help_text": "See rliger::quantile_norm().",
-                    "fa_icon": "fas fa-calculator"
+                    "default": 20,
+                    "description": "Number of nearest neighbours for within-dataset knn graph.",
+                    "help_text": "See rliger::quantile_norm()."
                 },
-                "integ_nstart": {
-                    "type": "integer",
-                    "default": 10,
-                    "description": "Number of times to perform Louvain community detection.",
-                    "help_text": "See rliger::quantileAlignSNF().  Number of times to perform Louvain community detection with different random starts (default 10).",
-                    "fa_icon": "fas fa-recycle"
+                "integ_center": {
+                    "type": "string",
+                    "default": "false",
+                    "description": "Center the data when scaling factors.",
+                    "help_text": "See rliger::quantile_norm()."
                 },
                 "integ_resolution": {
                     "type": "integer",
                     "default": 1,
                     "description": "Controls the number of communities detected.",
-                    "help_text": "See rliger::quantileAlignSNF().",
-                    "fa_icon": "fas fa-calculator"
-                },
-                "integ_dims_use": {
-                    "type": "string",
-                    "description": "Indices of factors to use for shared nearest factor determination.",
-                    "help_text": "See rliger::quantile_norm().",
-                    "fa_icon": "fas fa-calculator"
-                },
-                "integ_dist_use": {
-                    "type": "string",
-                    "default": "CR",
-                    "description": "Distance metric to use in calculating nearest neighbour.",
-                    "help_text": "See rliger::quantileAlignSNF().  Default \"CR\".",
-                    "fa_icon": "fas fa-digital-tachograph"
-                },
-                "integ_center": {
-                    "type": "boolean",
-                    "description": "Center the data when scaling factors.",
-                    "help_text": "See rliger::quantile_norm().",
-                    "fa_icon": "fas fa-compress-arrows-alt"
-                },
-                "integ_small_clust_thresh": {
-                    "type": "integer",
-                    "help_text": "See rliger::quantileAlignSNF().  Extracts small clusters loading highly on single factor with fewer cells than this before regular alignment (default 0 \u2013 no small cluster extraction).",
-                    "description": "Small cluster extraction cells threshold.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See rliger::quantileAlignSNF()."
                 },
                 "integ_categorical_covariates": {
                     "type": "string",
                     "default": "individual,diagnosis,region,sex",
                     "description": "Categorical variables for integration report metrics.",
-                    "help_text": "The integration report will provide plots and integration metrics for these categorical variables.",
-                    "fa_icon": "fas fa-layer-group"
+                    "help_text": "The integration report will provide plots and integration metrics for these categorical variables."
                 },
                 "integ_input_reduced_dim": {
                     "type": "string",
                     "default": "UMAP",
                     "description": "Reduced dimension embedding for the integration report.",
-                    "help_text": "The integration report will provide with and without integration plots using this embedding.",
-                    "fa_icon": "fas fa-chess-board"
+                    "help_text": "The integration report will provide with and without integration plots using this embedding."
                 }
             },
             "fa_icon": "far fa-object-group",
             "required": [
                 "integ_method",
+                "integ_k",
                 "integ_unique_id_var",
                 "integ_take_gene_union",
                 "integ_remove_missing",
                 "integ_num_genes",
                 "integ_combine",
-                "integ_keep_unique",
                 "integ_capitalize",
                 "integ_use_cols",
-                "integ_k",
                 "integ_lambda",
                 "integ_thresh",
                 "integ_max_iters",
                 "integ_nrep",
                 "integ_rand_seed",
-                "integ_knn_k",
-                "integ_k2",
-                "integ_prune_thresh",
-                "integ_min_cells",
                 "integ_quantiles",
-                "integ_nstart",
-                "integ_resolution",
-                "integ_dist_use",
+                "integ_ref_dataset",
+                "integ_min_cells",
+                "integ_knn_k",
                 "integ_center",
+                "integ_resolution",
                 "integ_categorical_covariates",
                 "integ_input_reduced_dim"
             ]
@@ -592,42 +522,36 @@
                 "reddim_input_reduced_dim": {
                     "type": "string",
                     "default": "PCA,Liger",
-                    "description": "Input matrix for dimension reduction.",
-                    "fa_icon": "fas fa-chess-board"
+                    "description": "Input matrix for dimension reduction."
                 },
                 "reddim_reduction_methods": {
                     "type": "string",
                     "default": "tSNE,UMAP,UMAP3D",
                     "description": "Dimension reduction outputs to generate.",
-                    "help_text": "Typically 'UMAP,UMAP3D' or 'tSNE'.",
-                    "fa_icon": "fas fa-toolbox"
+                    "help_text": "Typically 'UMAP,UMAP3D' or 'tSNE'."
                 },
                 "reddim_vars_to_regress_out": {
                     "type": "string",
                     "default": "nCount_RNA,pc_mito",
-                    "description": "Variables to regress out before dimension reduction.",
-                    "fa_icon": "fas fa-layer-group"
+                    "description": "Variables to regress out before dimension reduction."
                 },
                 "reddim_umap_pca_dims": {
                     "type": "integer",
                     "default": 30,
                     "description": "Number of PCA dimensions.",
-                    "help_text": "See uwot::umap().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See uwot::umap()."
                 },
                 "reddim_umap_n_neighbors": {
                     "type": "integer",
                     "default": 35,
                     "description": "Number of nearest neighbours to use.",
-                    "help_text": "See uwot::umap().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See uwot::umap()."
                 },
                 "reddim_umap_n_components": {
                     "type": "integer",
                     "default": 2,
                     "description": "The dimension of the space to embed into.",
-                    "help_text": "See uwot::umap(). The dimension of the space to embed into. This defaults to 2 to provide easy visualization, but can reasonably be set to any integer value in the range 2 to 100.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See uwot::umap(). The dimension of the space to embed into. This defaults to 2 to provide easy visualization, but can reasonably be set to any integer value in the range 2 to 100."
                 },
                 "reddim_umap_init": {
                     "type": "string",
@@ -643,8 +567,7 @@
                         "pca",
                         "spca",
                         "agspectral"
-                    ],
-                    "fa_icon": "fas fa-calculator"
+                    ]
                 },
                 "reddim_umap_metric": {
                     "type": "string",
@@ -658,169 +581,147 @@
                         "hamming",
                         "correlation",
                         "categorical"
-                    ],
-                    "fa_icon": "fas fa-digital-tachograph"
+                    ]
                 },
                 "reddim_umap_n_epochs": {
                     "type": "integer",
                     "default": 200,
                     "description": "Number of epochs to us during optimization of embedded coordinates.",
-                    "help_text": "See uwot::umap().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See uwot::umap()."
                 },
                 "reddim_umap_learning_rate": {
                     "type": "integer",
                     "default": 1,
                     "description": "Initial learning rate used in optimization of coordinates.",
-                    "help_text": "See uwot::umap().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See uwot::umap()."
                 },
                 "reddim_umap_min_dist": {
                     "type": "number",
                     "default": 0.4,
                     "description": "Effective minimum distance between embedded points.",
-                    "help_text": "See uwot::umap().  Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result on a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out.",
-                    "fa_icon": "fas fa-greater-than-equal"
+                    "help_text": "See uwot::umap().  Smaller values will result in a more clustered/clumped embedding where nearby points on the manifold are drawn closer together, while larger values will result in a more even dispersal of points. The value should be set relative to the spread value, which determines the scale at which embedded points will be spread out."
                 },
                 "reddim_umap_spread": {
                     "type": "number",
                     "default": 0.85,
                     "description": "Effective scale of embedded points.",
-                    "help_text": "See uwot::umap().  In combination with min_dist, this determines how clustered/clumped the embedded points are.",
-                    "fa_icon": "fas fa-arrows-alt-h"
+                    "help_text": "See uwot::umap().  In combination with min_dist, this determines how clustered/clumped the embedded points are."
                 },
                 "reddim_umap_set_op_mix_ratio": {
                     "type": "number",
-                    "default": 1.0,
+                    "default": 1,
                     "description": "Interpolation to combine local fuzzy sets.",
                     "help_text": "See uwot::umap().  The value of this parameter should be between 0.0 and 1.0; a value of 1.0 will use a pure fuzzy union, while 0.0 will use a pure fuzzy intersection.",
                     "minimum": 0,
-                    "maximum": 1,
-                    "fa_icon": "fas fa-adjust"
+                    "maximum": 1
                 },
                 "reddim_umap_local_connectivity": {
                     "type": "integer",
                     "default": 1,
                     "description": "Local connectivity required.",
-                    "help_text": "See uwot::umap().  The local connectivity required \u2013 i.e. the number of nearest neighbors that should be assumed to be connected at a local level. The higher this value the more connected the manifold becomes locally.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See uwot::umap().  The local connectivity required \u2013 i.e. the number of nearest neighbors that should be assumed to be connected at a local level. The higher this value the more connected the manifold becomes locally."
                 },
                 "reddim_umap_repulsion_strength": {
                     "type": "integer",
                     "default": 1,
                     "description": "Weighting applied to negative samples in embedding optimization.",
-                    "help_text": "See uwot::umap().  Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See uwot::umap().  Weighting applied to negative samples in low dimensional embedding optimization. Values higher than one will result in greater weight being given to negative samples."
                 },
                 "reddim_umap_negative_sample_rate": {
                     "type": "integer",
                     "default": 5,
                     "description": "Number of negative edge samples to use per positive edge sample.",
-                    "help_text": "See uwot::umap().  The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See uwot::umap().  The number of negative edge/1-simplex samples to use per positive edge/1-simplex sample in optimizing the low dimensional embedding."
                 },
                 "reddim_umap_fast_sgd": {
-                    "type": "boolean",
+                    "type": "string",
+                    "default": "false",
                     "description": "Use fast SGD.",
-                    "help_text": "See uwot::umap().  Setting this to TRUE will speed up the stochastic optimization phase, but give a potentially less accurate embedding, and which will not be exactly reproducible even with a fixed seed. For visualization, fast_sgd = TRUE will give perfectly good results. For more generic dimensionality reduction, it's safer to leave fast_sgd = FALSE.",
-                    "fa_icon": "fas fa-skiing"
+                    "help_text": "See uwot::umap().  Setting this to TRUE will speed up the stochastic optimization phase, but give a potentially less accurate embedding, and which will not be exactly reproducible even with a fixed seed. For visualization, fast_sgd = TRUE will give perfectly good results. For more generic dimensionality reduction, it's safer to leave fast_sgd = FALSE."
                 },
                 "reddim_tsne_dims": {
                     "type": "integer",
                     "default": 2,
                     "description": "Output dimensionality.",
-                    "help_text": "See Rtsne::Rtsne().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne()."
                 },
                 "reddim_tsne_initial_dims": {
                     "type": "integer",
                     "default": 50,
                     "description": "Number of dimensions retained in the initial PCA step.",
-                    "help_text": "See Rtsne::Rtsne().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne()."
                 },
                 "reddim_tsne_perplexity": {
                     "type": "integer",
                     "default": 150,
                     "description": "Perplexity parameter.",
-                    "help_text": "See Rtsne::Rtsne().",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne()."
                 },
                 "reddim_tsne_theta": {
                     "type": "number",
                     "default": 0.5,
                     "description": "Speed/accuracy trade-off.",
-                    "help_text": "See Rtsne::Rtsne().  Speed/accuracy trade-off (increase for less accuracy), set to 0.0 for exact TSNE (default: 0.5).",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne().  Speed/accuracy trade-off (increase for less accuracy), set to 0.0 for exact TSNE (default: 0.5)."
                 },
                 "reddim_tsne_stop_lying_iter": {
                     "type": "integer",
                     "default": 250,
                     "description": "Iteration after which perplexities are no longer exaggerated.",
-                    "help_text": "See Rtsne::Rtsne().  Iteration after which the perplexities are no longer exaggerated (default: 250, except when Y_init is used, then 0).",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne().  Iteration after which the perplexities are no longer exaggerated (default: 250, except when Y_init is used, then 0)."
                 },
                 "reddim_tsne_mom_switch_iter": {
                     "type": "integer",
                     "default": 250,
                     "description": "Iteration after which the final momentum is used.",
-                    "help_text": "See Rtsne::Rtsne().  Iteration after which the final momentum is used (default: 250, except when Y_init is used, then 0).",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne().  Iteration after which the final momentum is used (default: 250, except when Y_init is used, then 0)."
                 },
                 "reddim_tsne_max_iter": {
                     "type": "integer",
                     "default": 1000,
                     "description": "Number of iterations.",
-                    "help_text": "See Rtsne::Rtsne(). ",
-                    "fa_icon": "fas fa-less-than-equal"
+                    "help_text": "See Rtsne::Rtsne(). "
                 },
                 "reddim_tsne_pca_center": {
-                    "type": "boolean",
-                    "default": true,
+                    "type": "string",
+                    "default": "true",
                     "description": "Center data before PCA.",
-                    "help_text": "See Rtsne::Rtsne(). Should data be centered before pca is applied? (default: TRUE)",
-                    "fa_icon": "fas fa-compress-arrows-alt"
+                    "help_text": "See Rtsne::Rtsne(). Should data be centered before pca is applied? (default: TRUE)"
                 },
                 "reddim_tsne_pca_scale": {
-                    "type": "boolean",
+                    "type": "string",
+                    "default": "false",
                     "description": "Scale data before PCA.",
-                    "help_text": "See Rtsne::Rtsne().  Should data be scaled before pca is applied? (default: FALSE).",
-                    "fa_icon": "fas fa-balance-scale"
+                    "help_text": "See Rtsne::Rtsne().  Should data be scaled before pca is applied? (default: FALSE)."
                 },
                 "reddim_tsne_normalize": {
-                    "type": "boolean",
-                    "default": true,
+                    "type": "string",
+                    "default": "true",
                     "description": "Normalize data before distance calculations.",
-                    "help_text": "See Rtsne::Rtsne(). Should data be normalized internally prior to distance calculations with normalize_input? (default: TRUE)",
-                    "fa_icon": "fas fa-balance-scale"
+                    "help_text": "See Rtsne::Rtsne(). Should data be normalized internally prior to distance calculations with normalize_input? (default: TRUE)"
                 },
                 "reddim_tsne_momentum": {
                     "type": "number",
                     "default": 0.5,
                     "description": "Momentum used in the first part of optimization.",
-                    "help_text": "See Rtsne::Rtsne(). ",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne(). "
                 },
                 "reddim_tsne_final_momentum": {
                     "type": "number",
                     "default": 0.8,
                     "description": "Momentum used in the final part of optimization.",
-                    "help_text": "See Rtsne::Rtsne(). ",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne(). "
                 },
                 "reddim_tsne_eta": {
                     "type": "integer",
                     "default": 1000,
                     "description": "Learning rate.",
-                    "help_text": "See Rtsne::Rtsne(). ",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne(). "
                 },
                 "reddim_tsne_exaggeration_factor": {
                     "type": "integer",
                     "default": 12,
                     "description": "Exaggeration factor used in the first part of the optimization.",
-                    "help_text": "See Rtsne::Rtsne().  Exaggeration factor used to multiply the P matrix in the first part of the optimization (default: 12.0).",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "See Rtsne::Rtsne().  Exaggeration factor used to multiply the P matrix in the first part of the optimization (default: 12.0)."
                 }
             },
             "fa_icon": "fas fa-cubes",
@@ -868,34 +769,29 @@
                     "type": "string",
                     "default": "leiden",
                     "description": "Clustering method.",
-                    "help_text": "Specify \"leiden\" or \"louvain\".",
-                    "fa_icon": "fas fa-toolbox"
+                    "help_text": "Specify \"leiden\" or \"louvain\"."
                 },
                 "clust_reduction_method": {
                     "type": "string",
                     "default": "UMAP_Liger",
                     "description": "Reduced dimension input(s) for clustering.",
-                    "help_text": "One or more of \"UMAP\", \"tSNE\", \"PCA\", \"LSI\".",
-                    "fa_icon": "fas fa-chess-board"
+                    "help_text": "One or more of \"UMAP\", \"tSNE\", \"PCA\", \"LSI\"."
                 },
                 "clust_res": {
                     "type": "number",
                     "default": 0.001,
-                    "description": "The resolution of clustering.",
-                    "fa_icon": "fas fa-calculator"
+                    "description": "The resolution of clustering."
                 },
                 "clust_k": {
                     "type": "integer",
                     "default": 50,
                     "description": "Integer number of nearest neighbours for clustering.",
-                    "help_text": "Integer number of nearest neighbors to use when creating the k nearest neighbor graph for Louvain/Leiden clustering. k is related to the resolution of the clustering result, a bigger k will result in lower resolution and vice versa.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "Integer number of nearest neighbors to use when creating the k nearest neighbor graph for Louvain/Leiden clustering. k is related to the resolution of the clustering result, a bigger k will result in lower resolution and vice versa."
                 },
                 "clust_louvain_iter": {
                     "type": "integer",
                     "default": 1,
-                    "description": "The number of iterations for clustering.",
-                    "fa_icon": "fas fa-recycle"
+                    "description": "The number of iterations for clustering."
                 }
             },
             "fa_icon": "fas fa-braille",
@@ -916,44 +812,37 @@
                 "cta_clusters_colname": {
                     "type": "string",
                     "default": "clusters",
-                    "description": "SingleCellExperiment clusters colData variable name.",
-                    "fa_icon": "fas fa-quote-left"
+                    "description": "SingleCellExperiment clusters colData variable name."
                 },
                 "cta_cells_to_sample": {
                     "type": "integer",
                     "default": 10000,
-                    "description": "Max cells to sample.",
-                    "fa_icon": "fas fa-calculator"
+                    "description": "Max cells to sample."
                 },
                 "cta_unique_id_var": {
                     "type": "string",
                     "default": "individual",
-                    "description": "A sample metadata unique sample ID.",
-                    "fa_icon": "fas fa-key"
+                    "description": "A sample metadata unique sample ID."
                 },
                 "cta_celltype_var": {
                     "type": "string",
                     "default": "cluster_celltype",
-                    "description": "SingleCellExperiment cell-type colData variable name.",
-                    "fa_icon": "fas fa-quote-left"
+                    "description": "SingleCellExperiment cell-type colData variable name."
                 },
                 "cta_facet_vars": {
                     "type": "string",
                     "default": "manifest,diagnosis,sex,capdate,prepdate,seqdate",
-                    "description": "Cell-type metrics for categorical variables.",
-                    "fa_icon": "fas fa-layer-group"
+                    "description": "Cell-type metrics for categorical variables."
                 },
                 "cta_metric_vars": {
                     "type": "string",
                     "default": "pc_mito,pc_ribo,total_counts,total_features_by_counts",
-                    "description": "Cell-type metrics for numeric variables.",
-                    "fa_icon": "fas fa-layer-group"
+                    "description": "Cell-type metrics for numeric variables."
                 },
                 "cta_top_n": {
                     "type": "integer",
                     "default": 5,
-                    "description": "Number of top marker genes for plot/table generation.",
-                    "fa_icon": "fas fa-calculator"
+                    "description": "Number of top marker genes for plot/table generation."
                 }
             },
             "fa_icon": "fas fa-brain",
@@ -976,8 +865,7 @@
                 "dge_de_method": {
                     "type": "string",
                     "default": "MASTZLM",
-                    "description": "Differential gene expression method.",
-                    "fa_icon": "fas fa-toolbox"
+                    "description": "Differential gene expression method."
                 },
                 "dge_mast_method": {
                     "type": "string",
@@ -988,15 +876,13 @@
                         "glm",
                         "glmer",
                         "bayesglm"
-                    ],
-                    "fa_icon": "fas fa-toolbox"
+                    ]
                 },
                 "dge_min_counts": {
                     "type": "integer",
                     "default": 1,
                     "description": "Expressive gene minimum counts.",
-                    "help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression.",
-                    "fa_icon": "fas fa-greater-than-equal"
+                    "help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression."
                 },
                 "dge_min_cells_pc": {
                     "type": "number",
@@ -1004,87 +890,83 @@
                     "minimum": 0,
                     "maximum": 1,
                     "description": "Expressive gene minimum cells fraction.",
-                    "help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression.  Default 0.1 (i.e. 10% of cells).",
-                    "fa_icon": "fas fa-greater-than-equal"
+                    "help_text": "Only genes with at least min_counts in min_cells_pc will be tested for differential gene expression.  Default 0.1 (i.e. 10% of cells)."
                 },
                 "dge_rescale_numerics": {
-                    "type": "boolean",
-                    "default": true,
+                    "type": "string",
+                    "default": "true",
                     "description": "Re-scale numeric covariates.",
-                    "help_text": "Re-scaling and centring numeric covariates in a model can improve model performance.",
-                    "fa_icon": "fas fa-balance-scale"
+                    "help_text": "Re-scaling and centring numeric covariates in a model can improve model performance."
                 },
                 "dge_pseudobulk": {
-                    "type": "boolean",
+                    "type": "string",
+                    "default": "false",
                     "description": "Pseudobulked differential gene expression.",
-                    "help_text": "Perform differential gene expression on a smaller matrix where counts are first summed across all cells within a sample (defined by dge_sample_var level).",
-                    "fa_icon": "far fa-object-group"
+                    "help_text": "Perform differential gene expression on a smaller matrix where counts are first summed across all cells within a sample (defined by dge_sample_var level)."
                 },
                 "dge_celltype_var": {
                     "type": "string",
                     "default": "cluster_celltype",
                     "description": "Cell-type annotation variable name.",
-                    "help_text": "Differential gene expression is performed separately for each cell-type of this colData variable.",
-                    "fa_icon": "fas fa-quote-left"
+                    "help_text": "Differential gene expression is performed separately for each cell-type of this colData variable."
                 },
                 "dge_sample_var": {
                     "type": "string",
                     "default": "manifest",
-                    "description": "Unique sample identifier variable.",
-                    "fa_icon": "fas fa-key"
+                    "description": "Unique sample identifier variable."
                 },
                 "dge_dependent_var": {
                     "type": "string",
                     "default": "group",
                     "description": "Dependent variable of DGE model.",
-                    "help_text": "The dependent variable may be a categorical (e.g. diagnosis) or a numeric (e.g. histopathology score) variable.",
-                    "fa_icon": "fas fa-quote-left"
+                    "help_text": "The dependent variable may be a categorical (e.g. diagnosis) or a numeric (e.g. histopathology score) variable."
                 },
                 "dge_ref_class": {
                     "type": "string",
                     "default": "Control",
                     "help_text": "If a categorical dependent variable is specified, then the reference class of the dependent variable is specified here (e.g. 'Control').",
-                    "description": "Reference class of categorical dependent variable.",
-                    "fa_icon": "fas fa-quote-left"
+                    "description": "Reference class of categorical dependent variable."
                 },
                 "dge_confounding_vars": {
                     "type": "string",
                     "default": "cngeneson,seqdate,pc_mito",
                     "description": "Confounding variables.",
-                    "help_text": "A comma-separated list of confounding variables to account for in the DGE model.",
-                    "fa_icon": "fas fa-layer-group"
+                    "help_text": "A comma-separated list of confounding variables to account for in the DGE model."
                 },
                 "dge_random_effects_var": {
                     "type": "string",
+                    "default": "NULL",
                     "description": "Random effect confounding variable.",
-                    "help_text": "If specified, the term `+ (1 | x ) +`is added to the model, where x is the specified random effects variable.",
-                    "fa_icon": "fas fa-quote-left"
+                    "help_text": "If specified, the term `+ (1 | x ) +` is added to the model, where x is the specified random effects variable."
                 },
                 "dge_fc_threshold": {
                     "type": "number",
                     "default": 1.1,
                     "description": "Fold-change threshold for plotting.",
-                    "help_text": "This absolute fold-change cut-off value is used in plots (e.g. volcano) and the DGE report.",
-                    "fa_icon": "fas fa-calculator"
+                    "help_text": "This absolute fold-change cut-off value is used in plots (e.g. volcano) and the DGE report."
                 },
                 "dge_pval_cutoff": {
                     "type": "number",
                     "default": 0.05,
                     "description": "Adjusted p-value cutoff.",
-                    "help_text": "The adjusted p-value cutoff value is used in plots (e.g. volcano) and the DGE report.",
-                    "fa_icon": "fas fa-less-than-equal"
+                    "help_text": "The adjusted p-value cutoff value is used in plots (e.g. volcano) and the DGE report."
+                },
+                "dge_n_label": {
+                    "type": "number",
+                    "default": 5,
+                    "help_text": "The number of genes to label in plots (e.g. volcano) and the DGE report."
                 },
                 "dge_force_run": {
-                    "type": "boolean",
+                    "type": "string",
+                    "default": "false",
                     "description": "Force model fit for non-full rank.",
-                    "help_text": "A non-full rank model specification will return an error; to override this to return a warning only, set to TRUE.",
-                    "fa_icon": "fas fa-exclamation"
+                    "help_text": "A non-full rank model specification will return an error; to override this to return a warning only, set to TRUE."
                 },
                 "dge_max_cores": {
-                    "type": "integer",
+                    "type": "string",
+                    "default": "'null'",
                     "description": "Maximum CPU cores.",
-                    "help_text": "The default value of 'null' utilizes all available CPU cores.  As each additional CPU core increases the number of genes simultaneously fit, the RAM/memory demand increases concomitantly.  Manually overriding this parameter can reduce the memory demands of parallelization across multiple cores.",
-                    "fa_icon": "fas fa-microchip"
+                    "help_text": "The default value of 'null' utilizes all available CPU cores.  As each additional CPU core increases the number of genes simultaneously fit, the RAM/memory demand increases concomitantly.  Manually overriding this parameter can reduce the memory demands of parallelization across multiple cores."
                 }
             },
             "fa_icon": "fas fa-chart-bar",
@@ -1100,9 +982,12 @@
                 "dge_dependent_var",
                 "dge_ref_class",
                 "dge_confounding_vars",
+                "dge_random_effects_var",
                 "dge_fc_threshold",
                 "dge_pval_cutoff",
-                "dge_force_run"
+                "dge_n_label",
+                "dge_force_run",
+                "dge_max_cores"
             ]
         },
         "impacted_pathway_analysis": {
@@ -1114,26 +999,18 @@
                 "ipa_enrichment_tool": {
                     "type": "string",
                     "default": "WebGestaltR",
-                    "description": "Pathway enrichment tool(s) to use.",
-                    "enum": [
-                        "WebGestaltR",
-                        "ROntoTools",
-                        "enrichR"
-                    ],
-                    "fa_icon": "fas fa-toolbox"
+                    "description": "Pathway enrichment tool(s) to use."
                 },
                 "ipa_enrichment_method": {
                     "type": "string",
                     "default": "ORA",
-                    "description": "Enrichment method.",
-                    "fa_icon": "fas fa-layer-group"
+                    "description": "Enrichment method."
                 },
                 "ipa_enrichment_database": {
                     "type": "string",
                     "default": "GO_Biological_Process",
                     "description": "Database(s) to use for enrichment.",
-                    "help_text": "See scFlow::list_databases().  Name of the database(s) for enrichment. Examples include \"GO_Biological_Process\", \"GO_Cellular_Component\", \"GO_Molecular_Function\", \"KEGG\", \"Reactome\", \"Wikipathway\".",
-                    "fa_icon": "fas fa-layer-group"
+                    "help_text": "See scFlow::list_databases().  Name of the database(s) for enrichment. Examples include \"GO_Biological_Process\", \"GO_Cellular_Component\", \"GO_Molecular_Function\", \"KEGG\", \"Reactome\", \"Wikipathway\"."
                 }
             },
             "fa_icon": "fas fa-project-diagram",
@@ -1152,33 +1029,28 @@
                 "dirich_unique_id_var": {
                     "type": "string",
                     "default": "individual",
-                    "description": "Unique sampler identifier.",
-                    "fa_icon": "fas fa-key"
+                    "description": "Unique sample identifier."
                 },
                 "dirich_celltype_var": {
                     "type": "string",
                     "default": "cluster_celltype",
-                    "description": "Cell-type annotation variable name.",
-                    "fa_icon": "fas fa-quote-left"
+                    "description": "Cell-type annotation variable name."
                 },
                 "dirich_dependent_var": {
                     "type": "string",
                     "default": "group",
-                    "description": "Dependent variable of Dirichlet model.",
-                    "fa_icon": "fas fa-quote-left"
+                    "description": "Dependent variable of Dirichlet model."
                 },
                 "dirich_ref_class": {
                     "type": "string",
                     "default": "Control",
-                    "description": "Reference class of categorical dependent variable.",
-                    "fa_icon": "fas fa-quote-left"
+                    "description": "Reference class of categorical dependent variable."
                 },
                 "dirich_var_order": {
                     "type": "string",
                     "default": "Control,Low,High",
                     "description": "Dependent variable classes order.",
-                    "help_text": "For plotting and reports, the order of classes for the dependent variable can be manually specified (e.g. 'Control,Low,High').",
-                    "fa_icon": "fas fa-layer-group"
+                    "help_text": "For plotting and reports, the order of classes for the dependent variable can be manually specified (e.g. 'Control,Low,High')."
                 }
             },
             "fa_icon": "fas fa-chart-pie",
@@ -1401,6 +1273,11 @@
                     "description": "Send plain-text email instead of HTML.",
                     "hidden": true,
                     "fa_icon": "fas fa-envelope"
+                },
+                "options": {
+                    "type": "string",
+                    "description": "NA",
+                    "hidden": true
                 }
             }
         }
diff --git a/workflows/scflow.nf b/workflows/scflow.nf
index 9f27efd..d79ff84 100644
--- a/workflows/scflow.nf
+++ b/workflows/scflow.nf
@@ -94,32 +94,25 @@ scflow_merge_options.args            =
 def scflow_integrate_options         = modules['scflow_integrate']
 scflow_integrate_options.args        =
     "--method ${params.integ_method} \
+    --k ${params.integ_k} \
     --unique_id_var ${params.integ_unique_id_var} \
     --take_gene_union ${params.integ_take_gene_union} \
     --remove_missing ${params.integ_remove_missing} \
     --num_genes ${params.integ_num_genes} \
     --combine ${params.integ_combine} \
-    --keep_unique ${params.integ_keep_unique} \
     --capitalize ${params.integ_capitalize} \
     --use_cols ${params.integ_use_cols} \
-    --k ${params.integ_k} \
     --lambda ${params.integ_lambda} \
     --thresh ${params.integ_thresh} \
     --max_iters ${params.integ_max_iters} \
     --nrep ${params.integ_nrep} \
     --rand_seed ${params.integ_rand_seed} \
-    --knn_k ${params.integ_knn_k} \
-    --k2 ${params.integ_k2} \
-    --prune_thresh ${params.integ_prune_thresh} \
+    --quantiles ${params.integ_quantiles} \
     --ref_dataset ${params.integ_ref_dataset} \
     --min_cells ${params.integ_min_cells} \
-    --quantiles ${params.integ_quantiles} \
-    --nstart ${params.integ_nstart} \
-    --resolution ${params.integ_resolution} \
-    --dims_use ${params.integ_dims_use} \
-    --dist_use ${params.integ_dist_use} \
+    --knn_k ${params.integ_knn_k} \
     --center ${params.integ_center} \
-    --small_clust_thresh ${params.integ_small_clust_thresh}"
+    --resolution ${params.integ_resolution}"
 
 def scflow_reducedims_options        = modules['scflow_reducedims']
 scflow_reducedims_options.args       =
@@ -188,7 +181,8 @@ scflow_finalize_options.args         =
     --metric_vars ${params.cta_metric_vars} \
     --top_n ${params.cta_top_n} \
     --reddimplot_pointsize ${params.reddimplot_pointsize} \
-    --reddimplot_alpha ${params.reddimplot_alpha}"
+    --reddimplot_alpha ${params.reddimplot_alpha} \
+    --max_cores ${params.max_cores}"
 
 def scflow_dge_options               = modules['scflow_dge']
 scflow_dge_options.args              =
@@ -219,7 +213,10 @@ def scflow_ipa_options               = modules['scflow_ipa']
 scflow_ipa_options.args              =
     "--enrichment_tool ${params.ipa_enrichment_tool} \
     --enrichment_method ${params.ipa_enrichment_method} \
-    --enrichment_database ${params.ipa_enrichment_database}"
+    --enrichment_database ${params.ipa_enrichment_database} \
+    --pval_cutoff ${params.dge_pval_cutoff} \
+    --fc_threshold ${params.dge_fc_threshold} \
+    --species ${params.species}"
 
 def scflow_dirichlet_options         = modules['scflow_dirichlet']
 scflow_dirichlet_options.args =

From ed044bb0c609be2ddc4ef391b5a36852156e015c Mon Sep 17 00:00:00 2001
From: nfancy <n.naharfancy@yahoo.com>
Date: Mon, 11 Oct 2021 14:06:34 +0100
Subject: [PATCH 2/7] minor updates in cluster.nf

---
 bin/scflow_integrate.r                  | 2 +-
 modules/local/process/scflow/cluster.nf | 2 +-
 nextflow.config                         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/scflow_integrate.r b/bin/scflow_integrate.r
index 48ec53a..d0c2266 100755
--- a/bin/scflow_integrate.r
+++ b/bin/scflow_integrate.r
@@ -11,7 +11,7 @@ options(mc.cores = future::availableCores(methods = "mc.cores"))
 ## Load packages ####
 library(scFlow)
 library(argparse)
-#library(parallel)
+library(parallel)
 
 ## ............................................................................
 ## Parse command-line arguments ####
diff --git a/modules/local/process/scflow/cluster.nf b/modules/local/process/scflow/cluster.nf
index 9c2ab10..4f5f42e 100644
--- a/modules/local/process/scflow/cluster.nf
+++ b/modules/local/process/scflow/cluster.nf
@@ -10,7 +10,7 @@ def options    = initOptions(params.options)
 
 process SCFLOW_CLUSTER {
     tag 'MERGED'
-    label 'process_low'
+    label 'process_medium'
     publishDir "${params.outdir}",
         mode: params.publish_dir_mode,
         saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') }
diff --git a/nextflow.config b/nextflow.config
index 7de6214..39a402b 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -18,7 +18,7 @@ manifest {
 
 // Container slug. Stable releases should specify release tag!
 // Developmental code should specify :dev
-process.container = 'almurphy/scfdev:dev'
+process.container = 'almurphy/scfdev:0.7.1'
 
 //workDir = "/rds/general/user/$USER/ephemeral/tmp"
 workDir = './work'

From 1b2fc11cd7c74c8c8cb241676ca7196500417695 Mon Sep 17 00:00:00 2001
From: nfancy <n.naharfancy@yahoo.com>
Date: Mon, 11 Oct 2021 15:15:12 +0100
Subject: [PATCH 3/7] lint checks

---
 bin/scflow_dge.r                       | 65 +++++++++++++++-----------
 bin/scflow_ipa.r                       | 29 ++++++------
 modules/local/get_software_versions.nf |  1 -
 3 files changed, 52 insertions(+), 43 deletions(-)

diff --git a/bin/scflow_dge.r b/bin/scflow_dge.r
index ef4d27e..b7114b5 100755
--- a/bin/scflow_dge.r
+++ b/bin/scflow_dge.r
@@ -185,9 +185,11 @@ options("scflow_species" = args$species)
 args$rescale_numerics <- as.logical(args$rescale_numerics)
 args$pseudobulk <- as.logical(args$pseudobulk)
 args$force_run <- as.logical(args$force_run)
-if(tolower(args$random_effects_var) == "null") args$random_effects_var <- NULL
+if (tolower(args$random_effects_var) == "null") args$random_effects_var <- NULL
 
-args$max_cores <- if(toupper(args$max_cores) == "NULL") NULL else { 
+args$max_cores <- if (toupper(args$max_cores) == "NULL") {
+  NULL
+} else {
   as.numeric(as.character(args$max_cores))
 }
 
@@ -210,7 +212,6 @@ cli::cli_alert(sprintf(
   n_cores
 ))
 
-# RhpcBLASctl::omp_set_num_threads(1L)
 
 library(scFlow)
 
@@ -230,7 +231,9 @@ if (args$pseudobulk) {
   pb_str <- "_pb"
   sce_subset <- pseudobulk_sce(
     sce_subset,
-    keep_vars = c(args$dependent_var, args$confounding_vars, args$random_effects_var),
+    keep_vars = c(args$dependent_var,
+                  args$confounding_vars, 
+                  args$random_effects_var),
     assay_name = "counts",
     celltype_var = args$celltype_var,
     sample_var = args$sample_var
@@ -257,42 +260,48 @@ de_results <- perform_de(
   species = getOption("scflow_species")
 )
 
-file_name <- paste0(args$celltype, "_",
-                    args$de_method, pb_str, "_")
+file_name <- paste0(
+  args$celltype, "_",
+  args$de_method, pb_str, "_"
+)
 
 for (result in names(de_results)) {
   if (dim(de_results[[result]])[[1]] > 0) {
     write.table(de_results[[result]],
-                file = file.path(getwd(), 
-                                 paste0(file_name, result, "_DE.tsv")),
-                quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE)
-    
+      file = file.path(
+        getwd(),
+        paste0(file_name, result, "_DE.tsv")
+      ),
+      quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE
+    )
+
     report_de(de_results[[result]],
-              fc_threshold = args$fc_threshold,
-              pval_cutoff = args$pval_cutoff,
-              n_label = args$n_label,
-              report_folder_path = file.path(getwd()),
-              report_file = paste0(file_name, result, "_scflow_de_report"))
-    
+      fc_threshold = args$fc_threshold,
+      pval_cutoff = args$pval_cutoff,
+      n_label = args$n_label,
+      report_folder_path = file.path(getwd()),
+      report_file = paste0(file_name, result, "_scflow_de_report")
+    )
+
     print("report generated")
-    
+
     p <- scFlow::volcano_plot(
       dt = de_results[[result]],
       fc_threshold = args$fc_threshold,
-      pval_cutoff =  args$pval_cutoff,
+      pval_cutoff = args$pval_cutoff,
       n_label = args$n_label
     )
-    ggplot2::ggsave(filename = file.path(getwd(),
-                                         paste0(file_name, result, "_volcano_plot.png")),
-                    plot = p,
-                    width = 7, height = 5, units = "in", dpi = 600)
-    
+    ggplot2::ggsave(
+      filename = file.path(
+        getwd(),
+        paste0(file_name, result, "_volcano_plot.png")
+      ),
+      plot = p,
+      width = 7, height = 5, units = "in", dpi = 600
+    )
+
     print("Volcano plot generated")
-  
-    
   } else {
     print(sprintf("No DE genes found for %s", result))
   }
-}
-
-
+}
\ No newline at end of file
diff --git a/bin/scflow_ipa.r b/bin/scflow_ipa.r
index 10f6335..3b18b5e 100755
--- a/bin/scflow_ipa.r
+++ b/bin/scflow_ipa.r
@@ -117,17 +117,17 @@ dir.create(output_dir)
 dir.create(report_dir)
 
 for (gene_file in args$gene_file) {
-  
   dt <- read.delim(gene_file)
-  
+
   dt <- dt %>%
-    dplyr::filter(padj <= args$pval_cutoff, 
-                  abs(logFC) >= log2(args$fc_threshold))
-  
-  if (nrow(dt) < 5 ) {
+    dplyr::filter(
+      padj <= args$pval_cutoff,
+      abs(logFC) >= log2(args$fc_threshold)
+    )
+
+  if (nrow(dt) < 5) {
     cli::cli_alert_danger("Gene list is very short!")
   } else {
-    
     enrichment_result <- find_impacted_pathways(
       gene_file = dt,
       reference_file = NULL,
@@ -138,22 +138,23 @@ for (gene_file in args$gene_file) {
       is_output = TRUE,
       output_dir = output_dir
     )
-    
+
     if (all(unlist(lapply(
-      enrichment_result, function(dt){
-        isFALSE(dt$metadata$result)})))) {
+      enrichment_result, function(dt) {
+        isFALSE(dt$metadata$result)
+      }
+    )))) {
       cli::cli_alert_danger("No significant pathway was found at FDR 0.05")
     } else {
-      
-      report_name <-  tools::file_path_sans_ext(gene_file)
+      report_name <- tools::file_path_sans_ext(gene_file)
       report_fp <- paste0(report_name, "_scflow_ipa_report")
-      
+
       report_impacted_pathway(
         res = enrichment_result,
         report_folder_path = report_dir,
         report_file = report_fp
       )
-      
+
       cli::cli_text(c(
         "{cli::col_green(symbol$tick)} Analysis complete, output is found at: ",
         "{.file {output_dir}}"
diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf
index 9acdf16..e910f09 100644
--- a/modules/local/get_software_versions.nf
+++ b/modules/local/get_software_versions.nf
@@ -12,7 +12,6 @@ process GET_SOFTWARE_VERSIONS {
     label 'process_tiny'
     errorStrategy 'ignore'
     //cache false
-    
 
     output:
     path "software_versions.tsv"     , emit: tsv

From 63028a1de9ff686d509de7a8044c6fd9f92dde7e Mon Sep 17 00:00:00 2001
From: nfancy <n.naharfancy@yahoo.com>
Date: Mon, 11 Oct 2021 15:24:47 +0100
Subject: [PATCH 4/7] EClint checks

---
 bin/scflow_dge.r          | 11 ++++--
 bin/scflow_finalize_sce.r | 79 ++++++++++++++++++++-------------------
 2 files changed, 47 insertions(+), 43 deletions(-)

diff --git a/bin/scflow_dge.r b/bin/scflow_dge.r
index b7114b5..d419394 100755
--- a/bin/scflow_dge.r
+++ b/bin/scflow_dge.r
@@ -231,9 +231,11 @@ if (args$pseudobulk) {
   pb_str <- "_pb"
   sce_subset <- pseudobulk_sce(
     sce_subset,
-    keep_vars = c(args$dependent_var,
-                  args$confounding_vars, 
-                  args$random_effects_var),
+    keep_vars = c(
+      args$dependent_var,
+      args$confounding_vars,
+      args$random_effects_var
+    ),
     assay_name = "counts",
     celltype_var = args$celltype_var,
     sample_var = args$sample_var
@@ -291,6 +293,7 @@ for (result in names(de_results)) {
       pval_cutoff = args$pval_cutoff,
       n_label = args$n_label
     )
+
     ggplot2::ggsave(
       filename = file.path(
         getwd(),
@@ -304,4 +307,4 @@ for (result in names(de_results)) {
   } else {
     print(sprintf("No DE genes found for %s", result))
   }
-}
\ No newline at end of file
+}
diff --git a/bin/scflow_finalize_sce.r b/bin/scflow_finalize_sce.r
index ffa1743..aed989e 100755
--- a/bin/scflow_finalize_sce.r
+++ b/bin/scflow_finalize_sce.r
@@ -24,42 +24,42 @@ optional <- parser$add_argument_group("Optional", "required arguments")
 required$add_argument(
   "--sce_path",
   help = "-path to the SingleCellExperiment",
-  metavar = "dir", 
+  metavar = "dir",
   required = TRUE
 )
 
 required$add_argument(
   "--celltype_mappings",
   help = "path to a tsv file with revised celltype mappings",
-  metavar = "foo/bar", 
+  metavar = "foo/bar",
   required = TRUE
 )
 
 required$add_argument(
   "--clusters_colname",
   help = "name of the column with cluster numbers",
-  metavar = "foo/bar", 
+  metavar = "foo/bar",
   required = TRUE
 )
 
 required$add_argument(
   "--celltype_var",
   help = "name of the column with celltype names",
-  metavar = "foo/bar", 
+  metavar = "foo/bar",
   required = TRUE
 )
 
 required$add_argument(
   "--unique_id_var",
   help = "name of the column with unique sample ids",
-  metavar = "foo/bar", 
+  metavar = "foo/bar",
   required = TRUE
 )
 
 required$add_argument(
   "--facet_vars",
   help = "names of variables to examine in the celltype metrics report",
-  metavar = "foo/bar", 
+  metavar = "foo/bar",
   required = TRUE
 )
 
@@ -67,14 +67,14 @@ required$add_argument(
 required$add_argument(
   "--input_reduced_dim",
   help = "name of the reduced dimension slot to use for plots in the report",
-  metavar = "foo/bar", 
+  metavar = "foo/bar",
   required = TRUE
 )
 
 required$add_argument(
   "--metric_vars",
   help = "names of variables to examine in the celltype metrics report",
-  metavar = "foo/bar", 
+  metavar = "foo/bar",
   required = TRUE
 )
 
@@ -83,7 +83,7 @@ required$add_argument(
   default = 5,
   type = "integer",
   required = TRUE,
-  help ="The number of top marker genes",
+  help = "The number of top marker genes",
   metavar = "N"
 )
 
@@ -123,7 +123,9 @@ args$metric_vars <- strsplit(args$metric_vars, ",")[[1]]
 options("scflow_reddimplot_pointsize" = args$reddimplot_pointsize)
 options("scflow_reddimplot_alpha" = args$reddimplot_alpha)
 
-args$max_cores <- if(toupper(args$max_cores) == "NULL") NULL else { 
+args$max_cores <- if (toupper(args$max_cores) == "NULL") {
+  NULL
+} else {
   as.numeric(as.character(args$max_cores))
 }
 
@@ -192,67 +194,66 @@ celltypes <- as.data.frame(SummarizedExperiment::colData(sce)) %>%
 colnames(celltypes) <- c("celltype", "n_cells")
 
 write.table(
-  data.frame(celltypes), 
-  file = "celltypes.tsv", 
-  row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")
+  data.frame(celltypes),
+  file = "celltypes.tsv",
+  row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t"
+)
 
 ### Save Marker Gene Plots
 folder_path <- file.path(getwd(), "celltype_marker_plots")
 dir.create(folder_path)
 
 for (group in names(sce@metadata$markers)) {
-  
-  pwidth <- max(10,
-                length(unique(sce@metadata$markers[[group]]$marker_plot$data$Group))
+  pwidth <- max(
+    10,
+    length(unique(sce@metadata$markers[[group]]$marker_plot$data$Group))
   )
   pheight <- length(unique(sce@metadata$markers[[group]]$marker_plot$data$Gene))
-  
+
   p <- sce@metadata$markers[[group]]$marker_plot
-  
+
   plot_file_name <- paste0(group, "_markers")
-  
+
   # save PNG
-  png(file.path(folder_path, paste0(plot_file_name, ".png")), 
-      width = pwidth * 12, height = pheight*5, units = "mm", res = 600)
+  png(file.path(folder_path, paste0(plot_file_name, ".png")),
+    width = pwidth * 12, height = pheight * 5, units = "mm", res = 600
+  )
   print(p)
   dev.off()
-  
+
   # save PDF
   ggsave(
     file.path(folder_path, paste0(group, ".pdf")),
-    p, 
-    width = pwidth * 12, 
-    height = pheight * 5, 
-    units = "mm", 
+    p,
+    width = pwidth * 12,
+    height = pheight * 5,
+    units = "mm",
     scale = 1
   )
-  
 }
 
 ### Save Marker Gene Tables
 folder_path <- file.path(getwd(), "celltype_marker_tables")
 dir.create(folder_path)
 for (group in names(sce@metadata$markers)) {
-  
   marker_test_file_name <- paste0(group, "_markers_test.tsv")
   top_markers_file_name <- paste0(group, "_top_markers.tsv")
-  
+
   write.table(
-    sce@metadata$markers[[group]]$marker_test_res, 
-    file = file.path(folder_path, marker_test_file_name), 
-    row.names = FALSE, 
-    col.names = TRUE, 
+    sce@metadata$markers[[group]]$marker_test_res,
+    file = file.path(folder_path, marker_test_file_name),
+    row.names = FALSE,
+    col.names = TRUE,
     sep = "\t"
   )
-  
+
   write.table(
-    sce@metadata$markers[[group]]$top_specific_markers, 
-    file = file.path(folder_path, top_markers_file_name), 
-    row.names = FALSE, 
-    col.names = TRUE, 
+    sce@metadata$markers[[group]]$top_specific_markers,
+    file = file.path(folder_path, top_markers_file_name),
+    row.names = FALSE,
+    col.names = TRUE,
     sep = "\t"
   )
-  
 }
 
 

From 5d218aba1c5750525d83eaa080c0793023ffb471 Mon Sep 17 00:00:00 2001
From: nfancy <n.naharfancy@yahoo.com>
Date: Mon, 11 Oct 2021 15:28:36 +0100
Subject: [PATCH 5/7] EClint checks

---
 bin/scflow_finalize_sce.r              | 2 +-
 bin/scflow_ipa.r                       | 2 +-
 modules/local/get_software_versions.nf | 2 +-
 nextflow.config                        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/bin/scflow_finalize_sce.r b/bin/scflow_finalize_sce.r
index aed989e..d82cac2 100755
--- a/bin/scflow_finalize_sce.r
+++ b/bin/scflow_finalize_sce.r
@@ -264,4 +264,4 @@ write_sce(
 )
 
 ##  ............................................................................
-##  Clean up                                                                ####
+##  Clean up                                                                ####
\ No newline at end of file
diff --git a/bin/scflow_ipa.r b/bin/scflow_ipa.r
index 3b18b5e..5e09573 100755
--- a/bin/scflow_ipa.r
+++ b/bin/scflow_ipa.r
@@ -161,4 +161,4 @@ for (gene_file in args$gene_file) {
       ))
     }
   }
-}
\ No newline at end of file
+}
diff --git a/modules/local/get_software_versions.nf b/modules/local/get_software_versions.nf
index e910f09..76a74f5 100644
--- a/modules/local/get_software_versions.nf
+++ b/modules/local/get_software_versions.nf
@@ -22,4 +22,4 @@ process GET_SOFTWARE_VERSIONS {
     echo $workflow.nextflow.version > nextflow.version.txt
     scrape_software_versions.r software_versions.tsv
     """
-}
\ No newline at end of file
+}
diff --git a/nextflow.config b/nextflow.config
index db640b7..c90fd42 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -192,4 +192,4 @@ def check_max(obj, type) {
             return obj
         }
     }
-}
\ No newline at end of file
+}

From 08e388c3ab7958c0efb95028ce26a9f572712bbc Mon Sep 17 00:00:00 2001
From: nfancy <n.naharfancy@yahoo.com>
Date: Mon, 11 Oct 2021 15:29:33 +0100
Subject: [PATCH 6/7] EClint checks

---
 bin/scflow_finalize_sce.r | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/scflow_finalize_sce.r b/bin/scflow_finalize_sce.r
index d82cac2..aed989e 100755
--- a/bin/scflow_finalize_sce.r
+++ b/bin/scflow_finalize_sce.r
@@ -264,4 +264,4 @@ write_sce(
 )
 
 ##  ............................................................................
-##  Clean up                                                                ####
\ No newline at end of file
+##  Clean up                                                                ####

From 4eb8e301864ca436d5980d4ba6bd6883bad8770c Mon Sep 17 00:00:00 2001
From: nfancy <n.naharfancy@yahoo.com>
Date: Mon, 11 Oct 2021 15:30:35 +0100
Subject: [PATCH 7/7] EClint checks

---
 bin/scflow_finalize_sce.r | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bin/scflow_finalize_sce.r b/bin/scflow_finalize_sce.r
index aed989e..2b4e473 100755
--- a/bin/scflow_finalize_sce.r
+++ b/bin/scflow_finalize_sce.r
@@ -263,5 +263,3 @@ write_sce(
   folder_path = file.path(getwd(), "final_sce")
 )
 
-##  ............................................................................
-##  Clean up                                                                ####