Merge remote-tracking branch 'origin/development' into jashapiro/714-…

…nextflow-schema
AlexsLemonade · Mar 7, 2024 · 8ffeca3 · 8ffeca3
2 parents a53e7f6 + a4132cc
commit 8ffeca3
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 14 deletions.
diff --git a/bin/merge_sces.R b/bin/merge_sces.R
@@ -223,8 +223,8 @@ possible_columns <- c(
   "altexps_cellhash_sum",
   "altexps_cellhash_detected",
   "altexps_cellhash_percent",
-  "hashedDrops_sampleID",
-  "HTODemux_sampleID",
+  "hashedDrops_sampleid",
+  "HTODemux_sampleid",
   "vireo_sampleid"
 )
 

diff --git a/merge.nf b/merge.nf
@@ -32,9 +32,9 @@ process merge_sce {
   label 'mem_32'
   publishDir "${params.results_dir}/${merge_group_id}/merged"
   input:
-    tuple val(merge_group_id), val(has_adt), val(multiplexed), val(library_ids), path(scpca_nf_file)
+    tuple val(merge_group_id), val(has_adt), val(library_ids), path(scpca_nf_file)
   output:
-    tuple path(merged_sce_file), val(merge_group_id), val(has_adt), val(multiplexed)
+    tuple path(merged_sce_file), val(merge_group_id), val(has_adt)
   script:
     input_library_ids = library_ids.join(',')
     input_sces = scpca_nf_file.join(',')
@@ -46,7 +46,6 @@ process merge_sce {
       --output_sce_file "${merged_sce_file}" \
       --n_hvg ${params.num_hvg} \
       ${has_adt ? "--include_altexp" : ''} \
-      ${multiplexed ? "--multiplexed" : '' } \
       --threads ${task.cpus}
     """
   stub:
@@ -63,7 +62,7 @@ process generate_merge_report {
   publishDir "${params.results_dir}/${merge_group_id}/merged"
   label 'mem_16'
   input:
-    tuple path(merged_sce_file), val(merge_group_id), val(has_adt), val(multiplexed)
+    tuple path(merged_sce_file), val(merge_group_id), val(has_adt)
     path(report_template)
   output:
     path(merge_report)
@@ -159,9 +158,23 @@ workflow {
       .collect{it.project_id}
       .map{it.unique()}
 
-    grouped_libraries_ch = libraries_ch
+    filtered_libraries_ch = libraries_ch
       // only include single-cell/single-nuclei which ensures we don't try to merge libraries from spatial or bulk data
       .filter{it.seq_unit in ['cell', 'nucleus']}
+      // remove any multiplexed projects
+      // future todo: only filter library ids that are multiplexed, but keep all other non-multiplexed libraries
+      .branch{
+        multiplexed: it.project_id in multiplex_projects.getVal()
+        single_sample: true
+      }
+
+    filtered_libraries_ch.multiplexed
+      .unique{ it.project_id }
+      .subscribe{
+        log.warn("Not merging ${it.project_id} because it contains multiplexed libraries.")
+      }
+
+    grouped_libraries_ch = filtered_libraries_ch.single_sample
       // create tuple of [project id, library_id, processed_sce_file]
       .map{[
         it.project_id,
@@ -178,7 +191,6 @@ workflow {
       .map{project_id, library_id_list, sce_file_list -> tuple(
         project_id,
         project_id in adt_projects.getVal(), // determines if altExp should be included in the merged object
-        project_id in multiplex_projects.getVal(), // determines if sample metadata should be added to colData and to skip anndata
         library_id_list,
         sce_file_list
       )}
@@ -189,10 +201,5 @@ workflow {
     generate_merge_report(merge_sce.out, file(merge_template))
 
     // export merged objects to AnnData
-    anndata_ch = merge_sce.out
-      .filter{!it[3]} // remove multiplexed samples before export
-      .map{it.take(3)} // keep everything but multiplexed logical
-
-
-    export_anndata(anndata_ch)
+    export_anndata(merge_sce.out)
 }