diff --git a/bin/merge_sces.R b/bin/merge_sces.R index fbe876de..15bf4f38 100755 --- a/bin/merge_sces.R +++ b/bin/merge_sces.R @@ -223,8 +223,8 @@ possible_columns <- c( "altexps_cellhash_sum", "altexps_cellhash_detected", "altexps_cellhash_percent", - "hashedDrops_sampleID", - "HTODemux_sampleID", + "hashedDrops_sampleid", + "HTODemux_sampleid", "vireo_sampleid" ) diff --git a/merge.nf b/merge.nf index 3cf3e83a..012547fa 100644 --- a/merge.nf +++ b/merge.nf @@ -32,9 +32,9 @@ process merge_sce { label 'mem_32' publishDir "${params.results_dir}/${merge_group_id}/merged" input: - tuple val(merge_group_id), val(has_adt), val(multiplexed), val(library_ids), path(scpca_nf_file) + tuple val(merge_group_id), val(has_adt), val(library_ids), path(scpca_nf_file) output: - tuple path(merged_sce_file), val(merge_group_id), val(has_adt), val(multiplexed) + tuple path(merged_sce_file), val(merge_group_id), val(has_adt) script: input_library_ids = library_ids.join(',') input_sces = scpca_nf_file.join(',') @@ -46,7 +46,6 @@ process merge_sce { --output_sce_file "${merged_sce_file}" \ --n_hvg ${params.num_hvg} \ ${has_adt ? "--include_altexp" : ''} \ - ${multiplexed ? "--multiplexed" : '' } \ --threads ${task.cpus} """ stub: @@ -63,7 +62,7 @@ process generate_merge_report { publishDir "${params.results_dir}/${merge_group_id}/merged" label 'mem_16' input: - tuple path(merged_sce_file), val(merge_group_id), val(has_adt), val(multiplexed) + tuple path(merged_sce_file), val(merge_group_id), val(has_adt) path(report_template) output: path(merge_report) @@ -159,9 +158,23 @@ workflow { .collect{it.project_id} .map{it.unique()} - grouped_libraries_ch = libraries_ch + filtered_libraries_ch = libraries_ch // only include single-cell/single-nuclei which ensures we don't try to merge libraries from spatial or bulk data .filter{it.seq_unit in ['cell', 'nucleus']} + // remove any multiplexed projects + // future todo: only filter library ids that are multiplexed, but keep all other non-multiplexed libraries + .branch{ + multiplexed: it.project_id in multiplex_projects.getVal() + single_sample: true + } + + filtered_libraries_ch.multiplexed + .unique{ it.project_id } + .subscribe{ + log.warn("Not merging ${it.project_id} because it contains multiplexed libraries.") + } + + grouped_libraries_ch = filtered_libraries_ch.single_sample // create tuple of [project id, library_id, processed_sce_file] .map{[ it.project_id, @@ -178,7 +191,6 @@ workflow { .map{project_id, library_id_list, sce_file_list -> tuple( project_id, project_id in adt_projects.getVal(), // determines if altExp should be included in the merged object - project_id in multiplex_projects.getVal(), // determines if sample metadata should be added to colData and to skip anndata library_id_list, sce_file_list )} @@ -189,10 +201,5 @@ workflow { generate_merge_report(merge_sce.out, file(merge_template)) // export merged objects to AnnData - anndata_ch = merge_sce.out - .filter{!it[3]} // remove multiplexed samples before export - .map{it.take(3)} // keep everything but multiplexed logical - - - export_anndata(anndata_ch) + export_anndata(merge_sce.out) }