Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/development' into jashapiro/714-…
Browse files Browse the repository at this point in the history
…nextflow-schema
  • Loading branch information
jashapiro committed Mar 7, 2024
2 parents a53e7f6 + a4132cc commit 8ffeca3
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 14 deletions.
4 changes: 2 additions & 2 deletions bin/merge_sces.R
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,8 @@ possible_columns <- c(
"altexps_cellhash_sum",
"altexps_cellhash_detected",
"altexps_cellhash_percent",
"hashedDrops_sampleID",
"HTODemux_sampleID",
"hashedDrops_sampleid",
"HTODemux_sampleid",
"vireo_sampleid"
)

Expand Down
31 changes: 19 additions & 12 deletions merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ process merge_sce {
label 'mem_32'
publishDir "${params.results_dir}/${merge_group_id}/merged"
input:
tuple val(merge_group_id), val(has_adt), val(multiplexed), val(library_ids), path(scpca_nf_file)
tuple val(merge_group_id), val(has_adt), val(library_ids), path(scpca_nf_file)
output:
tuple path(merged_sce_file), val(merge_group_id), val(has_adt), val(multiplexed)
tuple path(merged_sce_file), val(merge_group_id), val(has_adt)
script:
input_library_ids = library_ids.join(',')
input_sces = scpca_nf_file.join(',')
Expand All @@ -46,7 +46,6 @@ process merge_sce {
--output_sce_file "${merged_sce_file}" \
--n_hvg ${params.num_hvg} \
${has_adt ? "--include_altexp" : ''} \
${multiplexed ? "--multiplexed" : '' } \
--threads ${task.cpus}
"""
stub:
Expand All @@ -63,7 +62,7 @@ process generate_merge_report {
publishDir "${params.results_dir}/${merge_group_id}/merged"
label 'mem_16'
input:
tuple path(merged_sce_file), val(merge_group_id), val(has_adt), val(multiplexed)
tuple path(merged_sce_file), val(merge_group_id), val(has_adt)
path(report_template)
output:
path(merge_report)
Expand Down Expand Up @@ -159,9 +158,23 @@ workflow {
.collect{it.project_id}
.map{it.unique()}

grouped_libraries_ch = libraries_ch
filtered_libraries_ch = libraries_ch
// only include single-cell/single-nuclei which ensures we don't try to merge libraries from spatial or bulk data
.filter{it.seq_unit in ['cell', 'nucleus']}
// remove any multiplexed projects
// future todo: only filter library ids that are multiplexed, but keep all other non-multiplexed libraries
.branch{
multiplexed: it.project_id in multiplex_projects.getVal()
single_sample: true
}

filtered_libraries_ch.multiplexed
.unique{ it.project_id }
.subscribe{
log.warn("Not merging ${it.project_id} because it contains multiplexed libraries.")
}

grouped_libraries_ch = filtered_libraries_ch.single_sample
// create tuple of [project id, library_id, processed_sce_file]
.map{[
it.project_id,
Expand All @@ -178,7 +191,6 @@ workflow {
.map{project_id, library_id_list, sce_file_list -> tuple(
project_id,
project_id in adt_projects.getVal(), // determines if altExp should be included in the merged object
project_id in multiplex_projects.getVal(), // determines if sample metadata should be added to colData and to skip anndata
library_id_list,
sce_file_list
)}
Expand All @@ -189,10 +201,5 @@ workflow {
generate_merge_report(merge_sce.out, file(merge_template))

// export merged objects to AnnData
anndata_ch = merge_sce.out
.filter{!it[3]} // remove multiplexed samples before export
.map{it.take(3)} // keep everything but multiplexed logical


export_anndata(anndata_ch)
export_anndata(merge_sce.out)
}

0 comments on commit 8ffeca3

Please sign in to comment.