Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parameter for maximum number of libraries to allow when merging #777

Merged
merged 2 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,11 @@ workflow {
|| (it.scpca_sample_id in run_ids)
}
.map{[
seq_unit: it.seq_unit,
technology: it.technology,
project_id: it.scpca_project_id,
library_id: it.scpca_library_id,
sample_id: it.scpca_sample_id.split(";").sort().join(",")
sample_id: it.scpca_sample_id.split(";").sort().join(","),
seq_unit: it.seq_unit,
technology: it.technology
]}

// get all projects that contain at least one library with CITEseq
Expand All @@ -162,13 +162,23 @@ workflow {
.collect{it.project_id}
.map{it.unique()}

// Collect the ids of projects that have more libraries than `params.max_merge_libraries`.
// NOTE(review): this counts every entry in `libraries_ch`, i.e. before the
// seq_unit filter that is applied when building `filtered_libraries_ch` — confirm
// that libraries with other seq_unit values are meant to count toward the limit.
oversized_projects = libraries_ch
.map{[
it.project_id, // pull out project id for grouping
it
]}
.groupTuple(by: 0) // group by project id
.filter{it[1].size() > params.max_merge_libraries} // keep projects with more libraries than the max allowed for merging
.collect{it[0]} // gather the project ids of the oversized projects

// Split the libraries into branches so that projects that should not be merged
// (multiplexed or oversized) are separated from the rest.
filtered_libraries_ch = libraries_ch
// only include single-cell/single-nuclei which ensures we don't try to merge libraries from spatial or bulk data
.filter{it.seq_unit in ['cell', 'nucleus']}
// remove any multiplexed projects or oversized projects
// future todo: only filter library ids that are multiplexed, but keep all other non-multiplexed libraries
.branch{
// libraries whose project id is listed in multiplex_projects
multiplexed: it.project_id in multiplex_projects.getVal()
// libraries whose project id is listed in oversized_projects
oversized: it.project_id in oversized_projects.getVal()
// catch-all for everything else
// NOTE(review): presumably conditions are tested in order and the first match wins — confirm against the Nextflow `branch` docs
single_sample: true
}

Expand All @@ -178,6 +188,12 @@ workflow {
log.warn("Not merging ${it.project_id} because it contains multiplexed libraries.")
}

// log a warning for each project that landed in the `oversized` branch and will not be merged
filtered_libraries_ch.oversized
.unique{ it.project_id } // de-duplicate by project id so each project is reported once
.subscribe{
log.warn("Not merging ${it.project_id} because it contains too many libraries.")
}

// print out warning message for any libraries not included in merging
filtered_libraries_ch.single_sample
.map{[
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ params {

// Merge workflow-specific options
params.reuse_merge = false // if later steps fail, you can use `--reuse_merge` to reuse the merged RDS object during a rerun

params.max_merge_libraries = 100 // maximum number of libraries that can be merged

// Docker container images
includeConfig 'config/containers.config'
Expand Down
Loading