Merge pull request #772 from AlexsLemonade/development

Sync changes from `development` into `main`
AlexsLemonade · Jul 16, 2024 · f215046 · f215046
2 parents a9dc826 + 574d4b9
commit f215046
Show file tree

Hide file tree

Showing 20 changed files with 216 additions and 59 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -32,28 +32,22 @@ repos:
         args: [--update-only, --title=**Table of Contents**]
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff for linting and formatting python
-    rev: v0.3.3
+    rev: v0.5.0
     hooks:
       # Run the linter.
       - id: ruff
-        # Run the formatter.
+      # Run the formatter.
       - id: ruff-format
   - repo: https://github.com/lorenzwalthert/precommit
     # R styling and linting
-    rev: v0.4.0
+    rev: v0.4.2
     hooks:
       - id: style-files
         args: [--style_pkg=styler, --style_fun=tidyverse_style]
       # - id: lintr #skip R linting for now...
       - id: parsable-R
-  - repo: https://github.com/pre-commit/mirrors-prettier
-    # Format YAML and other languages
-    rev: v3.0.3
-    hooks:
-      - id: prettier
-        exclude: '\.md$'
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
       - id: check-added-large-files
         args: ["--maxkb=200"]

diff --git a/bin/generate_bulk_metadata.R b/bin/generate_bulk_metadata.R
@@ -89,21 +89,14 @@ bulk_metadata_df <- library_metadata |>
       scpca_project_id %in% opt$project_id
   ) |>
   dplyr::select(
-    scpca_sample_id, scpca_library_id, scpca_project_id,
-    technology, seq_unit
+    scpca_project_id, scpca_sample_id, scpca_library_id,
+    seq_unit, technology
   ) |>
   # rename column names to match format of metadata files from other modalities
   dplyr::rename(
+    project_id = scpca_project_id,
     sample_id = scpca_sample_id,
-    library_id = scpca_library_id,
-    project_id = scpca_project_id
-  ) |>
-  # add columns with processing information and date processed (same for all libraries )
-  dplyr::mutate(
-    genome_assembly = opt$genome_assembly,
-    workflow = opt$workflow_url,
-    workflow_version = opt$workflow_version,
-    workflow_commit = opt$workflow_commit
+    library_id = scpca_library_id
   )
 
 
@@ -128,10 +121,11 @@ get_processing_info <- function(library_id) {
 
   library_processing <- data.frame(
     library_id = library_id,
-    salmon_version = cmd_info$salmon_version,
-    mapping_index = cmd_info$index,
     total_reads = meta_info$num_processed,
     mapped_reads = meta_info$num_mapped,
+    genome_assembly = opt$genome_assembly,
+    mapping_index = cmd_info$index,
+    salmon_version = cmd_info$salmon_version,
     date_processed = lubridate::format_ISO8601(date_processed, usetz = TRUE)
   )
 
@@ -141,7 +135,13 @@ get_processing_info <- function(library_id) {
 bulk_processing_metadata <- purrr::map_dfr(library_ids, get_processing_info)
 
 bulk_metadata_df <- bulk_metadata_df |>
-  dplyr::left_join(bulk_processing_metadata, by = c("library_id"))
+  dplyr::left_join(bulk_processing_metadata, by = c("library_id")) |>
+  # add columns with workflow information (same for all libraries)
+  dplyr::mutate(
+    workflow = opt$workflow_url,
+    workflow_version = opt$workflow_version,
+    workflow_commit = opt$workflow_commit
+  )
 
 # write out file
 readr::write_tsv(bulk_metadata_df, file = opt$metadata_output)
diff --git a/bin/move_counts_anndata.py b/bin/move_counts_anndata.py
@@ -49,6 +49,7 @@
     # move logcounts to X and rename
     object.X = object.layers["logcounts"]
     object.uns["X_name"] = "logcounts"
+    del object.layers["logcounts"]
 
     # export object
     object.write_h5ad(args.anndata_file, compression="gzip" if args.compress else None)
diff --git a/config/containers.config b/config/containers.config
@@ -1,5 +1,10 @@
 // Docker container images
-SCPCATOOLS_CONTAINER = 'ghcr.io/alexslemonade/scpca-tools:v0.3.2'
+SCPCATOOLS_CONTAINER = 'ghcr.io/alexslemonade/scpcatools:v0.4.0'
+SCPCATOOLS_SLIM_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-slim:v0.4.0'
+SCPCATOOLS_ANNDATA_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-anndata:v0.4.0'
+SCPCATOOLS_REPORTS_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-reports:v0.4.0'
+SCPCATOOLS_SEURAT_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-seurat:v0.4.0'
+SCPCATOOLS_SCVI_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-scvi:v0.4.0'
 
 ALEVINFRY_CONTAINER = 'quay.io/biocontainers/alevin-fry:0.7.0--h9f5acd7_1'
 BCFTOOLS_CONTAINER = 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0'
@@ -8,7 +13,7 @@ FASTP_CONTAINER = 'quay.io/biocontainers/fastp:0.23.0--h79da9fb_0'
 SALMON_CONTAINER = 'quay.io/biocontainers/salmon:1.9.0--h7e5ed60_1'
 SAMTOOLS_CONTAINER = 'quay.io/biocontainers/samtools:1.14--hb421002_0'
 STAR_CONTAINER = 'quay.io/biocontainers/star:2.7.9a--h9ee0642_0'
-TIDYVERSE_CONTAINER = 'rocker/tidyverse:4.3.1'
+TIDYVERSE_CONTAINER = 'rocker/tidyverse:4.4.0'
 VIREO_CONTAINER = 'ghcr.io/alexslemonade/vireo-snp:v0.5.7'
 
 // 10X software containers not set by default

diff --git a/config/profile_ccdl.config b/config/profile_ccdl.config
@@ -11,7 +11,9 @@ params {
   results_dir = "${params.outdir}/results"
 
   // a set of run_ids for testing. used only by the main workflow
-  run_ids = "SCPCR000001,SCPCS000101"
+  // one single-cell with bulk, one CITE, one spatial, one multiplexed
+  run_ids = "SCPCS000001,SCPCS000050,SCPCS000203,SCPCL000537"
+
   // include all runs in a merged project. used only by the merged workflow
   merge_run_ids = "All"
 

diff --git a/external-instructions.md b/external-instructions.md
@@ -86,12 +86,12 @@ Using the above command will run the workflow from the `main` branch of the work
 To update to the latest released version you can run `nextflow pull AlexsLemonade/scpca-nf` before the `nextflow run` command.
 
 To be sure that you are using a consistent version, you can specify use of a release tagged version of the workflow, set below with the `-r` flag.
-The command below will pull the `scpca-nf` workflow directly from Github using the `v0.8.1` version.
+The command below will pull the `scpca-nf` workflow directly from GitHub using the `v0.8.2` version.
 Released versions can be found on the [`scpca-nf` repository releases page](https://github.com/AlexsLemonade/scpca-nf/releases).
 
 ```sh
 nextflow run AlexsLemonade/scpca-nf \
-  -r v0.8.1 \
+  -r v0.8.2 \
   -config <path to config file>  \
   -profile <name of profile>
 ```
@@ -325,7 +325,7 @@ If you will be analyzing spatial expression data, you will also need the Cell Ra
 
 If your compute nodes do not have internet access, you will likely have to pre-pull the required container images as well.
 When doing this, it is important to be sure that you also specify the revision (version tag) of the `scpca-nf` workflow that you are using.
-For example, if you would run `nextflow run AlexsLemonade/scpca-nf -r v0.8.1`, then you will want to set `-r v0.8.1` for `get_refs.py` as well to be sure you have the correct containers.
+For example, if you would run `nextflow run AlexsLemonade/scpca-nf -r v0.8.2`, then you will want to set `-r v0.8.2` for `get_refs.py` as well to be sure you have the correct containers.
 By default, `get_refs.py` will download files and images associated with the latest release.
 
 If your system uses Docker, you can add the `--docker` flag:

diff --git a/internal-instructions.md b/internal-instructions.md
@@ -87,7 +87,7 @@ Please refer to our [`CONTRIBUTING.md`](CONTRIBUTING.md#stub-workflows) for more
 When running the workflow for a project or group of samples that is ready to be released on ScPCA portal, please use the tag for the latest release:
 
 ```
-nextflow run AlexsLemonade/scpca-nf -r v0.8.1 -profile ccdl,batch --project SCPCP000000
+nextflow run AlexsLemonade/scpca-nf -r v0.8.2 -profile ccdl,batch --project SCPCP000000
 ```
 
 ### Processing example data

diff --git a/merge.nf b/merge.nf
@@ -28,7 +28,7 @@ if (param_error) {
 
 // merge individual SCE objects into one SCE object
 process merge_sce {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SLIM_CONTAINER
   tag "${merge_group_id}"
   label 'mem_max'
   label 'long_running'
@@ -60,7 +60,7 @@ process merge_sce {
 
 // create merge report
 process generate_merge_report {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_REPORTS_CONTAINER
   tag "${merge_group_id}"
   publishDir "${params.results_dir}/${merge_group_id}/merged"
   label 'mem_max'
@@ -88,7 +88,7 @@ process generate_merge_report {
 }
 
 process export_anndata {
-    container params.SCPCATOOLS_CONTAINER
+    container params.SCPCATOOLS_ANNDATA_CONTAINER
     label 'mem_max'
     label 'long_running'
     tag "${merge_group_id}"

diff --git a/modules/bulk-salmon.nf b/modules/bulk-salmon.nf
@@ -66,9 +66,9 @@ process salmon {
 }
 
 process merge_bulk_quants {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SLIM_CONTAINER
   label 'mem_8'
-  publishDir "${params.results_dir}/${meta.project_id}", mode: 'copy'
+  publishDir "${params.results_dir}/${meta.project_id}/bulk", mode: 'copy'
   tag "${meta.project_id}"
   input:
     tuple val(meta), path(salmon_directories), path(t2g_bulk)

diff --git a/modules/classify-celltypes.nf b/modules/classify-celltypes.nf
@@ -44,7 +44,7 @@ process classify_singler {
 
 
 process classify_cellassign {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SCVI_CONTAINER
     publishDir (
       path: "${meta.celltype_checkpoints_dir}",
       mode: 'copy',
@@ -103,7 +103,7 @@ process classify_cellassign {
 }
 
 process add_celltypes_to_sce {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SLIM_CONTAINER
   label 'mem_4'
   label 'cpus_2'
   tag "${meta.library_id}"

diff --git a/modules/cluster-sce.nf b/modules/cluster-sce.nf
@@ -1,6 +1,6 @@
 // perform graph-based clustering on a processed SCE object
 process cluster_sce {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SLIM_CONTAINER
   label 'mem_8'
   tag "${meta.library_id}"
   input:

diff --git a/modules/export-anndata.nf b/modules/export-anndata.nf
@@ -1,7 +1,7 @@
 
 // process for converting rds files containing an SCE to h5 containing anndata containing the RNA data
 process export_anndata {
-    container params.SCPCATOOLS_CONTAINER
+    container params.SCPCATOOLS_ANNDATA_CONTAINER
     label 'mem_16'
     tag "${meta.library_id}"
     publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'

diff --git a/modules/qc-report.nf b/modules/qc-report.nf
@@ -2,7 +2,7 @@
 // generate QC report from unfiltered and filtered SCE.rds files using scpcaTools
 
 process sce_qc_report {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_REPORTS_CONTAINER
   label 'mem_16'
   tag "${meta.library_id}"
   publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'

diff --git a/modules/sce-processing.nf b/modules/sce-processing.nf
@@ -2,7 +2,7 @@
 
 // RNA only libraries
 process make_unfiltered_sce {
-    container params.SCPCATOOLS_CONTAINER
+    container params.SCPCATOOLS_SLIM_CONTAINER
     label 'mem_8'
     tag "${meta.library_id}"
     input:
@@ -48,7 +48,7 @@ process make_unfiltered_sce {
 process make_merged_unfiltered_sce {
     label 'mem_8'
     tag "${rna_meta.library_id}"
-    container params.SCPCATOOLS_CONTAINER
+    container params.SCPCATOOLS_SLIM_CONTAINER
     input:
         tuple val(feature_meta), path(feature_alevin_dir),
               val(rna_meta), path(alevin_dir),
@@ -105,7 +105,7 @@ process make_merged_unfiltered_sce {
 }
 
 process filter_sce {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SLIM_CONTAINER
   label 'mem_8'
   tag "${meta.library_id}"
   input:
@@ -138,7 +138,7 @@ process filter_sce {
 }
 
 process genetic_demux_sce {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SLIM_CONTAINER
   label 'mem_8'
   tag "${meta.library_id}"
   input:
@@ -164,7 +164,7 @@ process genetic_demux_sce {
 }
 
 process cellhash_demux_sce {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SEURAT_CONTAINER
   label 'mem_8'
   tag "${meta.library_id}"
   input:
@@ -192,7 +192,7 @@ process cellhash_demux_sce {
 }
 
 process post_process_sce {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SLIM_CONTAINER
   label 'mem_8'
   tag "${meta.library_id}"
   input:

diff --git a/modules/spaceranger.nf b/modules/spaceranger.nf
@@ -45,7 +45,7 @@ process spaceranger {
 }
 
 process spaceranger_publish {
-  container params.SCPCATOOLS_CONTAINER
+  container params.SCPCATOOLS_SLIM_CONTAINER
   tag "${meta.library_id}"
   publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
   input:

diff --git a/nextflow.config b/nextflow.config
@@ -5,7 +5,7 @@ manifest {
   homePage = 'https://github.com/AlexsLemonade/scpca-nf'
   mainScript = 'main.nf'
   defaultBranch = 'main'
-  version = 'v0.8.1'
+  version = 'v0.8.2'
 }
 
 // global parameters for workflows

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -289,7 +289,27 @@
       "properties": {
         "SCPCATOOLS_CONTAINER": {
           "type": "string",
-          "default": "ghcr.io/alexslemonade/scpca-tools:edge"
+          "default": "ghcr.io/alexslemonade/scpcatools:edge"
+        },
+        "SCPCATOOLS_SLIM_CONTAINER": {
+          "type": "string",
+          "default": "ghcr.io/alexslemonade/scpcatools-slim:edge"
+        },
+        "SCPCATOOLS_ANNDATA_CONTAINER": {
+          "type": "string",
+          "default": "ghcr.io/alexslemonade/scpcatools-anndata:edge"
+        },
+        "SCPCATOOLS_REPORTS_CONTAINER": {
+          "type": "string",
+          "default": "ghcr.io/alexslemonade/scpcatools-reports:edge"
+        },
+        "SCPCATOOLS_SEURAT_CONTAINER": {
+          "type": "string",
+          "default": "ghcr.io/alexslemonade/scpcatools-seurat:edge"
+        },
+        "SCPCATOOLS_SCVI_CONTAINER": {
+          "type": "string",
+          "default": "ghcr.io/alexslemonade/scpcatools-scvi:edge"
         },
         "ALEVINFRY_CONTAINER": {
           "type": "string",
@@ -325,7 +345,7 @@
         },
         "TIDYVERSE_CONTAINER": {
           "type": "string",
-          "default": "rocker/tidyverse:4.3.1"
+          "default": "rocker/tidyverse:4.4.0"
         },
         "VIREO_CONTAINER": {
           "type": "string",

diff --git a/templates/qc_report/celltypes_supplemental_report.rmd b/templates/qc_report/celltypes_supplemental_report.rmd
@@ -381,6 +381,34 @@ glue::glue("
 ```
 
 
+# Sample metadata 
+
+The table below summarizes clinical metadata for the sample associated with this library.
+Blue hyperlinks are present for any term that has an ontology term identifier associated with the displayed human-readable value.
+These links will direct you to a web page with information about that ontology term identifier.
+
+```{r}
+# extract sce metadata containing processing information as table
+processed_meta <- metadata(processed_sce)
+
+# if data is not multiplexed, print out sample metadata
+if (!has_multiplex) {
+  print_sample_metadata(processed_meta)
+} else {
+  # otherwise print out an info box that no sample metadata will be displayed
+  knitr::asis_output(
+    glue::glue("
+      <div class=\"alert alert-info\">
+
+      This library is multiplexed and contains data from more than one sample.
+      Demultiplexing has not been performed, so sample metadata will not be displayed.
+      </div>
+    ")
+  )
+}
+```
+
+
 <!------- Call the celltypes_qc report section from the main report ----------->
 ```{r, child='celltypes_qc.rmd'}
 ```