Skip to content

Commit

Permalink
Merge pull request #772 from AlexsLemonade/development
Browse files Browse the repository at this point in the history
Sync changes from `development` into `main`
  • Loading branch information
allyhawkins authored Jul 16, 2024
2 parents a9dc826 + 574d4b9 commit f215046
Show file tree
Hide file tree
Showing 20 changed files with 216 additions and 59 deletions.
14 changes: 4 additions & 10 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,22 @@ repos:
args: [--update-only, --title=**Table of Contents**]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff for linting and formatting python
rev: v0.3.3
rev: v0.5.0
hooks:
# Run the linter.
- id: ruff
# Run the formatter.
# Run the formatter.
- id: ruff-format
- repo: https://github.com/lorenzwalthert/precommit
# R styling and linting
rev: v0.4.0
rev: v0.4.2
hooks:
- id: style-files
args: [--style_pkg=styler, --style_fun=tidyverse_style]
# - id: lintr #skip R linting for now...
- id: parsable-R
- repo: https://github.com/pre-commit/mirrors-prettier
# Format YAML and other languages
rev: v3.0.3
hooks:
- id: prettier
exclude: '\.md$'
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v4.6.0
hooks:
- id: check-added-large-files
args: ["--maxkb=200"]
Expand Down
28 changes: 14 additions & 14 deletions bin/generate_bulk_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,14 @@ bulk_metadata_df <- library_metadata |>
scpca_project_id %in% opt$project_id
) |>
dplyr::select(
scpca_sample_id, scpca_library_id, scpca_project_id,
technology, seq_unit
scpca_project_id, scpca_sample_id, scpca_library_id,
seq_unit, technology
) |>
# rename column names to match format of metadata files from other modalities
dplyr::rename(
project_id = scpca_project_id,
sample_id = scpca_sample_id,
library_id = scpca_library_id,
project_id = scpca_project_id
) |>
# add columns with processing information and date processed (same for all libraries )
dplyr::mutate(
genome_assembly = opt$genome_assembly,
workflow = opt$workflow_url,
workflow_version = opt$workflow_version,
workflow_commit = opt$workflow_commit
library_id = scpca_library_id
)


Expand All @@ -128,10 +121,11 @@ get_processing_info <- function(library_id) {

library_processing <- data.frame(
library_id = library_id,
salmon_version = cmd_info$salmon_version,
mapping_index = cmd_info$index,
total_reads = meta_info$num_processed,
mapped_reads = meta_info$num_mapped,
genome_assembly = opt$genome_assembly,
mapping_index = cmd_info$index,
salmon_version = cmd_info$salmon_version,
date_processed = lubridate::format_ISO8601(date_processed, usetz = TRUE)
)

Expand All @@ -141,7 +135,13 @@ get_processing_info <- function(library_id) {
bulk_processing_metadata <- purrr::map_dfr(library_ids, get_processing_info)

bulk_metadata_df <- bulk_metadata_df |>
dplyr::left_join(bulk_processing_metadata, by = c("library_id"))
dplyr::left_join(bulk_processing_metadata, by = c("library_id")) |>
# add columns with workflow information (same for all libraries)
dplyr::mutate(
workflow = opt$workflow_url,
workflow_version = opt$workflow_version,
workflow_commit = opt$workflow_commit
)

# write out file
readr::write_tsv(bulk_metadata_df, file = opt$metadata_output)
1 change: 1 addition & 0 deletions bin/move_counts_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
# move logcounts to X and rename
object.X = object.layers["logcounts"]
object.uns["X_name"] = "logcounts"
del object.layers["logcounts"]

# export object
object.write_h5ad(args.anndata_file, compression="gzip" if args.compress else None)
9 changes: 7 additions & 2 deletions config/containers.config
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
// Docker container images
SCPCATOOLS_CONTAINER = 'ghcr.io/alexslemonade/scpca-tools:v0.3.2'
SCPCATOOLS_CONTAINER = 'ghcr.io/alexslemonade/scpcatools:v0.4.0'
SCPCATOOLS_SLIM_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-slim:v0.4.0'
SCPCATOOLS_ANNDATA_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-anndata:v0.4.0'
SCPCATOOLS_REPORTS_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-reports:v0.4.0'
SCPCATOOLS_SEURAT_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-seurat:v0.4.0'
SCPCATOOLS_SCVI_CONTAINER = 'ghcr.io/alexslemonade/scpcatools-scvi:v0.4.0'

ALEVINFRY_CONTAINER = 'quay.io/biocontainers/alevin-fry:0.7.0--h9f5acd7_1'
BCFTOOLS_CONTAINER = 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0'
Expand All @@ -8,7 +13,7 @@ FASTP_CONTAINER = 'quay.io/biocontainers/fastp:0.23.0--h79da9fb_0'
SALMON_CONTAINER = 'quay.io/biocontainers/salmon:1.9.0--h7e5ed60_1'
SAMTOOLS_CONTAINER = 'quay.io/biocontainers/samtools:1.14--hb421002_0'
STAR_CONTAINER = 'quay.io/biocontainers/star:2.7.9a--h9ee0642_0'
TIDYVERSE_CONTAINER = 'rocker/tidyverse:4.3.1'
TIDYVERSE_CONTAINER = 'rocker/tidyverse:4.4.0'
VIREO_CONTAINER = 'ghcr.io/alexslemonade/vireo-snp:v0.5.7'

// 10X software containers not set by default
Expand Down
4 changes: 3 additions & 1 deletion config/profile_ccdl.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ params {
results_dir = "${params.outdir}/results"

// a set of run_ids for testing. used only by the main workflow
run_ids = "SCPCR000001,SCPCS000101"
// one single-cell with bulk, one CITE, one spatial, one multiplexed
run_ids = "SCPCS000001,SCPCS000050,SCPCS000203,SCPCL000537"

// include all runs in a merged project. used only by the merged workflow
merge_run_ids = "All"

Expand Down
6 changes: 3 additions & 3 deletions external-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,12 @@ Using the above command will run the workflow from the `main` branch of the work
To update to the latest released version you can run `nextflow pull AlexsLemonade/scpca-nf` before the `nextflow run` command.

To be sure that you are using a consistent version, you can specify use of a release tagged version of the workflow, set below with the `-r` flag.
The command below will pull the `scpca-nf` workflow directly from Github using the `v0.8.1` version.
The command below will pull the `scpca-nf` workflow directly from GitHub using the `v0.8.2` version.
Released versions can be found on the [`scpca-nf` repository releases page](https://github.com/AlexsLemonade/scpca-nf/releases).

```sh
nextflow run AlexsLemonade/scpca-nf \
-r v0.8.1 \
-r v0.8.2 \
-config <path to config file> \
-profile <name of profile>
```
Expand Down Expand Up @@ -325,7 +325,7 @@ If you will be analyzing spatial expression data, you will also need the Cell Ra

If your compute nodes do not have internet access, you will likely have to pre-pull the required container images as well.
When doing this, it is important to be sure that you also specify the revision (version tag) of the `scpca-nf` workflow that you are using.
For example, if you would run `nextflow run AlexsLemonade/scpca-nf -r v0.8.1`, then you will want to set `-r v0.8.1` for `get_refs.py` as well to be sure you have the correct containers.
For example, if you run `nextflow run AlexsLemonade/scpca-nf -r v0.8.2`, then you will want to set `-r v0.8.2` for `get_refs.py` as well to be sure you have the correct containers.
By default, `get_refs.py` will download files and images associated with the latest release.

If your system uses Docker, you can add the `--docker` flag:
Expand Down
2 changes: 1 addition & 1 deletion internal-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ Please refer to our [`CONTRIBUTING.md`](CONTRIBUTING.md#stub-workflows) for more
When running the workflow for a project or group of samples that is ready to be released on the ScPCA portal, please use the tag for the latest release:

```
nextflow run AlexsLemonade/scpca-nf -r v0.8.1 -profile ccdl,batch --project SCPCP000000
nextflow run AlexsLemonade/scpca-nf -r v0.8.2 -profile ccdl,batch --project SCPCP000000
```

### Processing example data
Expand Down
6 changes: 3 additions & 3 deletions merge.nf
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ if (param_error) {

// merge individual SCE objects into one SCE object
process merge_sce {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
tag "${merge_group_id}"
label 'mem_max'
label 'long_running'
Expand Down Expand Up @@ -60,7 +60,7 @@ process merge_sce {

// create merge report
process generate_merge_report {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_REPORTS_CONTAINER
tag "${merge_group_id}"
publishDir "${params.results_dir}/${merge_group_id}/merged"
label 'mem_max'
Expand Down Expand Up @@ -88,7 +88,7 @@ process generate_merge_report {
}

process export_anndata {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_ANNDATA_CONTAINER
label 'mem_max'
label 'long_running'
tag "${merge_group_id}"
Expand Down
4 changes: 2 additions & 2 deletions modules/bulk-salmon.nf
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ process salmon {
}

process merge_bulk_quants {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
label 'mem_8'
publishDir "${params.results_dir}/${meta.project_id}", mode: 'copy'
publishDir "${params.results_dir}/${meta.project_id}/bulk", mode: 'copy'
tag "${meta.project_id}"
input:
tuple val(meta), path(salmon_directories), path(t2g_bulk)
Expand Down
4 changes: 2 additions & 2 deletions modules/classify-celltypes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ process classify_singler {


process classify_cellassign {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SCVI_CONTAINER
publishDir (
path: "${meta.celltype_checkpoints_dir}",
mode: 'copy',
Expand Down Expand Up @@ -103,7 +103,7 @@ process classify_cellassign {
}

process add_celltypes_to_sce {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
label 'mem_4'
label 'cpus_2'
tag "${meta.library_id}"
Expand Down
2 changes: 1 addition & 1 deletion modules/cluster-sce.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// perform graph-based clustering on a processed SCE object
process cluster_sce {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
label 'mem_8'
tag "${meta.library_id}"
input:
Expand Down
2 changes: 1 addition & 1 deletion modules/export-anndata.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

// process for converting rds files containing an SCE to h5 containing anndata containing the RNA data
process export_anndata {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_ANNDATA_CONTAINER
label 'mem_16'
tag "${meta.library_id}"
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
Expand Down
2 changes: 1 addition & 1 deletion modules/qc-report.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// generate QC report from unfiltered and filtered SCE.rds files using scpcaTools

process sce_qc_report {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_REPORTS_CONTAINER
label 'mem_16'
tag "${meta.library_id}"
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
Expand Down
12 changes: 6 additions & 6 deletions modules/sce-processing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

// RNA only libraries
process make_unfiltered_sce {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
label 'mem_8'
tag "${meta.library_id}"
input:
Expand Down Expand Up @@ -48,7 +48,7 @@ process make_unfiltered_sce {
process make_merged_unfiltered_sce {
label 'mem_8'
tag "${rna_meta.library_id}"
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
input:
tuple val(feature_meta), path(feature_alevin_dir),
val(rna_meta), path(alevin_dir),
Expand Down Expand Up @@ -105,7 +105,7 @@ process make_merged_unfiltered_sce {
}

process filter_sce {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
label 'mem_8'
tag "${meta.library_id}"
input:
Expand Down Expand Up @@ -138,7 +138,7 @@ process filter_sce {
}

process genetic_demux_sce {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
label 'mem_8'
tag "${meta.library_id}"
input:
Expand All @@ -164,7 +164,7 @@ process genetic_demux_sce {
}

process cellhash_demux_sce {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SEURAT_CONTAINER
label 'mem_8'
tag "${meta.library_id}"
input:
Expand Down Expand Up @@ -192,7 +192,7 @@ process cellhash_demux_sce {
}

process post_process_sce {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
label 'mem_8'
tag "${meta.library_id}"
input:
Expand Down
2 changes: 1 addition & 1 deletion modules/spaceranger.nf
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ process spaceranger {
}

process spaceranger_publish {
container params.SCPCATOOLS_CONTAINER
container params.SCPCATOOLS_SLIM_CONTAINER
tag "${meta.library_id}"
publishDir "${params.results_dir}/${meta.project_id}/${meta.sample_id}", mode: 'copy'
input:
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ manifest {
homePage = 'https://github.com/AlexsLemonade/scpca-nf'
mainScript = 'main.nf'
defaultBranch = 'main'
version = 'v0.8.1'
version = 'v0.8.2'
}

// global parameters for workflows
Expand Down
24 changes: 22 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,27 @@
"properties": {
"SCPCATOOLS_CONTAINER": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpca-tools:edge"
"default": "ghcr.io/alexslemonade/scpcatools:edge"
},
"SCPCATOOLS_SLIM_CONTAINER": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpcatools-slim:edge"
},
"SCPCATOOLS_ANNDATA_CONTAINER": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpcatools-anndata:edge"
},
"SCPCATOOLS_REPORTS_CONTAINER": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpcatools-reports:edge"
},
"SCPCATOOLS_SEURAT_CONTAINER": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpcatools-seurat:edge"
},
"SCPCATOOLS_SCVI_CONTAINER": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpcatools-scvi:edge"
},
"ALEVINFRY_CONTAINER": {
"type": "string",
Expand Down Expand Up @@ -325,7 +345,7 @@
},
"TIDYVERSE_CONTAINER": {
"type": "string",
"default": "rocker/tidyverse:4.3.1"
"default": "rocker/tidyverse:4.4.0"
},
"VIREO_CONTAINER": {
"type": "string",
Expand Down
28 changes: 28 additions & 0 deletions templates/qc_report/celltypes_supplemental_report.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,34 @@ glue::glue("
```


# Sample metadata

The table below summarizes clinical metadata for the sample associated with this library.
Any displayed human-readable value that has an associated ontology term identifier is shown as a blue hyperlink.
These links will direct you to a web page with information about that ontology term identifier.

```{r}
# extract sce metadata containing processing information as table
processed_meta <- metadata(processed_sce)
# if data is not multiplexed, print out sample metadata
if (!has_multiplex) {
print_sample_metadata(processed_meta)
} else {
# otherwise print out an info box that no sample metadata will be displayed
knitr::asis_output(
glue::glue("
<div class=\"alert alert-info\">
This library is multiplexed and contains data from more than one sample.
Demultiplexing has not been performed, so sample metadata will not be displayed.
</div>
")
)
}
```


<!------- Call the celltypes_qc report section from the main report ----------->
```{r, child='celltypes_qc.rmd'}
```
Expand Down
Loading

0 comments on commit f215046

Please sign in to comment.