Skip to content
This repository has been archived by the owner on Jun 13, 2024. It is now read-only.

Updating with changes from combiz fork #30

Open
wants to merge 41 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
31cdaf8
version bump
combiz Sep 15, 2021
b5ae692
pre-release changelog for dev
combiz Sep 15, 2021
6cafee7
bug fixes
nfancy Sep 18, 2021
a12f9d0
improved config formatting
combiz Sep 29, 2021
753696f
Reverted params to non-groovy native
combiz Sep 29, 2021
744915a
Merge branch 'dev' of github.com:combiz/nf-core-scflow into dev
combiz Sep 29, 2021
0cfab5b
made versions fail non-breaking
combiz Sep 30, 2021
fef8cc7
added initial gcp config resource config
combiz Sep 30, 2021
7dcd761
updated gcp config assignment
combiz Sep 30, 2021
1344087
increased resources for gcp tiny process
combiz Sep 30, 2021
3d0431c
specified disk storage resources for gcp processes
combiz Sep 30, 2021
41e8490
updated disk storage resources for gcp processes
combiz Sep 30, 2021
e0e6fa6
updated disk storage resources for gcp processes v2
combiz Sep 30, 2021
3f43262
updated disk storage resources for gcp processes v3
combiz Sep 30, 2021
629cddf
updated disk storage resources for gcp processes v4
combiz Sep 30, 2021
9e24658
added google bootDiskSize param for gcp
combiz Oct 1, 2021
c82576f
fix for celltype_mappings file staging on cloud
combiz Oct 1, 2021
2fe47af
fix for celltype_mappings file staging on cloud
combiz Oct 1, 2021
a6c280b
fix for celltype_mappings file staging on cloud v2
combiz Oct 1, 2021
3d8aee5
fix for celltype_mappings file staging on cloud v3
combiz Oct 1, 2021
3b68c45
fix for celltype_mappings file staging on cloud v4
combiz Oct 1, 2021
e1460c6
Revised results output paths to omit ../ for cloud compatibility
combiz Oct 5, 2021
ed044bb
minor updates in cluster.nf
nfancy Oct 11, 2021
37e9e70
merged conflict
nfancy Oct 11, 2021
1b2fc11
lint checks
nfancy Oct 11, 2021
63028a1
EClint checks
nfancy Oct 11, 2021
5d218ab
EClint checks
nfancy Oct 11, 2021
08e388c
EClint checks
nfancy Oct 11, 2021
4eb8e30
EClint checks
nfancy Oct 11, 2021
1d1c325
Merge pull request #2 from combiz/dev-nf
combiz Oct 11, 2021
8cba62d
Check inputs update to allow filepaths prefixed with gs:// or s3:// f…
combiz Oct 13, 2021
b1d9eeb
Merge branch 'dev' of github.com:combiz/nf-core-scflow into dev
combiz Oct 13, 2021
efb29e1
updated GCP resources
combiz Oct 13, 2021
b8fd2ac
updated integrate multithreading params for GCP
combiz Oct 13, 2021
61a6f6a
adjusted GCP resources
combiz Oct 13, 2021
9dd43e0
adjusted GCP resources for high memory
combiz Oct 13, 2021
d56f374
reduced max cores for integration due to parallel issues in R
combiz Oct 13, 2021
13148a4
updated gcp resources for high memory processes
combiz Oct 13, 2021
b8e7a37
updated gcp resources for celltype mapping
combiz Oct 20, 2021
a651eab
Updated GCP resources for process_tiny
combiz Oct 22, 2021
3747261
Updated GCP instance type for DGE
combiz Oct 25, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v0.7.0dev - [date]
## v0.1.0dev

Initial release of nf-core/scflow, created with the [nf-core](https://nf-co.re/) template.

Expand Down
Empty file added assets/NO_FILE.tsv
Empty file.
4 changes: 3 additions & 1 deletion bin/check_inputs.r
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ manifest <- read.delim(args$manifest)
# check manifest paths exist

# Decide whether a manifest path can be treated as present.
#
# A path passes when it is a cloud object URI (prefix gs:// or s3://; accepted
# on prefix alone, no existence check is attempted), an existing local
# directory, or a URL that RCurl::url.exists() reports as reachable.
#
# The checks run cheapest-first and short-circuit, so the network request is
# only made for paths that are neither cloud URIs nor local directories.
#
# filepath: a single path/URI string (the caller passes one manifest row at a
#   time).
# Returns a single logical.
check_exists <- function(filepath) {
  any(startsWith(filepath, c("gs://", "s3://"))) ||
    dir.exists(filepath) ||
    RCurl::url.exists(filepath)
}

dir_exists <- purrr::pmap_lgl(manifest, ~ check_exists(as.character(..2)))
Expand Down
68 changes: 52 additions & 16 deletions bin/scflow_dge.r
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,14 @@ required$add_argument(
help = "p-value cutoff for DE [default %(default)s]"
)

# Number of genes to highlight on the volcano plot; the parsed value is passed
# as `n_label` to report_de() and volcano_plot() further down this script.
# The help text reports the default, matching the other options' help strings.
required$add_argument(
  "--n_label",
  type = "integer",
  default = 5,
  metavar = "number",
  help = "Number of genes to be highlighted on volcano plot [default %(default)s]"
)

required$add_argument(
"--ensembl_mappings",
help = "path to ensembl mappings file",
Expand Down Expand Up @@ -179,7 +187,9 @@ args$pseudobulk <- as.logical(args$pseudobulk)
# Coerce the force_run argument to a logical value.
args$force_run <- as.logical(args$force_run)

# The literal string "null" (any case) means no random-effects variable.
if (tolower(args$random_effects_var) == "null") {
  args$random_effects_var <- NULL
}

# Interpret --max_cores: a missing value or the literal string "NULL" (any
# case) means "no override"; anything else must be a number >= 1. Invalid
# values fail here with a clear message instead of turning into NA and
# surfacing later as an obscure error when the core count is applied.
args$max_cores <- local({
  raw_value <- as.character(args$max_cores)

  if (length(raw_value) == 0 || identical(toupper(raw_value[[1]]), "NULL")) {
    return(NULL)
  }

  # Conversion failure is handled explicitly below, so the coercion warning
  # would only be noise.
  n_requested <- suppressWarnings(as.numeric(raw_value[[1]]))

  if (is.na(n_requested) || n_requested < 1) {
    stop(
      "--max_cores must be a number >= 1 or 'NULL', got: ",
      raw_value[[1]],
      call. = FALSE
    )
  }

  n_requested
})

Expand All @@ -202,6 +212,7 @@ cli::cli_alert(sprintf(
n_cores
))


library(scFlow)

# ____________________________________________________________________________
Expand All @@ -221,8 +232,10 @@ if (args$pseudobulk) {
sce_subset <- pseudobulk_sce(
sce_subset,
keep_vars = c(
args$dependent_var, args$confounding_vars, args$random_effects_var
),
args$dependent_var,
args$confounding_vars,
args$random_effects_var
),
assay_name = "counts",
celltype_var = args$celltype_var,
sample_var = args$sample_var
Expand All @@ -249,26 +262,49 @@ de_results <- perform_de(
species = getOption("scflow_species")
)

# Shared prefix for every output file written below:
# "<celltype>_<de_method><pb_str>_".
file_name <- paste0(args$celltype, "_", args$de_method, pb_str, "_")

# For each DE result table, write three outputs into the working directory:
#   <file_name><result>_DE.tsv               tab-separated results table
#   <file_name><result>_scflow_de_report     report produced by report_de()
#   <file_name><result>_volcano_plot.png     volcano plot
# Tables with no rows are skipped with a log line.
for (result in names(de_results)) {
  de_table <- de_results[[result]]

  if (nrow(de_table) > 0) {
    write.table(
      de_table,
      file = file.path(
        getwd(),
        paste0(file_name, result, "_DE.tsv")
      ),
      quote = FALSE, sep = "\t", col.names = TRUE, row.names = FALSE
    )

    report_de(
      de_table,
      fc_threshold = args$fc_threshold,
      pval_cutoff = args$pval_cutoff,
      n_label = args$n_label,
      report_folder_path = file.path(getwd()),
      report_file = paste0(file_name, result, "_scflow_de_report")
    )

    print("report generated")

    # Build the volcano plot with the same thresholds passed to the report.
    p <- scFlow::volcano_plot(
      dt = de_table,
      fc_threshold = args$fc_threshold,
      pval_cutoff = args$pval_cutoff,
      n_label = args$n_label
    )

    ggplot2::ggsave(
      filename = file.path(
        getwd(),
        paste0(file_name, result, "_volcano_plot.png")
      ),
      plot = p,
      width = 7, height = 5, units = "in", dpi = 600
    )

    print("Volcano plot generated")
  } else {
    print(sprintf("No DE genes found for %s", result))
  }
}
64 changes: 47 additions & 17 deletions bin/scflow_finalize_sce.r
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
# ____________________________________________________________________________
# Initialization ####

options(mc.cores = future::availableCores())

## ............................................................................
## Load packages ####
library(argparse)
library(scFlow)
library(magrittr)
library(SingleCellExperiment)

## ............................................................................
## Parse command-line arguments ####
Expand Down Expand Up @@ -106,6 +105,13 @@ required$add_argument(
metavar = "N"
)

# Mandatory option capping CPU core usage. The literal string "NULL" is
# translated to "no cap" during argument pre-processing below.
required$add_argument(
  "--max_cores",
  metavar = "N",
  help = "override for lower cpu core usage",
  required = TRUE,
  default = NULL
)

### . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ..
### Pre-process args ####
Expand All @@ -117,6 +123,33 @@ args$metric_vars <- strsplit(args$metric_vars, ",")[[1]]
options("scflow_reddimplot_pointsize" = args$reddimplot_pointsize)
options("scflow_reddimplot_alpha" = args$reddimplot_alpha)

# Interpret --max_cores: a missing value or the literal string "NULL" (any
# case) means "no override"; anything else must be a number >= 1. Invalid
# values fail here with a clear message instead of turning into NA and
# being written into options(mc.cores = ...) further down.
args$max_cores <- local({
  raw_value <- as.character(args$max_cores)

  if (length(raw_value) == 0 || identical(toupper(raw_value[[1]]), "NULL")) {
    return(NULL)
  }

  # Conversion failure is handled explicitly below, so the coercion warning
  # would only be noise.
  n_requested <- suppressWarnings(as.numeric(raw_value[[1]]))

  if (is.na(n_requested) || n_requested < 1) {
    stop(
      "--max_cores must be a number >= 1 or 'NULL', got: ",
      raw_value[[1]],
      call. = FALSE
    )
  }

  n_requested
})

# ____________________________________________________________________________
# Delay Package Loading for Optional Max Cores Override

# NOTE(review): methods = "mc.cores" appears to limit the query to the
# `mc.cores` R option rather than detecting hardware cores - confirm this is
# the intended source for the "available cores" figure reported below.
n_cores <- future::availableCores(methods = "mc.cores")

# Use every available core unless --max_cores asks for fewer.
options(
  mc.cores = if (is.null(args$max_cores)) {
    n_cores
  } else {
    min(args$max_cores, n_cores)
  }
)

# Log the effective and available core counts.
cli::cli_alert(
  sprintf(
    "Using %s cores on system with %s available cores.",
    getOption("mc.cores"),
    n_cores
  )
)

library(scFlow)
library(magrittr)
library(SingleCellExperiment)

## ............................................................................
## Start ####

Expand Down Expand Up @@ -163,26 +196,28 @@ colnames(celltypes) <- c("celltype", "n_cells")
# Export the celltype / n_cells table as a tab-separated file with a header
# row and no row names or quoting.
write.table(
  data.frame(celltypes),
  file = "celltypes.tsv",
  row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t"
)

### Save Marker Gene Plots
folder_path <- file.path(getwd(), "celltype_marker_plots")
dir.create(folder_path)

for (group in names(sce@metadata$markers)) {
pwidth <- max(10,
length(
unique(sce@metadata$markers[[group]]$marker_plot$data$Group)
)
pwidth <- max(
10,
length(unique(sce@metadata$markers[[group]]$marker_plot$data$Group))
)
pheight <- length(
unique(sce@metadata$markers[[group]]$marker_plot$data$Gene)
)
pheight <- length(unique(sce@metadata$markers[[group]]$marker_plot$data$Gene))

p <- sce@metadata$markers[[group]]$marker_plot

plot_file_name <- paste0(group, "_markers")

# save PNG
png(file.path(folder_path, paste0(plot_file_name, ".png")),
width = pwidth * 12, height = pheight * 5, units = "mm", res = 600)
width = pwidth * 12, height = pheight * 5, units = "mm", res = 600
)
print(p)
dev.off()

Expand All @@ -195,14 +230,12 @@ for (group in names(sce@metadata$markers)) {
units = "mm",
scale = 1
)

}

### Save Marker Gene Tables
folder_path <- file.path(getwd(), "celltype_marker_tables")
dir.create(folder_path)
for (group in names(sce@metadata$markers)) {

marker_test_file_name <- paste0(group, "_markers_test.tsv")
top_markers_file_name <- paste0(group, "_top_markers.tsv")

Expand All @@ -221,7 +254,6 @@ for (group in names(sce@metadata$markers)) {
col.names = TRUE,
sep = "\t"
)

}


Expand All @@ -231,5 +263,3 @@ write_sce(
folder_path = file.path(getwd(), "final_sce")
)

## ............................................................................
## Clean up ####
Loading