From 1a10669d304cbd207b459c60aabaf2c15829205b Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Tue, 5 Mar 2024 08:31:40 -0500 Subject: [PATCH 1/4] remove miQC data from processed object --- .pre-commit-config.yaml | 2 +- bin/post_process_sce.R | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9c7e23ee..94e039b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,7 +32,7 @@ repos: args: [--update-only, --title=**Table of Contents**] - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff for linting and formatting python - rev: v0.2.1 + rev: v0.3.0 hooks: # Run the linter. - id: ruff diff --git a/bin/post_process_sce.R b/bin/post_process_sce.R index 4d82f855..862546fa 100755 --- a/bin/post_process_sce.R +++ b/bin/post_process_sce.R @@ -144,6 +144,10 @@ if (alt_exp %in% altExpNames(sce)) { # filter sce using criteria in scpca_filter (not adt_scpca_filter) processed_sce <- sce[, which(sce$scpca_filter == "Keep")] +# drop miQC model from processed object +metadata(processed_sce)$miQC_model <- NULL + + # replace existing stats with recalculated gene stats drop_cols <- colnames(rowData(processed_sce, alt)) %in% c("mean", "detected") rowData(processed_sce) <- rowData(processed_sce)[!drop_cols] From 291e71337b164cce13f1a6476d7ac4896242adba Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Tue, 5 Mar 2024 08:52:26 -0500 Subject: [PATCH 2/4] BZ2 compress all the steps --- bin/add_celltypes_to_sce.R | 2 +- bin/add_demux_sce.R | 2 +- bin/add_submitter_annotations.R | 2 +- bin/classify_SingleR.R | 2 +- bin/cluster_sce.R | 2 +- bin/filter_sce.R | 2 +- bin/generate_unfiltered_sce.R | 2 +- bin/post_process_sce.R | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/bin/add_celltypes_to_sce.R b/bin/add_celltypes_to_sce.R index 220ca99e..43b61d37 100755 --- a/bin/add_celltypes_to_sce.R +++ b/bin/add_celltypes_to_sce.R @@ -247,4 +247,4 @@ if (!is.null(opt$cellassign_predictions)) { } # export annotated object with cellassign assignments -readr::write_rds(sce, opt$output_sce_file, compress = "gz") +readr::write_rds(sce, opt$output_sce_file, compress = "bz2") diff --git a/bin/add_demux_sce.R b/bin/add_demux_sce.R index bec58f61..d47ead56 100755 --- a/bin/add_demux_sce.R +++ b/bin/add_demux_sce.R @@ -125,4 +125,4 @@ if (length(cellhash_ids) > 1) { } # write filtered sce to output -readr::write_rds(sce, opt$output_sce_file, compress = "gz") +readr::write_rds(sce, opt$output_sce_file, compress = "bz2") diff --git a/bin/add_submitter_annotations.R b/bin/add_submitter_annotations.R index b067644a..7b9c6af4 100755 --- a/bin/add_submitter_annotations.R +++ b/bin/add_submitter_annotations.R @@ -107,4 +107,4 @@ colData(sce) <- DataFrame( metadata(sce)$celltype_methods <- c(metadata(sce)$celltype_methods, "submitter") # Write SCE back to file -readr::write_rds(sce, opt$sce_file, compress = "gz") +readr::write_rds(sce, opt$sce_file, compress = "bz2") diff --git a/bin/classify_SingleR.R b/bin/classify_SingleR.R index 29b955af..99c35fa2 100755 --- a/bin/classify_SingleR.R +++ b/bin/classify_SingleR.R @@ -106,5 +106,5 @@ metadata(singler_results)$cell_ontology_df <- singler_model$cell_ontology_df readr::write_rds( singler_results, opt$output_singler_results_file, - compress = "gz" + compress = "bz2" ) diff --git a/bin/cluster_sce.R b/bin/cluster_sce.R index c49fa916..7d82cc03 100755 --- a/bin/cluster_sce.R +++ b/bin/cluster_sce.R @@ -101,4 +101,4 @@ if (!opt$pca_name %in% reducedDimNames(sce)) { } # export ------------------- -readr::write_rds(sce, opt$output_sce_file, compress = "gz") +readr::write_rds(sce, opt$output_sce_file, compress = "bz2") diff --git a/bin/filter_sce.R b/bin/filter_sce.R index 83ec8901..e507186e 100755 --- a/bin/filter_sce.R +++ b/bin/filter_sce.R @@ -202,4 +202,4 @@ if (!is.null(ambient_profile)) { # write filtered sce to output -readr::write_rds(filtered_sce, opt$filtered_file, compress = "gz") +readr::write_rds(filtered_sce, opt$filtered_file, compress = "bz2") diff --git a/bin/generate_unfiltered_sce.R b/bin/generate_unfiltered_sce.R index 63943dbe..c4a014ab 100755 --- a/bin/generate_unfiltered_sce.R +++ b/bin/generate_unfiltered_sce.R @@ -205,4 +205,4 @@ if (length(sample_type) == 1) { metadata(unfiltered_sce)$sample_type <- sample_type # write to rds -readr::write_rds(unfiltered_sce, opt$unfiltered_file, compress = "gz") +readr::write_rds(unfiltered_sce, opt$unfiltered_file, compress = "bz2") diff --git a/bin/post_process_sce.R b/bin/post_process_sce.R index 862546fa..88e66bb0 100755 --- a/bin/post_process_sce.R +++ b/bin/post_process_sce.R @@ -277,7 +277,7 @@ if (length(reducedDimNames(processed_sce)) == 0) { # Export -------------- # write out filtered SCE with additional filtering column -readr::write_rds(sce, opt$out_filtered_sce_file, compress = "gz") +readr::write_rds(sce, opt$out_filtered_sce_file, compress = "bz2") # write out processed SCE -readr::write_rds(processed_sce, opt$out_processed_sce_file, compress = "gz") +readr::write_rds(processed_sce, opt$out_processed_sce_file, compress = "bz2") From 9e2d2173f27e8b9d8d1a77783c4bbb429fb3e138 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Tue, 5 Mar 2024 09:12:19 -0500 Subject: [PATCH 3/4] remove miQC_model from merge --- bin/merge_sces.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bin/merge_sces.R b/bin/merge_sces.R index 27297a43..ddf4f62a 100755 --- a/bin/merge_sces.R +++ b/bin/merge_sces.R @@ -175,7 +175,7 @@ if ("cellassign" %in% all_celltypes) { # Update some SCE information ------------------------------------------------- # - Add a new colData column with any additional modalities -# - Remove cluster parameters from metadata +# - Remove cluster parameters and miQC model from metadata sce_list <- sce_list |> purrr::map(\(sce){ additional_modalities <- altExpNames(sce) @@ -187,6 +187,7 @@ sce_list <- sce_list |> metadata(sce)$cluster_algorithm <- NULL metadata(sce)$cluster_weighting <- NULL metadata(sce)$cluster_nn <- NULL + metadata(sce)$miQC_model <- NULL return(sce) }) From 2ce8f5e5c8a700db363010ed12b1bb66f1e42240 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Tue, 5 Mar 2024 09:12:39 -0500 Subject: [PATCH 4/4] update argument text --- bin/merge_sces.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/merge_sces.R b/bin/merge_sces.R index ddf4f62a..fbe876de 100755 --- a/bin/merge_sces.R +++ b/bin/merge_sces.R @@ -9,12 +9,12 @@ option_list <- list( make_option( opt_str = c("--input_library_ids"), type = "character", - help = "Comma separated list of library IDs corresponding to the libraries being integrated." + help = "Comma separated list of library IDs corresponding to the libraries being merged." ), make_option( opt_str = c("--input_sce_files"), type = "character", - help = "Comma separated list of input sce file paths corresponding to the sces being integrated." + help = "Comma separated list of input sce file paths corresponding to the sces being merged." ), make_option( opt_str = c("-o", "--output_sce_file"), @@ -68,7 +68,7 @@ if (is.null(opt$input_sce_files)) { } if (length(input_sce_files) == 1) { - stop("Only 1 input file provided, no merging or integration will be performed for this group") + stop("Only 1 input file provided, no merging will be performed for this group") } # use library ids to name list of input files