Skip to content

Commit

Permalink
Merge pull request #712 from AlexsLemonade/jashapiro/compress-simplify
Browse files: browse the repository at this point in the history
Reduce SCE file sizes
  • Loading branch information
jashapiro authored Mar 5, 2024
2 parents 579f246 + 2ce8f5e commit 4246991
Show file tree
Hide file tree
Showing 10 changed files with 19 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ repos:
args: [--update-only, --title=**Table of Contents**]
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff for linting and formatting python
rev: v0.2.1
rev: v0.3.0
hooks:
# Run the linter.
- id: ruff
Expand Down
2 changes: 1 addition & 1 deletion bin/add_celltypes_to_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,4 +247,4 @@ if (!is.null(opt$cellassign_predictions)) {
}

# export annotated object with cellassign assignments
readr::write_rds(sce, opt$output_sce_file, compress = "gz")
readr::write_rds(sce, opt$output_sce_file, compress = "bz2")
2 changes: 1 addition & 1 deletion bin/add_demux_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,4 @@ if (length(cellhash_ids) > 1) {
}

# write filtered sce to output
readr::write_rds(sce, opt$output_sce_file, compress = "gz")
readr::write_rds(sce, opt$output_sce_file, compress = "bz2")
2 changes: 1 addition & 1 deletion bin/add_submitter_annotations.R
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,4 @@ colData(sce) <- DataFrame(
metadata(sce)$celltype_methods <- c(metadata(sce)$celltype_methods, "submitter")

# Write SCE back to file
readr::write_rds(sce, opt$sce_file, compress = "gz")
readr::write_rds(sce, opt$sce_file, compress = "bz2")
2 changes: 1 addition & 1 deletion bin/classify_SingleR.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,5 @@ metadata(singler_results)$cell_ontology_df <- singler_model$cell_ontology_df
readr::write_rds(
singler_results,
opt$output_singler_results_file,
compress = "gz"
compress = "bz2"
)
2 changes: 1 addition & 1 deletion bin/cluster_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,4 @@ if (!opt$pca_name %in% reducedDimNames(sce)) {
}

# export -------------------
readr::write_rds(sce, opt$output_sce_file, compress = "gz")
readr::write_rds(sce, opt$output_sce_file, compress = "bz2")
2 changes: 1 addition & 1 deletion bin/filter_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -202,4 +202,4 @@ if (!is.null(ambient_profile)) {


# write filtered sce to output
readr::write_rds(filtered_sce, opt$filtered_file, compress = "gz")
readr::write_rds(filtered_sce, opt$filtered_file, compress = "bz2")
2 changes: 1 addition & 1 deletion bin/generate_unfiltered_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,4 @@ if (length(sample_type) == 1) {
metadata(unfiltered_sce)$sample_type <- sample_type

# write to rds
readr::write_rds(unfiltered_sce, opt$unfiltered_file, compress = "gz")
readr::write_rds(unfiltered_sce, opt$unfiltered_file, compress = "bz2")
9 changes: 5 additions & 4 deletions bin/merge_sces.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ option_list <- list(
make_option(
opt_str = c("--input_library_ids"),
type = "character",
help = "Comma separated list of library IDs corresponding to the libraries being integrated."
help = "Comma separated list of library IDs corresponding to the libraries being merged."
),
make_option(
opt_str = c("--input_sce_files"),
type = "character",
help = "Comma separated list of input sce file paths corresponding to the sces being integrated."
help = "Comma separated list of input sce file paths corresponding to the sces being merged."
),
make_option(
opt_str = c("-o", "--output_sce_file"),
Expand Down Expand Up @@ -68,7 +68,7 @@ if (is.null(opt$input_sce_files)) {
}

if (length(input_sce_files) == 1) {
stop("Only 1 input file provided, no merging or integration will be performed for this group")
stop("Only 1 input file provided, no merging will be performed for this group")
}

# use library ids to name list of input files
Expand Down Expand Up @@ -175,7 +175,7 @@ if ("cellassign" %in% all_celltypes) {

# Update some SCE information -------------------------------------------------
# - Add a new colData column with any additional modalities
# - Remove cluster parameters from metadata
# - Remove cluster parameters and miQC model from metadata
sce_list <- sce_list |>
purrr::map(\(sce){
additional_modalities <- altExpNames(sce)
Expand All @@ -187,6 +187,7 @@ sce_list <- sce_list |>
metadata(sce)$cluster_algorithm <- NULL
metadata(sce)$cluster_weighting <- NULL
metadata(sce)$cluster_nn <- NULL
metadata(sce)$miQC_model <- NULL

return(sce)
})
Expand Down
8 changes: 6 additions & 2 deletions bin/post_process_sce.R
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ if (alt_exp %in% altExpNames(sce)) {
# filter sce using criteria in scpca_filter (not adt_scpca_filter)
processed_sce <- sce[, which(sce$scpca_filter == "Keep")]

# drop miQC model from processed object
metadata(processed_sce)$miQC_model <- NULL


# replace existing stats with recalculated gene stats
drop_cols <- colnames(rowData(processed_sce, alt)) %in% c("mean", "detected")
rowData(processed_sce) <- rowData(processed_sce)[!drop_cols]
Expand Down Expand Up @@ -273,7 +277,7 @@ if (length(reducedDimNames(processed_sce)) == 0) {
# Export --------------

# write out filtered SCE with additional filtering column
readr::write_rds(sce, opt$out_filtered_sce_file, compress = "gz")
readr::write_rds(sce, opt$out_filtered_sce_file, compress = "bz2")

# write out processed SCE
readr::write_rds(processed_sce, opt$out_processed_sce_file, compress = "gz")
readr::write_rds(processed_sce, opt$out_processed_sce_file, compress = "bz2")

0 comments on commit 4246991

Please sign in to comment.