Skip to content

Commit

Permalink
Decoupling rule modules into individual components (#33)
Browse files Browse the repository at this point in the history
* fixed minor pathing bugs

* separated cp_process module

* update logs documentation

* added documentation

* edit typos

* update cp_process workflow

* update pycytominer version

* file typo fixed
  • Loading branch information
axiomcura authored Mar 27, 2023
1 parent bfcfd18 commit 9612414
Show file tree
Hide file tree
Showing 12 changed files with 221 additions and 105 deletions.
2 changes: 1 addition & 1 deletion cytosnake/cli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def __call__(self, parser, args, values, option_string=None):
f"Unable to find '{values}'. Please specify a supported workflow: {supported_wf}"
)
# grabbing and setting the new value with the extracted path
values = load_workflow_path(values)
values = str(load_workflow_path(values))

# return new attributes of the `workflow` parameter
setattr(args, self.dest, values)
Expand Down
2 changes: 1 addition & 1 deletion cytosnake/helpers/helper_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_barcodes() -> str:
"""
# Barcodes are optional. If not added, set to "None"
try:
barcode_path = PATHS["project_dir"]["data_dir_conts"]["barcode"]
barcode_path = PATHS["project_dir"]["data_directory_contents"]["barcode"]
except KeyError:
barcode_path = None

Expand Down
2 changes: 1 addition & 1 deletion workflows/envs/cytominer_env.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ dependencies:
- pyyaml
- pip
- pip:
- git+https://github.com/cytomining/pycytominer.git@b2c6cc4580cf9e1c040a7370b99976916a22e756
- git+https://github.com/cytomining/pycytominer.git@c90438fd7c11ad8b1689c21db16dab1a5280de6c
51 changes: 51 additions & 0 deletions workflows/rules/aggregate.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
rule module: aggregate.smk
Utilizes pycytominer's aggregate module:
https://github.com/cytomining/pycytominer/blob/c90438fd7c11ad8b1689c21db16dab1a5280de6c/pycytominer/aggregate.py
Aggregates single-cell profiles into aggregated profiles based on a given strata
For example, users can configure `Metadata_Well` as their strata in order to
aggregate single-cell data into the Well level.
Parameters:
-----------
input:
sql_files: single-cell dataset
barcodes: file containing unique barcodes that maps to a specific plate
metadata: directory containing metadata associated with the aggregate
profile
output:
aggregate_profile: aggregated profiles
cell_counts: CSV file that contains how many cells were counted per well
Returns
-------
aggregated profiles and cell count data stored in the `results/` directory
# --------------------
"""


configfile: "configs/configuration.yaml"


# Runs ../scripts/aggregate_cells.py inside the cytominer conda environment.
# PLATE_DATA, BARCODES, METADATA_DIR, AGGREGATE_DATA and CELL_COUNTS are not
# defined in this module (presumably provided by common.smk -- TODO confirm).
rule aggregate:
    input:
        sql_files=PLATE_DATA,
        barcodes=BARCODES,
        metadata=METADATA_DIR,
    output:
        aggregate_profile=AGGREGATE_DATA,
        cell_counts=CELL_COUNTS,
    params:
        # NOTE(review): the aggregate config is read from the `single_cell`
        # entry of config_paths -- confirm this is the intended key.
        aggregate_config=config["config_paths"]["single_cell"],
    log:
        # one log per value of the {file_name} wildcard
        "logs/aggregate_{file_name}.log",
    conda:
        "../envs/cytominer_env.yaml"
    script:
        "../scripts/aggregate_cells.py"
46 changes: 46 additions & 0 deletions workflows/rules/annotate.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
rule module: annotate.smk
Utilizes pycytominer's annotate module:
https://github.com/cytomining/pycytominer/blob/master/pycytominer/annotate.py
Annotates profiles with given metadata.
Parameters
----------
input:
aggregate_profile: aggregated profile dataset
barcodes: file containing unique barcodes that maps to a specific plate
metadata: directory containing metadata associated with the aggregate
profile
output:
generates an annotated profile.
Returns:
--------
Generates an annotated profile stored in the `results/` directory
"""


configfile: "configs/configuration.yaml"


# Runs ../scripts/annotate.py inside the cytominer conda environment.
# AGGREGATE_DATA, BARCODES, METADATA_DIR and ANNOTATED_DATA are not defined in
# this module (presumably provided by common.smk -- TODO confirm).
rule annotate:
    input:
        aggregate_profile=AGGREGATE_DATA,
        barcodes=BARCODES,
        metadata=METADATA_DIR,
    output:
        ANNOTATED_DATA,
    params:
        # path taken from the `annotate` entry of config_paths
        annotate_config=config["config_paths"]["annotate"],
    log:
        # one log per value of the {file_name} wildcard
        "logs/annotate_{file_name}.log",
    conda:
        "../envs/cytominer_env.yaml"
    script:
        "../scripts/annotate.py"
38 changes: 23 additions & 15 deletions workflows/rules/feature_select.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
"""
rule module: feature_select.smk
Utilizes pycytominer's feature select module:
https://github.com/cytomining/pycytominer/blob/master/pycytominer/feature_select.py
Performs feature selection on the given profiles. PyCytominer contains
different operations to conduct its feature selection: variance_threshold,
correlation_threshold, drop_na_columns, drop_outliers, and noise_removal.
Parameters:
-----------
Input:
Cell morphology profiles
Output:
Selected features from profiles
Returns
-------
CSV file containing selected features. Stored in the `results/` directory.
"""


configfile: "configs/configuration.yaml"


Expand All @@ -14,18 +37,3 @@ rule feature_select:
"../envs/cytominer_env.yaml"
script:
"../scripts/feature_select.py"


# Runs ../scripts/consensus.py inside the cytominer conda environment.
# SELECTED_FEATURE_DATA_EXPAND and CONSENSUS_DATA are not defined in this
# module (presumably provided by common.smk -- TODO confirm).
rule create_consensus:
    input:
        SELECTED_FEATURE_DATA_EXPAND,
    output:
        CONSENSUS_DATA,
    params:
        # path taken from the `consensus_config` entry of config_paths
        consensus_configs=config["config_paths"]["consensus_config"],
    log:
        # fixed log path: this rule's log name carries no wildcard
        "logs/create_consensus.log",
    conda:
        "../envs/cytominer_env.yaml"
    script:
        "../scripts/consensus.py"
38 changes: 38 additions & 0 deletions workflows/rules/generate_consensus.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
rule module: generate_consensus.smk
Utilizes pycytominer's consensus module:
https://github.com/cytomining/pycytominer/blob/master/pycytominer/consensus.py
Creates consensus profiles that reflect unique signatures associated with
external factors.
Parameters:
----------
input:
Selected features profile
output:
Consensus profile
Return:
-------
Consensus profile stored in the `results/` directory
"""


configfile: "configs/configuration.yaml"


# Runs ../scripts/consensus.py inside the cytominer conda environment.
# SELECTED_FEATURE_DATA_EXPAND and CONSENSUS_DATA are not defined in this
# module (presumably provided by common.smk -- TODO confirm).
rule create_consensus:
    input:
        SELECTED_FEATURE_DATA_EXPAND,
    output:
        CONSENSUS_DATA,
    params:
        # path taken from the `consensus_config` entry of config_paths
        consensus_configs=config["config_paths"]["consensus_config"],
    log:
        # fixed log path: this rule's log name carries no wildcard
        "logs/create_consensus.log",
    conda:
        "../envs/cytominer_env.yaml"
    script:
        "../scripts/consensus.py"
20 changes: 14 additions & 6 deletions workflows/rules/merge_logs.smk
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
"""
Documentation:
Rule collects all generated logs from all processes and merges
them into a single log file.
rule module: merge_logs.smk
individual log files are stored into an archive file along with
the generated merged log.
Collects all log files generated within each rule module and merges them into
one log file.
The archive file is tagged with (Month-day-year)-(hour-min-sec)
The log file is tagged with (Month-day-year)-(hour-min-sec)
Example: 072922-083033_archived_logs
Parameters:
Inputs:
No user-defined inputs; searches for individual logs in the `logs/` folder
Output:
Merged log file
Returns
Merged log file stored in the `logs/` directory
"""


Expand Down
40 changes: 40 additions & 0 deletions workflows/rules/normalize.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
rule module: normalize.smk
Utilizes pycytominer's normalization module:
https://github.com/cytomining/pycytominer/blob/c90438fd7c11ad8b1689c21db16dab1a5280de6c/pycytominer/normalize.py
Normalizes single-cell or aggregate features. The current default normalization
method is `standardize`; other methods include `robustize`, `mad_robustize`, and `spherize`.
parameters
----------
input
single-cell or aggregated profiles
output
normalized single-cell or aggregate dataset.
Output
------
Generates a normalized profile stored in the `results/` directory
"""


configfile: "configs/configuration.yaml"


# Runs ../scripts/normalize.py inside the cytominer conda environment.
# ANNOTATED_DATA and NORMALIZED_DATA are not defined in this module
# (presumably provided by common.smk -- TODO confirm).
rule normalize:
    input:
        ANNOTATED_DATA,
    output:
        NORMALIZED_DATA,
    params:
        # path taken from the `normalize` entry of config_paths
        normalize_config=config["config_paths"]["normalize"],
    log:
        # one log per value of the {file_name} wildcard; note the file prefix
        # is "normalized_", not the rule name "normalize"
        "logs/normalized_{file_name}.log",
    conda:
        "../envs/cytominer_env.yaml"
    script:
        "../scripts/normalize.py"
78 changes: 0 additions & 78 deletions workflows/rules/preprocessing.smk

This file was deleted.

1 change: 0 additions & 1 deletion workflows/scripts/consensus.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from pathlib import Path

import pandas as pd
import snakemake
import yaml
from pycytominer import consensus
from pycytominer.operations import get_na_columns
Expand Down
8 changes: 6 additions & 2 deletions workflows/workflow/cp_process.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ import glob
from cytosnake.helpers import helper_funcs as hf


# importing Modules
# importing rule modules
include: "../rules/common.smk"
include: "../rules/preprocessing.smk"
include: "../rules/aggregate.smk"
include: "../rules/annotate.smk"
include: "../rules/normalize.smk"
include: "../rules/feature_select.smk"
include: "../rules/generate_consensus.smk"


# expected outputs from workflow
Expand All @@ -16,3 +19,4 @@ rule all:
ANNOTATED_DATA_EXPAND,
NORMALIZED_DATA_EXPAND,
SELECTED_FEATURE_DATA_EXPAND,
CONSENSUS_DATA,

0 comments on commit 9612414

Please sign in to comment.