-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from axiomcura/main
Feature Select Implementation to CytoPipe
- Loading branch information
Showing
9 changed files
with
206 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Parameters for the aggregation step.
aggregate_configs:
  params:
    strata: ["Metadata_Plate", "Metadata_Well"]
    features: "infer"
    operation: "median"
    output_file: "none"
    compute_object_count: False
    object_feature: "Metadata_ObjectNumber"
    subset_data_df: "none"
    # YAML spells the null value `null` (or `~`); a bare `None` is loaded by
    # PyYAML as the string "None", not as Python's None.
    compression_options: null
    float_format: null
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Parameters read by the feature selection script (feature_select_configs -> params).
feature_select_configs:
  params:
    features: infer
    image_features: False
    samples: all
    # NOTE(review): `noise_removal` is enabled below while
    # `noise_removal_perturb_groups` and `noise_removal_stdev_cutoff` are unset;
    # presumably that operation needs both - confirm against pycytominer.
    operation:
      - variance_threshold
      - drop_na_columns
      - correlation_threshold
      - blocklist
      - drop_outliers
      - noise_removal
    na_cutoff: 0.05
    corr_threshold: 0.9
    corr_method: pearson
    freq_cut: 0.05
    unique_cut: 0.1
    compression_options: gzip
    # YAML spells the null value `null` (or `~`); a bare `None` is loaded by
    # PyYAML as the string "None", not as Python's None.
    float_format: null
    blocklist_file: null
    outlier_cutoff: 15
    noise_removal_perturb_groups: null
    noise_removal_stdev_cutoff: null
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,3 +6,4 @@ channels: | |
dependencies: | ||
- python>=3.7.0 | ||
- conda-forge::pycytominer=0.1 | ||
- pyyaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
configfile: "configs/configuration.yaml" | ||
|
||
|
||
# Applies feature selection to the normalized profiles.
# Inputs and outputs are both expanded over PLATE_IDS, so the two lists line up
# by position. The rule uses no wildcards: all plates are passed to the script
# together.
# NOTE(review): PLATE_IDS is not defined in this file - presumably it is set by
# the Snakefile that includes these rules; confirm.
rule feature_select:
    input:
        expand("results/preprocessing/{plate_id}_normalized.csv.gz", plate_id=PLATE_IDS),
    output:
        expand(
            "results/preprocessing/{plate_id}_feature_select.csv.gz",
            plate_id=PLATE_IDS,
        ),
    params:
        # path of the feature selection YAML, looked up in the workflow config
        feature_select_config=config["config_paths"]["feature_select"],
    conda:
        "../envs/cytominer_env.yaml"
    script:
        "../scripts/feature_select.py"
|
||
|
||
# Combines every feature-selected plate profile into one consensus file.
# Takes the outputs of the feature selection step for all PLATE_IDS and
# produces a single gzip-compressed CSV.
rule create_consensus:
    input:
        expand(
            "results/preprocessing/{plate_id}_feature_select.csv.gz",
            plate_id=PLATE_IDS,
        ),
    output:
        "results/preprocessing/consensus.csv.gz",
    params:
        # path of the aggregation YAML, looked up in the workflow config
        aggregate_config=config["config_paths"]["aggregate"],
    conda:
        "../envs/cytominer_env.yaml"
    script:
        "../scripts/consensus.py"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from pathlib import Path | ||
from re import A | ||
import numpy as np | ||
import pandas as pd | ||
|
||
from pycytominer.consensus import modz | ||
from pycytominer import aggregate | ||
from pycytominer.operations import get_na_columns | ||
|
||
|
||
def concatenate_data(profile_list: list) -> pd.DataFrame:
    """Concatenate several profile CSV files into one pandas DataFrame.

    The profiles are stacked row-wise, the ``Image_Metadata_Plate`` and
    ``Image_Metadata_Well`` columns are renamed to their ``Metadata_``
    equivalents, columns whose name starts with ``Metadata`` are moved to the
    front, and the columns reported by ``get_na_columns`` are dropped.

    Parameters
    ----------
    profile_list : list
        list of paths pointing to the profile CSV files to combine

    Returns
    -------
    pd.DataFrame
        concatenated profiles with metadata columns first
    """
    concat_df = pd.concat(
        [pd.read_csv(profile_path) for profile_path in profile_list], sort=True
    ).rename(
        {
            "Image_Metadata_Plate": "Metadata_Plate",
            "Image_Metadata_Well": "Metadata_Well",
        },
        axis="columns",
    )

    # Realign the columns so that metadata comes first. This is a pure column
    # reordering done by position: it must not add rows, and positional
    # selection stays correct even if the rename produced duplicate names.
    is_metadata = concat_df.columns.str.startswith("Metadata")
    metadata_positions = [pos for pos, flag in enumerate(is_metadata) if flag]
    feature_positions = [pos for pos, flag in enumerate(is_metadata) if not flag]
    concat_df = concat_df.iloc[:, metadata_positions + feature_positions]

    # dropping columns with na values
    na_cols = get_na_columns(concat_df, cutoff=0)
    concat_df = concat_df.drop(na_cols, axis="columns")

    return concat_df
|
||
|
||
# Script entry point. `==` is required here: `in` would be a substring test
# against "__main__" and would also match names such as "main".
if __name__ == "__main__":

    # `snakemake` is the object Snakemake injects into scripts run through the
    # `script:` directive; it is not defined when the file is run directly.
    inputs = [str(f_in) for f_in in snakemake.input]
    # a single consensus file is expected, so take the first output explicitly
    output = str(snakemake.output[0])

    # concatenate all input profiles into one dataset
    concat_dataset = concatenate_data(inputs)
    # NOTE(review): the DataFrame index is written as the first CSV column;
    # confirm downstream readers expect it before switching to index=False.
    concat_dataset.to_csv(output, compression="gzip")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
from pathlib import Path | ||
import yaml | ||
from pycytominer.feature_select import feature_select | ||
|
||
|
||
def _none_strings_to_none(params):
    """Return a copy of ``params`` with literal "None" strings replaced by None."""
    return {
        key: None if isinstance(value, str) and value == "None" else value
        for key, value in params.items()
    }


def feature_selection(normalized_profile, out_file, config_path=None):
    """Run pycytominer's ``feature_select`` on one normalized profile.

    The parameters are read from the ``feature_select_configs -> params``
    section of a YAML configuration file.

    Parameters
    ----------
    normalized_profile : str
        path that points to the normalized profile
    out_file : str
        name of the generated output file
    config_path : str or Path, optional
        path to the feature selection YAML file. When omitted, the path is
        taken from ``snakemake.params["feature_select_config"]``, which is only
        available when the script is executed by Snakemake.

    Returns
    -------
    None
        the selected features are written to ``out_file``

    Raises
    ------
    KeyError
        if the configuration file lacks one of the expected keys
    """
    if config_path is None:
        # `snakemake` is injected by Snakemake's `script:` directive
        config_path = snakemake.params["feature_select_config"]

    # loading parameters
    with open(Path(config_path).absolute(), "r") as yaml_contents:
        loaded = yaml.safe_load(yaml_contents)
    params = loaded["feature_select_configs"]["params"]

    # A bare `None` in YAML is parsed as the string "None"; translate it so the
    # callee receives a real None instead of a bogus file name or format.
    params = _none_strings_to_none(params)

    feature_select(
        normalized_profile,
        features=params["features"],
        image_features=params["image_features"],
        samples=params["samples"],
        operation=params["operation"],
        na_cutoff=params["na_cutoff"],
        corr_threshold=params["corr_threshold"],
        corr_method=params["corr_method"],
        freq_cut=params["freq_cut"],
        unique_cut=params["unique_cut"],
        compression_options=params["compression_options"],
        float_format=params["float_format"],
        blocklist_file=params["blocklist_file"],
        outlier_cutoff=params["outlier_cutoff"],
        noise_removal_perturb_groups=params["noise_removal_perturb_groups"],
        noise_removal_stdev_cutoff=params["noise_removal_stdev_cutoff"],
        output_file=out_file,
    )
|
||
|
||
# Script entry point: run feature selection on each input profile and write the
# result to the output path at the same position.
if __name__ == "__main__":
    # `snakemake` is the object Snakemake injects into scripts run through the
    # `script:` directive; it is not defined when the file is run directly.
    profile_paths = [str(f_in) for f_in in snakemake.input]
    out_files = [str(f_out) for f_out in snakemake.output]

    # zip() silently stops at the shorter list, so fail loudly on a mismatch
    # instead of skipping profiles without notice.
    if len(profile_paths) != len(out_files):
        raise ValueError(
            f"expected one output per input, got {len(profile_paths)} inputs "
            f"and {len(out_files)} outputs"
        )

    # iteratively passing normalized data
    for profile_path, out_file in zip(profile_paths, out_files):
        feature_selection(profile_path, out_file)