WayScience · axiomcura · May 9, 2023 · Mar 14, 2023 · Mar 28, 2023 · Apr 12, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,10 +1,12 @@
 ---
 repos:
   # remove unused imports
+  # additional configs are in the pyproject.toml file
   - repo: https://github.com/hadialqattan/pycln.git
     rev: v2.1.3
     hooks:
       - id: pycln
+        args: [--config=pyproject.toml]
 
   # import formatter with black configurations
   - repo: https://github.com/pycqa/isort
@@ -17,28 +19,36 @@ repos:
   # Code formatter for both python files and jupyter notebooks
   # support pep 8 standards
   - repo: https://github.com/psf/black
-    rev: 22.10.0
+    rev: 23.3.0
     hooks:
       - id: black-jupyter
       - id: black
         language_version: python3.10
 
   # AI based formatter to improve readability
   - repo: https://github.com/sourcery-ai/sourcery
-    rev: v1.1.0
+    rev: v1.2.0
     hooks:
       - id: sourcery
-        args: [--diff=git diff HEAD, --no-summary]
+        args: [--diff=git diff HEAD, --fix, --no-summary]
+
+  # adding ruff with auto fix on
+  # additional configs are in the pyproject.toml file
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: "v0.0.265"
+    hooks:
+      - id: ruff
+        args: [--config=pyproject.toml, --fix, --exit-non-zero-on-fix]
 
   # snakemake formatting
   - repo: https://github.com/snakemake/snakefmt
-    rev: v0.8.0
+    rev: v0.8.4
     hooks:
       - id: snakefmt
 
   # additional hooks found with in the pre-commit lib
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.4.0
+    rev: v4.4.0
     hooks:
       - id: trailing-whitespace # removes trailing white spaces
       - id: mixed-line-ending # removes mixed end of line

diff --git a/configs/wf_configs/cp_process.yaml b/configs/wf_configs/cp_process.yaml
@@ -0,0 +1,115 @@
+name: cp_process
+
+# Documentation
+docs: |
+  Description:
+  ------------
+  Traditional workflow
+
+  Workflow Steps:
+  ---------------
+  Below the workflow steps are separated in chunks.
+
+  aggregate_configs:
+    Aggregates single-cell morphology at a given level of aggregation. For example,
+    one can aggregate single cells at the well level. This means that all cells within
+    a well will be aggregated into a single data point containing all morphology features.
+
+  annotate_configs:
+    Adds metadata to the given dataset. This includes information regarding well
+    position, types of perturbations, etc.
+
+  normalize_configs:
+    Applies normalization to given dataset
+
+  feature_select_configs:
+    Select features from given dataset
+
+  consensus_configs:
+    Creates a consensus profile. Consensus profiles are unique signatures that are mapped
+    to a given perturbation.
+
+annotate_configs:
+  params:
+    join_on:
+      - Metadata_well_position
+      - Image_Metadata_Well
+    add_metadata_id_to_platemap: True
+    format_broad_cmap: False
+    clean_cellprofiler: True
+    external_metadata: "none"
+    external_join_left: "none"
+    external_join_right: "none"
+    compression_options:
+      method: "gzip"
+      mtime: 1
+    float_format: null
+    cmap_args: {}
+
+aggregate_configs:
+  params:
+    strata:
+      - Metadata_Plate
+      - Metadata_Well
+    features: infer
+    operation: median
+    output_file: none
+    compute_object_count: False
+    object_feature: Metadata_ObjectNumber
+    subset_data_df: none
+    compression_options:
+      method: gzip
+      mtime: 1
+    float_format: null
+
+normalize_configs:
+  params:
+    features: infer
+    image_features: False
+    meta_features: infer
+    samples: all
+    method: mad_robustize
+    compression_options:
+      method: gzip
+      mtime: 1
+    float_format: null
+    mad_robustize_epsilon: 1.0e-18
+    spherize_center: True
+    spherize_method: ZCA-cor
+    spherize_epsilon: 1.0e-6
+
+feature_select_configs:
+  params:
+    features: infer
+    image_features: False
+    samples: all
+    operation:
+      - variance_threshold
+      - drop_na_columns
+      - correlation_threshold
+      - drop_outliers
+      - blocklist
+    na_cutoff: 0.05
+    corr_threshold: 0.9
+    corr_method: pearson
+    freq_cut: 0.05
+    unique_cut: 0.1
+    compression_options:
+      method: gzip
+      mtime: 1
+    float_format: null
+    blocklist_file: null
+    outlier_cutoff: 15
+    noise_removal_perturb_groups: null
+    noise_removal_stdev_cutoff: null
+
+consensus_config:
+  params:
+    replicate_columns:
+      - Metadata_cell_line
+      - Metadata_pert_name
+    operation: median
+    features: infer
+    compression_options: null
+    float_format: null
+    modz_args: { "method": "spearman" }
diff --git a/cytosnake/cli/args.py b/cytosnake/cli/args.py
@@ -52,12 +52,12 @@ class WorkflowSearchPath(argparse.Action):
     """
 
     def __call__(self, parser, args, values, option_string=None):
-
         # checking if user provided workflow exists
         supported_wf = supported_workflows()
         if values not in supported_wf:
             raise InvalidWorkflowException(
-                f"Unable to find '{values}'. Please specify a supported workflow: {supported_wf}"
+                f"Unable to find '{values}'. "
+                f"Please specify a supported workflow: {supported_wf}"
             )
         # grabbing and setting the new value with the extracted path
         values = str(load_workflow_path(values))

diff --git a/cytosnake/cli/cmd.py b/cytosnake/cli/cmd.py
@@ -15,6 +15,7 @@
 from cytosnake.cli.exec.workflow_exec import workflow_executor
 from cytosnake.cli.setup_init import init_cp_data, init_dp_data
 from cytosnake.common.errors import ProjectExistsError, WorkflowFailedException
+from cytosnake.guards.input_guards import check_init_parameter_inputs
 from cytosnake.utils import cyto_paths
 from cytosnake.utils.cytosnake_setup import setup_cytosnake_env
 
@@ -63,6 +64,9 @@ def run_cmd() -> None:
             logging.info(msg="Formatting input files")
             init_args = args_handler.parse_init_args()
 
+            # before setup, check the logic of the input parameters
+            check_init_parameter_inputs(user_params=init_args)
+
             # identifying which data type was added and how to set it up
             match init_args.datatype:
                 case "cell_profiler":
@@ -87,7 +91,6 @@ def run_cmd() -> None:
         # Executed if the user is using the `run` mode. This will execute the
         # workflow that are found within the `workflows` folder
         case "run":
-
             # display run help documentation
             if args_handler.mode_help is True:
                 print(run_doc)
@@ -120,5 +123,4 @@ def run_cmd() -> None:
 
 
 if __name__ == "__main__":
-
     run_cmd()
diff --git a/cytosnake/common/errors.py b/cytosnake/common/errors.py
@@ -91,6 +91,10 @@ class ExtensionError(BaseValueError):
     """Raised when invalid extensions are captured"""
 
 
+class BarcodeRequiredError(BaseFileNotFound):
+    """Raised when a barcode file is required but none was provided"""
+
+
 # -----------------------
 # Error handling functions
 # -----------------------

diff --git a/cytosnake/guards/input_guards.py b/cytosnake/guards/input_guards.py
@@ -0,0 +1,65 @@
+"""
+module: input_guards.py
+
+This module handles CytoSnake's CLI logic, mostly interacting with user-defined
+parameters from CytoSnake's CLI.
+
+Here, the logic establishes rules for which inputs are required and which functionality
+is or is not allowed.
+"""
+import pathlib
+from typing import TypeVar
+
+from cytosnake.common.errors import BarcodeRequiredError
+
+# declaring user based type hinting
+NameSpace = TypeVar("NameSpace")
+
+
+def is_barcode_required(user_params: NameSpace) -> bool:
+    """Decide whether the given CLI inputs make a barcode file mandatory.
+
+    Parameters
+    ----------
+    user_params : NameSpace
+        Argparse.NameSpace object that contains all user provided parameters
+
+    Returns
+    -------
+    bool
+        True when the metadata "platemap" directory holds more than one entry
+        and `user_params.barcode` is None, otherwise False
+    """
+    # read the barcode first, then resolve the metadata directory (must exist)
+    barcode = user_params.barcode
+    metadata_dir = pathlib.Path(user_params.metadata).resolve(strict=True)
+
+    # every entry directly under <metadata>/platemap counts as one platemap
+    platemap_dir = (metadata_dir / "platemap").resolve(strict=True)
+    platemap_count = sum(1 for _ in platemap_dir.glob("*"))
+
+    return barcode is None and platemap_count > 1
+
+
+def check_init_parameter_inputs(user_params: NameSpace) -> bool:
+    """Main wrapper to check `init` mode parameter logic.
+
+    Parameters
+    ----------
+    user_params : NameSpace
+        Argparse.NameSpace object that contains all user provided parameters.
+
+    Returns
+    -------
+    bool
+        True if all logic checks passed
+
+    Raises
+    ------
+    BarcodeRequiredError
+        Raised if multiple platemaps are found but no barcode file was provided
+    """
+    # a barcode file is mandatory once more than one platemap is present
+    if is_barcode_required(user_params=user_params):
+        raise BarcodeRequiredError("Barcode is required, multiple platemaps found")
+    return True
diff --git a/cytosnake/helpers/helper_funcs.py b/cytosnake/helpers/helper_funcs.py
@@ -9,8 +9,6 @@
 from pathlib import Path
 from typing import Optional
 
-from snakemake.io import expand
-
 from cytosnake.guards.path_guards import is_valid_path
 from cytosnake.utils.config_utils import load_general_configs, load_meta_path_configs
 

diff --git a/cytosnake/tests/functional/datasets/dummyfiles/barcode.txt b/cytosnake/tests/functional/datasets/dummyfiles/barcode.txt
diff --git a/cytosnake/tests/functional/datasets/dummyfiles/metadata/platemap/platemap1.csv b/cytosnake/tests/functional/datasets/dummyfiles/metadata/platemap/platemap1.csv
diff --git a/cytosnake/tests/functional/datasets/dummyfiles/metadata/platemap/platemap2.csv b/cytosnake/tests/functional/datasets/dummyfiles/metadata/platemap/platemap2.csv
diff --git a/cytosnake/tests/functional/datasets/dummyfiles/plate_data1.sqlite b/cytosnake/tests/functional/datasets/dummyfiles/plate_data1.sqlite
diff --git a/cytosnake/tests/functional/datasets/dummyfiles/plate_data2.sqlite b/cytosnake/tests/functional/datasets/dummyfiles/plate_data2.sqlite