Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add barcode logic to CytoSnake's CLI #46

Merged
merged 21 commits into from
May 9, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
---
repos:
# remove unused imports
# additional configs are in the pyproject.toml file
- repo: https://github.com/hadialqattan/pycln.git
rev: v2.1.3
hooks:
- id: pycln
args: [--config=pyproject.toml]

# import formatter with black configurations
- repo: https://github.com/pycqa/isort
Expand All @@ -17,28 +19,36 @@ repos:
# Code formatter for both python files and jupyter notebooks
# support pep 8 standards
- repo: https://github.com/psf/black
rev: 22.10.0
rev: 23.3.0
hooks:
- id: black-jupyter
- id: black
language_version: python3.10

# AI based formatter to improve readability
- repo: https://github.com/sourcery-ai/sourcery
rev: v1.1.0
rev: v1.2.0
hooks:
- id: sourcery
args: [--diff=git diff HEAD, --fix, --no-summary]

# adding ruff with auto fix on
# additional configs are in the pyproject.toml file
- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.0.265"
hooks:
- id: ruff
args: [--config=pyproject.toml, --fix, --exit-non-zero-on-fix]

# snakemake formatting
- repo: https://github.com/snakemake/snakefmt
rev: v0.8.0
rev: v0.8.4
hooks:
- id: snakefmt

# additional hooks found with in the pre-commit lib
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
rev: v4.4.0
hooks:
- id: trailing-whitespace # removes trailing white spaces
- id: mixed-line-ending # removes mixed end of line
Expand Down
4 changes: 2 additions & 2 deletions cytosnake/cli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ class WorkflowSearchPath(argparse.Action):
"""

def __call__(self, parser, args, values, option_string=None):

# checking if user provided workflow exists
supported_wf = supported_workflows()
if values not in supported_wf:
raise InvalidWorkflowException(
f"Unable to find '{values}'. Please specify a supported workflow: {supported_wf}"
f"Unable to find '{values}'."
f"Please specify a supported workflow: {supported_wf}"
)
# grabbing and setting the new value with the extracted path
values = str(load_workflow_path(values))
Expand Down
7 changes: 2 additions & 5 deletions cytosnake/cli/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@
import sys
from pathlib import Path

# cytosnake imports
from cytosnake.cli.args import CliControlPanel
from cytosnake.cli.cli_docs import cli_docs, init_doc, run_doc
from cytosnake.cli.exec.workflow_exec import workflow_executor
from cytosnake.cli.setup_init import init_cp_data, init_dp_data
from cytosnake.common.errors import ProjectExistsError, WorkflowFailedException

# cytosnake imports
from cytosnake.guards.input_guards import check_init_parameter_inputs
from cytosnake.utils import cyto_paths
from cytosnake.utils.cytosnake_setup import setup_cytosnake_env
Expand Down Expand Up @@ -65,7 +64,7 @@ def run_cmd() -> None:
logging.info(msg="Formatting input files")
init_args = args_handler.parse_init_args()

# before setup up, check the logic of the input parameters
# before setup, check the logic of the input parameters
check_init_parameter_inputs(user_params=init_args)

# identifying which data type was added and how to set it up
Expand All @@ -92,7 +91,6 @@ def run_cmd() -> None:
# Executed if the user is using the `run` mode. This will execute the
# workflow that are found within the `workflows` folder
case "run":

# display run help documentation
if args_handler.mode_help is True:
print(run_doc)
Expand Down Expand Up @@ -125,5 +123,4 @@ def run_cmd() -> None:


if __name__ == "__main__":

run_cmd()
10 changes: 5 additions & 5 deletions cytosnake/guards/input_guards.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def is_barcode_required(user_params: NameSpace) -> bool:
metadata_path = pathlib.Path(user_params.metadata).resolve(strict=True)

# counting number of platemaps in metadata
plate_maps_path = metadata_path / "platemaps"
plate_maps_path = (metadata_path / "platemap").resolve(strict=True)
n_platemaps = len(list(plate_maps_path.glob("*")))

# if the metadata directory has more than 1 plate maps and no barcode file return
Expand All @@ -46,7 +46,7 @@ def check_init_parameter_inputs(user_params: NameSpace) -> bool:

Parameters
----------
args : NameSpace
user_params : NameSpace
Argparse.NameSpace object that contains all user provided parameters.

Returns
Expand All @@ -57,9 +57,9 @@ def check_init_parameter_inputs(user_params: NameSpace) -> bool:
Raises
------
BarcodeRequiredError
Raised if a multiple platemaps are found but not barcode file was provided
Raised if a multiple platemaps are found but no barcode file was provided
"""

# checking if barcode is required
if not is_barcode_required:
BarcodeRequiredError("Barcode is required, multiple platemaps found")
if is_barcode_required(user_params=user_params):
raise BarcodeRequiredError("Barcode is required, multiple platemaps found")
2 changes: 0 additions & 2 deletions cytosnake/helpers/helper_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
from pathlib import Path
from typing import Optional

from snakemake.io import expand

from cytosnake.guards.path_guards import is_valid_path
from cytosnake.utils.config_utils import load_general_configs, load_meta_path_configs

Expand Down
Empty file.
Empty file.
Empty file.
216 changes: 216 additions & 0 deletions cytosnake/tests/functional/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
"""
module: test_cli.py

This testing module composes of functional tests that contains checks for both positive
negative cases when using CytoSnake's CLI

A positive case indicates that given the user parameters we expect it to run
successfully.

A negative case indicates that given with the user parameters, our tests are able to
capture the errors.

Ultimately, test_cli.py will contains functional test to all modes that CytoSnake
contains.
"""

import os
import pathlib
import shutil
import subprocess
from typing import Optional

# import tempfile
# import pytest
# import subprocess
# import test_functions # This will contains helper functions for testing
axiomcura marked this conversation as resolved.
Show resolved Hide resolved
from cytosnake.common import errors


# -----------------
# Helper functions
# -----------------
class CleanUpHandler:
axiomcura marked this conversation as resolved.
Show resolved Hide resolved
"""Used to clean up directories in every single test run"""

def __init__(self, tmp_path):
self.tmp_path = tmp_path
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider the use of tempfile here to assist with the creation of temporary files or directories.


def __call__(self) -> None:
shutil.rmtree(self.tmp_path)


def transfer_data(
test_dir: pathlib.Path,
n_plates: int,
n_platemaps: int,
metadata_dir_name: Optional[str] = "metadata",
testing_data_dir="dummyfiles",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider the use of tempfile here (or within the function) to assist with the creation of temporary files or directories.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll take this to account, however, the testing module already contains a set of dummy datasets. This implementation will be improved once developing CytoSnake's testing framework #49

) -> None:
"""Wrapper function that transfer datasets found within the pytest module and
transfers it to the assigned directory where pytest is conducting the functional
tests.

Parameters
----------
test_dir : LocalPath
`PyTest.LocalPath` object that contains the path were the test is being
conducted
axiomcura marked this conversation as resolved.
Show resolved Hide resolved

Return
------
None
Transfers datafiles from the PyTest module to the testing directory
"""

# get files to transfer
dataset_dir = pathlib.Path(f"./datasets/{testing_data_dir}").resolve(strict=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pytest is sometimes used to run the entirety of the "tests" directory without being located within the relative structure of sub-folders. That said, consider making sure this line (and possibly others) are able to run without changes or document the expectations of this test file here (or alternatively within the docstring for the module).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see. Thanks for pointing this out. I doubled checked and added some documentation in regards to this!


# grabbing all input paths
sqlite_file_paths = list(dataset_dir.glob("*sqlite"))[:n_plates]
platemaps_dir = dataset_dir / "metadata" / "platemap"
plate_map_files = [
str(_path.absolute()) for _path in platemaps_dir.glob("platemap*")
][:n_platemaps]
barcode = dataset_dir / "barcode.txt"

# create a metadata_dir in tmp_dir
if not isinstance(metadata_dir_name, str):
raise ValueError("metadata dir name must be a string")

tmpdir_metadata_path = test_dir / metadata_dir_name / "platemap"
tmpdir_metadata_path.mkdir(exist_ok=True, parents=True)

# transferring all files to tmp dir
for _path in plate_map_files:
shutil.copy(_path, str(tmpdir_metadata_path))
for _path in sqlite_file_paths:
shutil.copy(_path, test_dir)
shutil.copy(barcode, test_dir)


def get_raised_error(traceback: str) -> str:
"""Parses traceback and attempts to obtain raised exception error.

Traceback is parsed in this order:
1. split by new lines
2. grab the last line as it contains the raised exception and message
3. split by ":" to separate exception name and exception message
4. grab the first element since it contains that path to exception
5. split by "." and grab last element, which is the exception name

Parameters
----------
traceback : str
complete traceback generated by executing CLI

Returns
-------
str
return raised exception error
"""

# returns exception name, refer to function documentation to understand
# the order of parsing the traceback to obtain exception name.
return traceback.splitlines()[-1].split(":")[0].split(".")[-1]


# --------------------------
# init mode functional tests
# --------------------------
# The tests below focuses on only executing the init mode.
def test_barcode_logic_no_barcode_one_platemap(tmp_path, request) -> None:
"""Positive case: This tests expects a successful run where the user provides
multiple plate datasets, plate map, and no barcode. Since this is only one plate_map
, this means that the generated dataset came from one experiment and multiple
samples (plates) were used to generated the datasets.
"""
# starting path
test_module = str(pathlib.Path().absolute())

# transfer dummy data to tmpdir
transfer_data(test_dir=tmp_path, n_plates=2, n_platemaps=1)

# change directory to tmpdir
os.chdir(tmp_path)

# execute CytoSnake
cmd = "cytosnake init -d *.sqlite -m metadata".split()
proc = subprocess.run(cmd, capture_output=True, text=True, check=False)

# leave test directory
os.chdir(test_module)

# clean directory,
cleanup_handler = CleanUpHandler(tmp_path)
request.addfinalizer(cleanup_handler)

# checking for success return code
assert proc.returncode == 0


def test_barcode_logic_barcode_multi_platemaps(tmp_path, request) -> None:
"""Positive case: This tests expects a successful run where the user provides
multiple plate datasets, multiple plate map, and barcode. Since this is only one
plate_map , this means that the generated dataset came from one experiment and
multiple samples (plates) were used to generated the datasets.
"""
# PyTest module directory
test_module = str(pathlib.Path().absolute())

# transfer dummy data to tmpdir
transfer_data(test_dir=tmp_path, n_plates=2, n_platemaps=2)

# change directory to tmpdir
os.chdir(tmp_path)

# execute CytoSnake
cmd = "cytosnake init -d *.sqlite -m metadata -b barcode.txt".split()
proc = subprocess.run(cmd, capture_output=True, text=True, check=False)

# leave testing dir
os.chdir(test_module)

# clean directory,
cleanup_handler = CleanUpHandler(tmp_path)
request.addfinalizer(cleanup_handler)

# checking for success return code
assert proc.returncode == 0


def test_barcode_logic_no_barcode_multi_platemaps(tmp_path, request) -> None:
"""Negative case: This test expects a failed run where the user provides multiple
plate datasets, multiple plate maps (multi-experiments), and no barcode. Since
there are plate maps, this indicates that the generated datasets came from multiple
experiments.

Checks:
-------
non-zero return code
BarCodeRequiredError raised
"""
# PyTest module directory
test_module = str(pathlib.Path().absolute())

# transfer dummy data to tmpdir
axiomcura marked this conversation as resolved.
Show resolved Hide resolved
transfer_data(test_dir=tmp_path, n_plates=2, n_platemaps=2)

# change directory to tmpdir
os.chdir(tmp_path)

# execute CytoSnake
cmd = "cytosnake init -d *.sqlite -m metadata"
proc = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=False)
raised_error = get_raised_error(proc.stderr)

# leave testing dir
os.chdir(test_module)
Comment on lines +209 to +218
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If possible, consider sending the subdirectory information directly to CytoSnake, avoiding the need to change directory both into and out of the tmpdir. This might be my own misunderstanding, so please feel free to ignore if this isn't possible or useful in the context of this test.

Alternatively, seeing how this pattern repeats, it may be useful to create a context manager for remembering to move back to a directory after changes have occurred. See this SO reference for one example.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the resource. This will be considered in #49


# clean directory,
cleanup_handler = CleanUpHandler(tmp_path)
request.addfinalizer(cleanup_handler)

# checking for sucess return code
axiomcura marked this conversation as resolved.
Show resolved Hide resolved
assert proc.returncode == 1
assert raised_error == errors.BarcodeRequiredError.__name__
1 change: 0 additions & 1 deletion cytosnake/utils/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ def load_data_path_configs():


def load_workflow_paths_config() -> dict:

# load in _path.yaml and select key where all workflow paths are
loaded_meta_paths = load_meta_path_configs()
return loaded_meta_paths["workflow_dir"]["workflow"]
Expand Down
1 change: 0 additions & 1 deletion cytosnake/utils/cytosnake_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def transport_project_files() -> None:
# - configs: contains yaml files providing workflow and cli configurations
target_dirs = ["workflows", "configs"]
for target_dir in target_dirs:

# construct source directory path
src_path = pkg_path / target_dir
target_dst = proj_path / target_dir
Expand Down
Loading