From 1b7844cf8d6fdd2b631e0b731edf75fb756390ea Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 10 Jun 2024 12:21:05 -0600 Subject: [PATCH 01/40] Create qcdataframe.py --- src/cosmicqc/qcdataframe.py | 63 +++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 src/cosmicqc/qcdataframe.py diff --git a/src/cosmicqc/qcdataframe.py b/src/cosmicqc/qcdataframe.py new file mode 100644 index 0000000..ba25c6f --- /dev/null +++ b/src/cosmicqc/qcdataframe.py @@ -0,0 +1,63 @@ +""" +Defines a QCDataFrame class for use in coSMicQC. +""" + +import pandas as pd +from typing import Union + +class QCDataFrame: + """ + A class to handle and load different types of data files into a pandas DataFrame. + + This class can initialize with either a pandas DataFrame or a file path (CSV, TSV, + TXT, or Parquet). When initialized with a file path, it reads the data into a + pandas DataFrame. + + Attributes: + reference (str): + A string indicating the type of data source, either 'pd.DataFrame' + or the file path. + data (pd.DataFrame): + The loaded data in a pandas DataFrame. + + Methods: + __call__(): + Returns the underlying pandas DataFrame. + """ + + def __init__(self, data: Union[pd.DataFrame, str], **kwargs) -> None: + """ + Initializes the QCDataFrame with either a DataFrame or a file path. + + Args: + data (Union[pd.DataFrame, str]): + The data source, either a pandas DataFrame or a file path. + **kwargs: + Additional keyword arguments to pass to the pandas read functions. + """ + if isinstance(data, pd.DataFrame): + # if data is a pd.DataFrame, remember this within the reference attr + self.reference = "pd.DataFrame" + self.data = data + elif isinstance(data, str): + # if the data is a string, remember the original source + # through a reference attr + self.reference = data + + # Read the data from the file based on its extension + if data.endswith(".csv"): + self.data = pd.read_csv(data, **kwargs) + elif data.endswith(".tsv") or data.endswith(".txt"): + self.data = pd.read_csv(data, delimiter="\t", **kwargs) + elif data.endswith(".parquet"): + self.data = pd.read_parquet(data, **kwargs) + + def __call__(self) -> pd.DataFrame: + """ + Returns the underlying pandas DataFrame. + + Returns: + pd.DataFrame: The data in a pandas DataFrame. + """ + return self.data + From 978484127fd4bf00a96547d0b8b08bcd68495755 Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 10 Jun 2024 12:33:33 -0600 Subject: [PATCH 02/40] linting --- src/cosmicqc/qcdataframe.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/cosmicqc/qcdataframe.py b/src/cosmicqc/qcdataframe.py index ba25c6f..7a229fb 100644 --- a/src/cosmicqc/qcdataframe.py +++ b/src/cosmicqc/qcdataframe.py @@ -2,8 +2,10 @@ Defines a QCDataFrame class for use in coSMicQC. """ +from typing import Any, Dict, Self, Union + import pandas as pd -from typing import Union + class QCDataFrame: """ @@ -25,7 +27,9 @@ class QCDataFrame: Returns the underlying pandas DataFrame. """ - def __init__(self, data: Union[pd.DataFrame, str], **kwargs) -> None: + def __init__( + self: Self, data: Union[pd.DataFrame, str], **kwargs: Dict[str, Any] + ) -> None: """ Initializes the QCDataFrame with either a DataFrame or a file path. @@ -52,7 +56,7 @@ def __init__(self, data: Union[pd.DataFrame, str], **kwargs) -> None: elif data.endswith(".parquet"): self.data = pd.read_parquet(data, **kwargs) - def __call__(self) -> pd.DataFrame: + def __call__(self: Self) -> pd.DataFrame: """ Returns the underlying pandas DataFrame. 
@@ -60,4 +64,3 @@ def __call__(self) -> pd.DataFrame: pd.DataFrame: The data in a pandas DataFrame. """ return self.data - From c39e2b0c4f000289ffcb87b107ce964bb1ef80fe Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 10 Jun 2024 14:04:42 -0600 Subject: [PATCH 03/40] add qcdataframe --- src/cosmicqc/__init__.py | 1 + src/cosmicqc/analyze.py | 32 ++++++++++++++------ src/cosmicqc/qcdataframe.py | 58 ++++++++++++++++++++++++++++++++++--- tests/conftest.py | 41 ++++++++++++++++++++++++++ tests/test_analyze.py | 18 +++++++++++- tests/test_qcdataframe.py | 42 +++++++++++++++++++++++++++ 6 files changed, 178 insertions(+), 14 deletions(-) create mode 100644 tests/test_qcdataframe.py diff --git a/src/cosmicqc/__init__.py b/src/cosmicqc/__init__.py index aca6bd8..82f4e10 100644 --- a/src/cosmicqc/__init__.py +++ b/src/cosmicqc/__init__.py @@ -3,6 +3,7 @@ """ from .analyze import find_outliers +from .qcdataframe import QCDataFrame # note: version placeholder is updated during build # by poetry-dynamic-versioning. diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index 9f33c4b..70833a6 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -11,13 +11,15 @@ import yaml from scipy.stats import zscore as scipy_zscore +from .qcdataframe import QCDataFrame + DEFAULT_QC_THRESHOLD_FILE = ( f"{pathlib.Path(__file__).parent!s}/data/qc_nuclei_thresholds_default.yml" ) def identify_outliers( - df: pd.DataFrame, + df: Union[QCDataFrame, pd.DataFrame, str], feature_thresholds: Union[Dict[str, float], str], feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, include_threshold_scores: bool = False, @@ -30,8 +32,8 @@ def identify_outliers( threshold of 0 as that would represent the whole dataset. Args: - df: pd.DataFrame - Data frame with converted output from CytoTable. + df: Union[QCDataFrame, pd.DataFrame, str] + DataFrame or file with converted output from CytoTable. metadata_columns: List[str] List of metadata columns that should be outputted with the outlier data. feature_thresholds: Dict[str, float] @@ -52,6 +54,10 @@ def identify_outliers( or not for use within other functions. """ + # interpret the df as QCDataFrame + if not isinstance(df, QCDataFrame): + df = QCDataFrame(data=df) + # create a copy of the dataframe to ensure # we don't modify the supplied dataframe inplace. outlier_df = df.copy() @@ -107,7 +113,7 @@ def identify_outliers( def find_outliers( - df: pd.DataFrame, + df: Union[QCDataFrame, pd.DataFrame, str], metadata_columns: List[str], feature_thresholds: Union[Dict[str, float], str], feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, @@ -117,8 +123,8 @@ def find_outliers( with only the outliers and provided metadata columns. Args: - df: pd.DataFrame - Data frame with converted output from CytoTable. + df: Union[QCDataFrame, pd.DataFrame, str] + DataFrame or file with converted output from CytoTable. metadata_columns: List[str] List of metadata columns that should be outputted with the outlier data. feature_thresholds: Dict[str, float] @@ -138,6 +144,10 @@ def find_outliers( Outlier data frame for the given conditions. 
""" + # interpret the df as QCDataFrame + if not isinstance(df, QCDataFrame): + df = QCDataFrame(data=df) + if isinstance(feature_thresholds, str): feature_thresholds = read_thresholds_set_from_file( feature_thresholds=feature_thresholds, @@ -169,7 +179,7 @@ def find_outliers( def label_outliers( - df: pd.DataFrame, + df: Union[QCDataFrame, pd.DataFrame, str], feature_thresholds: Optional[Union[Dict[str, float], str]] = None, feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, include_threshold_scores: bool = False, @@ -179,8 +189,8 @@ def label_outliers( where a cell passed or failed the quality control condition(s). Args: - df: pd.DataFrame - Data frame with converted output from CytoTable. + df: Union[QCDataFrame, pd.DataFrame, str] + DataFrame or file with converted output from CytoTable. feature_thresholds: Dict[str, float] One of two options: A dictionary with the feature name(s) as the key(s) and their assigned @@ -201,6 +211,10 @@ def label_outliers( Full dataframe with optional scores and outlier boolean column. """ + # interpret the df as QCDataFrame + if not isinstance(df, QCDataFrame): + df = QCDataFrame(data=df) + # for single outlier processing if isinstance(feature_thresholds, (str, dict)): # return the outlier dataframe for one threshold rule diff --git a/src/cosmicqc/qcdataframe.py b/src/cosmicqc/qcdataframe.py index 7a229fb..3ed24fa 100644 --- a/src/cosmicqc/qcdataframe.py +++ b/src/cosmicqc/qcdataframe.py @@ -2,6 +2,7 @@ Defines a QCDataFrame class for use in coSMicQC. """ +import pathlib from typing import Any, Dict, Self, Union import pandas as pd @@ -39,23 +40,38 @@ def __init__( **kwargs: Additional keyword arguments to pass to the pandas read functions. """ + + # print(data) + # print(type(data)) + # print(isinstance(data, QCDataFrame)) + if isinstance(data, pd.DataFrame): # if data is a pd.DataFrame, remember this within the reference attr self.reference = "pd.DataFrame" self.data = data - elif isinstance(data, str): + + elif isinstance(data, pathlib.Path | str): # if the data is a string, remember the original source # through a reference attr self.reference = data + # interpret the data through pathlib + data_path = pathlib.Path(data) + # Read the data from the file based on its extension - if data.endswith(".csv"): + if data_path.suffix == ".csv": + # read as a CSV self.data = pd.read_csv(data, **kwargs) - elif data.endswith(".tsv") or data.endswith(".txt"): + elif data_path.suffix in (".tsv", ".txt"): + # read as a TSV self.data = pd.read_csv(data, delimiter="\t", **kwargs) - elif data.endswith(".parquet"): + elif data_path.suffix == ".parquet": + # read as a Parquet file self.data = pd.read_parquet(data, **kwargs) + else: + raise ValueError("Unsupported file format for QCDataFrame.") + def __call__(self: Self) -> pd.DataFrame: """ Returns the underlying pandas DataFrame. @@ -64,3 +80,37 @@ def __call__(self: Self) -> pd.DataFrame: pd.DataFrame: The data in a pandas DataFrame. """ return self.data + + def __repr__(self: Self) -> pd.DataFrame: + """ + Returns the underlying pandas DataFrame. + + Returns: + pd.DataFrame: The data in a pandas DataFrame. + """ + return self.data + + def __getattr__(self: Self, attr: str) -> Any: # noqa: ANN401 + """ + Intercept attribute accesses and delegate them to the underlying + pandas DataFrame. + + Args: + attr (str): The name of the attribute being accessed. + + Returns: + Any: The value of the attribute from the pandas DataFrame. 
+ """ + return getattr(self.data, attr) + + def __getitem__(self: Self, key: Union[int, str]) -> Any: # noqa: ANN401 + """ + Returns an element or a slice of the underlying pandas DataFrame. + + Args: + key: The key or slice to access the data. + + Returns: + pd.DataFrame or any: The selected element or slice of data. + """ + return self.data[key] diff --git a/tests/conftest.py b/tests/conftest.py index a8f796a..967ad2a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,8 @@ https://docs.pytest.org/en/7.1.x/explanation/fixtures.html """ +import pathlib + import pandas as pd import pytest @@ -24,3 +26,42 @@ def fixture_basic_outlier_dataframe(): Creates basic example data for use in tests """ return pd.DataFrame({"example_feature": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + + +@pytest.fixture(name="basic_outlier_csv") +def fixture_basic_outlier_csv( + tmp_path: pathlib.Path, basic_outlier_dataframe: pd.DataFrame +): + """ + Creates basic example data csv for use in tests + """ + + basic_outlier_dataframe.to_csv(csv_path := tmp_path / "example.csv") + + return csv_path + + +@pytest.fixture(name="basic_outlier_tsv") +def fixture_basic_outlier_tsv( + tmp_path: pathlib.Path, basic_outlier_dataframe: pd.DataFrame +): + """ + Creates basic example data tsv for use in tests + """ + + basic_outlier_dataframe.to_csv(tsv_path := tmp_path / "example.tsv", sep="\t") + + return tsv_path + + +@pytest.fixture(name="basic_outlier_parquet") +def fixture_basic_outlier_parquet( + tmp_path: pathlib.Path, basic_outlier_dataframe: pd.DataFrame +): + """ + Creates basic example data parquet for use in tests + """ + + basic_outlier_dataframe.to_parquet(parquet_path := tmp_path / "example.parquet") + + return parquet_path diff --git a/tests/test_analyze.py b/tests/test_analyze.py index 6c7a879..a55cf6b 100644 --- a/tests/test_analyze.py +++ b/tests/test_analyze.py @@ -7,7 +7,7 @@ from cosmicqc import analyze -def test_find_outliers_basic(basic_outlier_dataframe: pd.DataFrame): +def test_find_outliers_basic_dataframe(basic_outlier_dataframe: pd.DataFrame): """ Testing find_outliers with basic/simulated data. """ @@ -27,6 +27,21 @@ def test_find_outliers_basic(basic_outlier_dataframe: pd.DataFrame): } +def test_find_outliers_basic_csv(basic_outlier_csv: str): + """ + Testing find_outliers with csv data. + """ + + # assert that we have the output we expect + assert analyze.find_outliers( + df=basic_outlier_csv, + feature_thresholds={"example_feature": 1}, + metadata_columns=[], + ).to_dict(orient="dict") == { + "example_feature": {8: 9, 9: 10}, + } + + def test_find_outliers_cfret(cytotable_CFReT_data_df: pd.DataFrame): """ Testing find_outliers with CytoTable CFReT data. @@ -315,6 +330,7 @@ def test_find_outliers_dict_and_default_config_cfret( def test_label_outliers( basic_outlier_dataframe: pd.DataFrame, + basic_outlier_csv: str, cytotable_CFReT_data_df: pd.DataFrame, ): """ diff --git a/tests/test_qcdataframe.py b/tests/test_qcdataframe.py new file mode 100644 index 0000000..1124305 --- /dev/null +++ b/tests/test_qcdataframe.py @@ -0,0 +1,42 @@ +""" +Tests cosmicqc qcdataframe module +""" + +import pandas as pd +from cosmicqc.qcdataframe import QCDataFrame + +def test_qcdataframe_init_with_dataframe(basic_outlier_dataframe: pd.DataFrame): + """ + Tests QCDataFrame with pd.DataFrame input. 
+ """ + qc_df = QCDataFrame(data=basic_outlier_dataframe) + assert qc_df.reference == "pd.DataFrame" + assert qc_df.equals(basic_outlier_dataframe) + +def test_qcdataframe_init_with_csv(basic_outlier_csv:str): + """ + Tests QCDataFrame with CSV input. + """ + qc_df = QCDataFrame(data=basic_outlier_csv) + expected_df = pd.read_csv(basic_outlier_csv) + assert qc_df.reference == basic_outlier_csv + assert qc_df.equals(expected_df) + +def test_qcdataframe_init_with_tsv(basic_outlier_tsv:str): + """ + Tests QCDataFrame with TSV input. + """ + qc_df = QCDataFrame(data=basic_outlier_tsv) + expected_df = pd.read_csv(basic_outlier_tsv, delimiter='\t') + assert qc_df.reference == basic_outlier_tsv + assert qc_df.equals(expected_df) + +def test_qcdataframe_init_with_parquet(basic_outlier_parquet:str): + """ + Tests QCDataFrame with TSV input. + """ + qc_df = QCDataFrame(data=basic_outlier_parquet) + expected_df = pd.read_parquet(basic_outlier_parquet) + assert qc_df.reference == basic_outlier_parquet + assert qc_df.equals(expected_df) + From f3003e1de72a83a281308149acefc29fe4e63683 Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 10 Jun 2024 14:04:54 -0600 Subject: [PATCH 04/40] linting --- tests/test_qcdataframe.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/test_qcdataframe.py b/tests/test_qcdataframe.py index 1124305..1b1fad4 100644 --- a/tests/test_qcdataframe.py +++ b/tests/test_qcdataframe.py @@ -5,6 +5,7 @@ import pandas as pd from cosmicqc.qcdataframe import QCDataFrame + def test_qcdataframe_init_with_dataframe(basic_outlier_dataframe: pd.DataFrame): """ Tests QCDataFrame with pd.DataFrame input. @@ -13,7 +14,8 @@ def test_qcdataframe_init_with_dataframe(basic_outlier_dataframe: pd.DataFrame): assert qc_df.reference == "pd.DataFrame" assert qc_df.equals(basic_outlier_dataframe) -def test_qcdataframe_init_with_csv(basic_outlier_csv:str): + +def test_qcdataframe_init_with_csv(basic_outlier_csv: str): """ Tests QCDataFrame with CSV input. """ @@ -22,16 +24,18 @@ def test_qcdataframe_init_with_csv(basic_outlier_csv:str): assert qc_df.reference == basic_outlier_csv assert qc_df.equals(expected_df) -def test_qcdataframe_init_with_tsv(basic_outlier_tsv:str): + +def test_qcdataframe_init_with_tsv(basic_outlier_tsv: str): """ Tests QCDataFrame with TSV input. """ qc_df = QCDataFrame(data=basic_outlier_tsv) - expected_df = pd.read_csv(basic_outlier_tsv, delimiter='\t') + expected_df = pd.read_csv(basic_outlier_tsv, delimiter="\t") assert qc_df.reference == basic_outlier_tsv assert qc_df.equals(expected_df) -def test_qcdataframe_init_with_parquet(basic_outlier_parquet:str): + +def test_qcdataframe_init_with_parquet(basic_outlier_parquet: str): """ Tests QCDataFrame with TSV input. 
""" @@ -39,4 +43,3 @@ def test_qcdataframe_init_with_parquet(basic_outlier_parquet:str): expected_df = pd.read_parquet(basic_outlier_parquet) assert qc_df.reference == basic_outlier_parquet assert qc_df.equals(expected_df) - From b97f3a57aaa273b19fd61276db5bfe391b21de6d Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 10 Jun 2024 16:14:27 -0600 Subject: [PATCH 05/40] adding tests --- src/cosmicqc/qcdataframe.py | 4 ---- tests/conftest.py | 6 +++--- tests/test_analyze.py | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/cosmicqc/qcdataframe.py b/src/cosmicqc/qcdataframe.py index 3ed24fa..8498311 100644 --- a/src/cosmicqc/qcdataframe.py +++ b/src/cosmicqc/qcdataframe.py @@ -41,10 +41,6 @@ def __init__( Additional keyword arguments to pass to the pandas read functions. """ - # print(data) - # print(type(data)) - # print(isinstance(data, QCDataFrame)) - if isinstance(data, pd.DataFrame): # if data is a pd.DataFrame, remember this within the reference attr self.reference = "pd.DataFrame" diff --git a/tests/conftest.py b/tests/conftest.py index 967ad2a..ad0c17e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -36,7 +36,7 @@ def fixture_basic_outlier_csv( Creates basic example data csv for use in tests """ - basic_outlier_dataframe.to_csv(csv_path := tmp_path / "example.csv") + basic_outlier_dataframe.to_csv(csv_path := tmp_path / "example.csv", index=False) return csv_path @@ -49,7 +49,7 @@ def fixture_basic_outlier_tsv( Creates basic example data tsv for use in tests """ - basic_outlier_dataframe.to_csv(tsv_path := tmp_path / "example.tsv", sep="\t") + basic_outlier_dataframe.to_csv(tsv_path := tmp_path / "example.tsv", sep="\t", index=False) return tsv_path @@ -62,6 +62,6 @@ def fixture_basic_outlier_parquet( Creates basic example data parquet for use in tests """ - basic_outlier_dataframe.to_parquet(parquet_path := tmp_path / "example.parquet") + basic_outlier_dataframe.to_parquet(parquet_path := tmp_path / "example.parquet", index=False) return parquet_path diff --git a/tests/test_analyze.py b/tests/test_analyze.py index a55cf6b..26ea6db 100644 --- a/tests/test_analyze.py +++ b/tests/test_analyze.py @@ -337,6 +337,21 @@ def test_label_outliers( Tests label_outliers """ + pd.testing.assert_frame_equal( + analyze.label_outliers( + df=basic_outlier_dataframe, + feature_thresholds={"example_feature": 1}, + include_threshold_scores=True, + ), + analyze.label_outliers( + df=basic_outlier_csv, + feature_thresholds={"example_feature": 1}, + include_threshold_scores=True, + ), + ) + + + # test basic single-column result with zscores assert analyze.label_outliers( df=basic_outlier_dataframe, From 1069a4af958de2ff12041d4282ca55c457ce7143 Mon Sep 17 00:00:00 2001 From: d33bs Date: Mon, 10 Jun 2024 16:14:38 -0600 Subject: [PATCH 06/40] linting --- tests/conftest.py | 8 ++++++-- tests/test_analyze.py | 2 -- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index ad0c17e..e3fb24c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -49,7 +49,9 @@ def fixture_basic_outlier_tsv( Creates basic example data tsv for use in tests """ - basic_outlier_dataframe.to_csv(tsv_path := tmp_path / "example.tsv", sep="\t", index=False) + basic_outlier_dataframe.to_csv( + tsv_path := tmp_path / "example.tsv", sep="\t", index=False + ) return tsv_path @@ -62,6 +64,8 @@ def fixture_basic_outlier_parquet( Creates basic example data parquet for use in tests """ - basic_outlier_dataframe.to_parquet(parquet_path := tmp_path / 
"example.parquet", index=False) + basic_outlier_dataframe.to_parquet( + parquet_path := tmp_path / "example.parquet", index=False + ) return parquet_path diff --git a/tests/test_analyze.py b/tests/test_analyze.py index 26ea6db..21244b9 100644 --- a/tests/test_analyze.py +++ b/tests/test_analyze.py @@ -350,8 +350,6 @@ def test_label_outliers( ), ) - - # test basic single-column result with zscores assert analyze.label_outliers( df=basic_outlier_dataframe, From d52a89f639fe9395f0a3384677333996a39b8233 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 11 Jun 2024 08:48:27 -0600 Subject: [PATCH 07/40] update name, tests --- src/cosmicqc/__init__.py | 4 +- src/cosmicqc/analyze.py | 32 +++++++------- .../{qcdataframe.py => scdataframe.py} | 33 ++++++++------ tests/test_analyze.py | 16 +++++++ tests/test_qcdataframe.py | 44 +++++++++---------- 5 files changed, 76 insertions(+), 53 deletions(-) rename src/cosmicqc/{qcdataframe.py => scdataframe.py} (77%) diff --git a/src/cosmicqc/__init__.py b/src/cosmicqc/__init__.py index 82f4e10..05b2bbe 100644 --- a/src/cosmicqc/__init__.py +++ b/src/cosmicqc/__init__.py @@ -3,8 +3,8 @@ """ from .analyze import find_outliers -from .qcdataframe import QCDataFrame +from .scdataframe import SCDataFrame # note: version placeholder is updated during build # by poetry-dynamic-versioning. -__version__ = "0.0.1" +__version__ = "0.0.0" diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index 70833a6..d616f78 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -11,7 +11,7 @@ import yaml from scipy.stats import zscore as scipy_zscore -from .qcdataframe import QCDataFrame +from .scdataframe import SCDataFrame DEFAULT_QC_THRESHOLD_FILE = ( f"{pathlib.Path(__file__).parent!s}/data/qc_nuclei_thresholds_default.yml" @@ -19,7 +19,7 @@ def identify_outliers( - df: Union[QCDataFrame, pd.DataFrame, str], + df: Union[SCDataFrame, pd.DataFrame, str], feature_thresholds: Union[Dict[str, float], str], feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, include_threshold_scores: bool = False, @@ -32,7 +32,7 @@ def identify_outliers( threshold of 0 as that would represent the whole dataset. Args: - df: Union[QCDataFrame, pd.DataFrame, str] + df: Union[SCDataFrame, pd.DataFrame, str] DataFrame or file with converted output from CytoTable. metadata_columns: List[str] List of metadata columns that should be outputted with the outlier data. @@ -54,9 +54,9 @@ def identify_outliers( or not for use within other functions. """ - # interpret the df as QCDataFrame - if not isinstance(df, QCDataFrame): - df = QCDataFrame(data=df) + # interpret the df as SCDataFrame + if not isinstance(df, SCDataFrame): + df = SCDataFrame(data=df) # create a copy of the dataframe to ensure # we don't modify the supplied dataframe inplace. @@ -113,7 +113,7 @@ def identify_outliers( def find_outliers( - df: Union[QCDataFrame, pd.DataFrame, str], + df: Union[SCDataFrame, pd.DataFrame, str], metadata_columns: List[str], feature_thresholds: Union[Dict[str, float], str], feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, @@ -123,7 +123,7 @@ def find_outliers( with only the outliers and provided metadata columns. Args: - df: Union[QCDataFrame, pd.DataFrame, str] + df: Union[SCDataFrame, pd.DataFrame, str] DataFrame or file with converted output from CytoTable. metadata_columns: List[str] List of metadata columns that should be outputted with the outlier data. @@ -144,9 +144,9 @@ def find_outliers( Outlier data frame for the given conditions. 
""" - # interpret the df as QCDataFrame - if not isinstance(df, QCDataFrame): - df = QCDataFrame(data=df) + # interpret the df as SCDataFrame + if not isinstance(df, SCDataFrame): + df = SCDataFrame(data=df) if isinstance(feature_thresholds, str): feature_thresholds = read_thresholds_set_from_file( @@ -179,7 +179,7 @@ def find_outliers( def label_outliers( - df: Union[QCDataFrame, pd.DataFrame, str], + df: Union[SCDataFrame, pd.DataFrame, str], feature_thresholds: Optional[Union[Dict[str, float], str]] = None, feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, include_threshold_scores: bool = False, @@ -189,7 +189,7 @@ def label_outliers( where a cell passed or failed the quality control condition(s). Args: - df: Union[QCDataFrame, pd.DataFrame, str] + df: Union[SCDataFrame, pd.DataFrame, str] DataFrame or file with converted output from CytoTable. feature_thresholds: Dict[str, float] One of two options: @@ -211,9 +211,9 @@ def label_outliers( Full dataframe with optional scores and outlier boolean column. """ - # interpret the df as QCDataFrame - if not isinstance(df, QCDataFrame): - df = QCDataFrame(data=df) + # interpret the df as SCDataFrame + if not isinstance(df, SCDataFrame): + df = SCDataFrame(data=df) # for single outlier processing if isinstance(feature_thresholds, (str, dict)): diff --git a/src/cosmicqc/qcdataframe.py b/src/cosmicqc/scdataframe.py similarity index 77% rename from src/cosmicqc/qcdataframe.py rename to src/cosmicqc/scdataframe.py index 8498311..da0e1c1 100644 --- a/src/cosmicqc/qcdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -1,5 +1,5 @@ """ -Defines a QCDataFrame class for use in coSMicQC. +Defines a SCDataFrame class for use in coSMicQC. """ import pathlib @@ -8,7 +8,7 @@ import pandas as pd -class QCDataFrame: +class SCDataFrame: """ A class to handle and load different types of data files into a pandas DataFrame. @@ -17,8 +17,8 @@ class QCDataFrame: pandas DataFrame. Attributes: - reference (str): - A string indicating the type of data source, either 'pd.DataFrame' + data_source (str): + A string indicating the data source, either 'pd.DataFrame' or the file path. data (pd.DataFrame): The loaded data in a pandas DataFrame. @@ -26,13 +26,19 @@ class QCDataFrame: Methods: __call__(): Returns the underlying pandas DataFrame. + __repr__(): + Returns representation of underlying pandas DataFrame. + __getattr__(): + Returns underlying attributes of pandas DataFrame. + __getitem__(): + Returns slice of data from pandas DataFrame. """ def __init__( self: Self, data: Union[pd.DataFrame, str], **kwargs: Dict[str, Any] ) -> None: """ - Initializes the QCDataFrame with either a DataFrame or a file path. + Initializes the SCDataFrame with either a DataFrame or a file path. 
Args: data (Union[pd.DataFrame, str]): @@ -42,14 +48,14 @@ def __init__( """ if isinstance(data, pd.DataFrame): - # if data is a pd.DataFrame, remember this within the reference attr - self.reference = "pd.DataFrame" + # if data is a pd.DataFrame, remember this within the data_source attr + self.data_source = "pd.DataFrame" self.data = data elif isinstance(data, pathlib.Path | str): # if the data is a string, remember the original source - # through a reference attr - self.reference = data + # through a data_source attr + self.data_source = data # interpret the data through pathlib data_path = pathlib.Path(data) @@ -64,9 +70,10 @@ def __init__( elif data_path.suffix == ".parquet": # read as a Parquet file self.data = pd.read_parquet(data, **kwargs) - + else: + raise ValueError("Unsupported file format for SCDataFrame.") else: - raise ValueError("Unsupported file format for QCDataFrame.") + raise ValueError("Unsupported input type for SCDataFrame.") def __call__(self: Self) -> pd.DataFrame: """ @@ -79,12 +86,12 @@ def __call__(self: Self) -> pd.DataFrame: def __repr__(self: Self) -> pd.DataFrame: """ - Returns the underlying pandas DataFrame. + Returns the representation of underlying pandas DataFrame. Returns: pd.DataFrame: The data in a pandas DataFrame. """ - return self.data + return repr(self.data) def __getattr__(self: Self, attr: str) -> Any: # noqa: ANN401 """ diff --git a/tests/test_analyze.py b/tests/test_analyze.py index 21244b9..6ade9a2 100644 --- a/tests/test_analyze.py +++ b/tests/test_analyze.py @@ -337,6 +337,7 @@ def test_label_outliers( Tests label_outliers """ + # compare the dataframe vs csv output to make sure they are equivalent pd.testing.assert_frame_equal( analyze.label_outliers( df=basic_outlier_dataframe, @@ -451,12 +452,27 @@ def test_label_outliers( def test_identify_outliers( basic_outlier_dataframe: pd.DataFrame, + basic_outlier_csv: str, cytotable_CFReT_data_df: pd.DataFrame, ): """ Tests identify_outliers """ + # show that dataframe and csv output are the same + pd.testing.assert_frame_equal( + analyze.identify_outliers( + df=basic_outlier_dataframe, + feature_thresholds={"example_feature": 1}, + include_threshold_scores=True, + ), + analyze.identify_outliers( + df=basic_outlier_csv, + feature_thresholds={"example_feature": 1}, + include_threshold_scores=True, + ), + ) + assert analyze.identify_outliers( df=basic_outlier_dataframe, feature_thresholds={"example_feature": 1}, diff --git a/tests/test_qcdataframe.py b/tests/test_qcdataframe.py index 1b1fad4..d2ec019 100644 --- a/tests/test_qcdataframe.py +++ b/tests/test_qcdataframe.py @@ -1,45 +1,45 @@ """ -Tests cosmicqc qcdataframe module +Tests cosmicqc SCDataFrame module """ import pandas as pd -from cosmicqc.qcdataframe import QCDataFrame +from cosmicqc.scdataframe import SCDataFrame -def test_qcdataframe_init_with_dataframe(basic_outlier_dataframe: pd.DataFrame): +def test_SCDataFrame_init_with_dataframe(basic_outlier_dataframe: pd.DataFrame): """ - Tests QCDataFrame with pd.DataFrame input. + Tests SCDataFrame with pd.DataFrame input. """ - qc_df = QCDataFrame(data=basic_outlier_dataframe) - assert qc_df.reference == "pd.DataFrame" - assert qc_df.equals(basic_outlier_dataframe) + sc_df = SCDataFrame(data=basic_outlier_dataframe) + assert sc_df.data_source == "pd.DataFrame" + assert sc_df.equals(basic_outlier_dataframe) -def test_qcdataframe_init_with_csv(basic_outlier_csv: str): +def test_SCDataFrame_init_with_csv(basic_outlier_csv: str): """ - Tests QCDataFrame with CSV input. 
+ Tests SCDataFrame with CSV input. """ - qc_df = QCDataFrame(data=basic_outlier_csv) + sc_df = SCDataFrame(data=basic_outlier_csv) expected_df = pd.read_csv(basic_outlier_csv) - assert qc_df.reference == basic_outlier_csv - assert qc_df.equals(expected_df) + assert sc_df.data_source == basic_outlier_csv + assert sc_df.equals(expected_df) -def test_qcdataframe_init_with_tsv(basic_outlier_tsv: str): +def test_SCDataFrame_init_with_tsv(basic_outlier_tsv: str): """ - Tests QCDataFrame with TSV input. + Tests SCDataFrame with TSV input. """ - qc_df = QCDataFrame(data=basic_outlier_tsv) + sc_df = SCDataFrame(data=basic_outlier_tsv) expected_df = pd.read_csv(basic_outlier_tsv, delimiter="\t") - assert qc_df.reference == basic_outlier_tsv - assert qc_df.equals(expected_df) + assert sc_df.data_source == basic_outlier_tsv + assert sc_df.equals(expected_df) -def test_qcdataframe_init_with_parquet(basic_outlier_parquet: str): +def test_SCDataFrame_init_with_parquet(basic_outlier_parquet: str): """ - Tests QCDataFrame with TSV input. + Tests SCDataFrame with TSV input. """ - qc_df = QCDataFrame(data=basic_outlier_parquet) + sc_df = SCDataFrame(data=basic_outlier_parquet) expected_df = pd.read_parquet(basic_outlier_parquet) - assert qc_df.reference == basic_outlier_parquet - assert qc_df.equals(expected_df) + assert sc_df.data_source == basic_outlier_parquet + assert sc_df.equals(expected_df) From 1bf8262dcdf160592ffef24cdb0c62b9c0c277a4 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 11 Jun 2024 08:58:09 -0600 Subject: [PATCH 08/40] add back compat for self type --- src/cosmicqc/scdataframe.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index da0e1c1..37e52a7 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -3,10 +3,14 @@ """ import pathlib -from typing import Any, Dict, Self, Union +from typing import Any, Dict, TypeVar, Union import pandas as pd +# provide backwards compatibility for Self type in earlier Python versions. +# see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods +Self_SCDataFrame = TypeVar("Self_SCDataFrame", bound="SCDataFrame") + class SCDataFrame: """ @@ -35,7 +39,7 @@ class SCDataFrame: """ def __init__( - self: Self, data: Union[pd.DataFrame, str], **kwargs: Dict[str, Any] + self: Self_SCDataFrame, data: Union[pd.DataFrame, str], **kwargs: Dict[str, Any] ) -> None: """ Initializes the SCDataFrame with either a DataFrame or a file path. @@ -75,7 +79,7 @@ def __init__( else: raise ValueError("Unsupported input type for SCDataFrame.") - def __call__(self: Self) -> pd.DataFrame: + def __call__(self: Self_SCDataFrame) -> pd.DataFrame: """ Returns the underlying pandas DataFrame. @@ -84,7 +88,7 @@ def __call__(self: Self) -> pd.DataFrame: """ return self.data - def __repr__(self: Self) -> pd.DataFrame: + def __repr__(self: Self_SCDataFrame) -> pd.DataFrame: """ Returns the representation of underlying pandas DataFrame. @@ -93,7 +97,7 @@ def __repr__(self: Self) -> pd.DataFrame: """ return repr(self.data) - def __getattr__(self: Self, attr: str) -> Any: # noqa: ANN401 + def __getattr__(self: Self_SCDataFrame, attr: str) -> Any: # noqa: ANN401 """ Intercept attribute accesses and delegate them to the underlying pandas DataFrame. 
@@ -106,7 +110,7 @@ def __getattr__(self: Self, attr: str) -> Any: # noqa: ANN401 """ return getattr(self.data, attr) - def __getitem__(self: Self, key: Union[int, str]) -> Any: # noqa: ANN401 + def __getitem__(self: Self_SCDataFrame, key: Union[int, str]) -> Any: # noqa: ANN401 """ Returns an element or a slice of the underlying pandas DataFrame. From 772f89560e7fac449cd4bf6396779ef5182b46d9 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 11 Jun 2024 09:07:13 -0600 Subject: [PATCH 09/40] back compat for isinstance --- src/cosmicqc/scdataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index 37e52a7..3fc349c 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -56,7 +56,7 @@ def __init__( self.data_source = "pd.DataFrame" self.data = data - elif isinstance(data, pathlib.Path | str): + elif isinstance(data, pathlib.Path) or isinstance(data, str): # noqa: PLR1701, SIM101 # if the data is a string, remember the original source # through a data_source attr self.data_source = data From d0ea33c83e114a1454ed7448e957da97b0b18e8d Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 11 Jun 2024 09:08:18 -0600 Subject: [PATCH 10/40] linting --- src/cosmicqc/scdataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index 3fc349c..f94bdd2 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -56,7 +56,7 @@ def __init__( self.data_source = "pd.DataFrame" self.data = data - elif isinstance(data, pathlib.Path) or isinstance(data, str): # noqa: PLR1701, SIM101 + elif isinstance(data, pathlib.Path) or isinstance(data, str): # noqa: PLR1701, SIM101 # if the data is a string, remember the original source # through a data_source attr self.data_source = data From fe3fcf07dbedb13e33617d4add73e22753251443 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 11 Jun 2024 15:13:02 -0600 Subject: [PATCH 11/40] add cli for cosmicqc --- poetry.lock | 104 ++++++++++++++++++++++++++++---------------- pyproject.toml | 4 ++ src/cosmicqc/cli.py | 25 +++++++++++ 3 files changed, 95 insertions(+), 38 deletions(-) create mode 100644 src/cosmicqc/cli.py diff --git a/poetry.lock b/poetry.lock index a932b27..a8a823e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -25,6 +25,20 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "fire" +version = "0.6.0" +description = "A library for automatically generating command line interfaces." 
+optional = false +python-versions = "*" +files = [ + {file = "fire-0.6.0.tar.gz", hash = "sha256:54ec5b996ecdd3c0309c800324a0703d6da512241bc73b553db959d98de0aa66"}, +] + +[package.dependencies] +six = "*" +termcolor = "*" + [[package]] name = "iniconfig" version = "2.0.0" @@ -75,47 +89,47 @@ files = [ [[package]] name = "numpy" -version = "1.26.3" +version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-1.26.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf"}, - {file = "numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd"}, - {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6"}, - {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b"}, - {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178"}, - {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485"}, - {file = "numpy-1.26.3-cp310-cp310-win32.whl", hash = "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3"}, - {file = "numpy-1.26.3-cp310-cp310-win_amd64.whl", hash = "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce"}, - {file = "numpy-1.26.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374"}, - {file = "numpy-1.26.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6"}, - {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2"}, - {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda"}, - {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e"}, - {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00"}, - {file = "numpy-1.26.3-cp311-cp311-win32.whl", hash = "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b"}, - {file = "numpy-1.26.3-cp311-cp311-win_amd64.whl", hash = "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4"}, - {file = "numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13"}, - {file = "numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e"}, - {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3"}, - {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419"}, - {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166"}, - {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36"}, - {file = "numpy-1.26.3-cp312-cp312-win32.whl", hash = "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511"}, - {file = "numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b"}, - {file = "numpy-1.26.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f"}, - {file = "numpy-1.26.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f"}, - {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b"}, - {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137"}, - {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58"}, - {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb"}, - {file = "numpy-1.26.3-cp39-cp39-win32.whl", hash = "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03"}, - {file = "numpy-1.26.3-cp39-cp39-win_amd64.whl", hash = "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5"}, - {file = "numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] [[package]] @@ -526,6 +540,20 @@ files = [ {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +[[package]] +name = "termcolor" +version = "2.4.0" +description = "ANSI color formatting for output in terminal" +optional = false +python-versions = ">=3.8" +files = [ + {file = "termcolor-2.4.0-py3-none-any.whl", hash = "sha256:9297c0df9c99445c2412e832e882a7884038a25617c60cea2ad69488d4040d63"}, + {file = "termcolor-2.4.0.tar.gz", hash = "sha256:aab9e56047c8ac41ed798fa36d892a37aca6b3e9159f3e0c24bc64a9b3ac7b7a"}, +] + +[package.extras] +tests = ["pytest", "pytest-cov"] + [[package]] name = "tomli" version = "2.0.1" @@ -551,4 +579,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.13" -content-hash = "a001251df36357dba7b70e5c246a76022a313fb9e120ddc5e95f4cc1efa778b7" +content-hash = "d905f4221fc9dac67766e214aa716fafe85597c04a86494d3bdbbefb70f04fbf" diff --git a/pyproject.toml b/pyproject.toml index f3231ed..d7ec4c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,10 +19,14 @@ scipy = [ ] pyarrow = "^16.0.0" pyyaml = "^6.0.1" +fire = "^0.6.0" [tool.poetry.group.dev.dependencies] pytest = "^8.2.0" +[tool.poetry.scripts] +cosmicqc = "cosmicqc.cli:cli_analyze" + [tool.isort] profile = "black" diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py new file mode 100644 index 0000000..869ad4c --- /dev/null +++ b/src/cosmicqc/cli.py @@ -0,0 +1,25 @@ +""" +Setup coSMicQC CLI through python-fire +""" + +import fire +from . import analyze + +def cli_analyze(): + """ + Run the analyze module functions through python-fire CLI + + This function serves as the CLI entry point for functions + within the analyze module. + """ + fire.Fire(analyze) + +if __name__ == "__main__": + """ + Setup the CLI with python-fire for the coSMicQC package. + + This enables running the functions identify_outliers, find_outliers, + and label_outliers from the command line interface through analyze + """ + + fire.Fire(analyze) From 30aae3eaff45ad19cf7e57ab7085b18cf6178603 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 11 Jun 2024 15:13:35 -0600 Subject: [PATCH 12/40] linting --- src/cosmicqc/cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py index 869ad4c..454bf55 100644 --- a/src/cosmicqc/cli.py +++ b/src/cosmicqc/cli.py @@ -3,9 +3,11 @@ """ import fire + from . import analyze -def cli_analyze(): + +def cli_analyze() -> None: """ Run the analyze module functions through python-fire CLI @@ -14,6 +16,7 @@ def cli_analyze(): """ fire.Fire(analyze) + if __name__ == "__main__": """ Setup the CLI with python-fire for the coSMicQC package. 
From 2bd3d5f79e37e7384ca09e2db82d745257eb7d20 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 11 Jun 2024 16:43:53 -0600 Subject: [PATCH 13/40] add tests and wrappers --- example.csv | 11 +++ poetry.lock | 162 ++++++++++++++++++++++++++++++--------- pyproject.toml | 1 + src/cosmicqc/__init__.py | 2 +- src/cosmicqc/analyze.py | 19 +++-- src/cosmicqc/utils.py | 25 ++++++ tests/__init__.py | 0 tests/test_cli.py | 54 +++++++++++++ tests/utils.py | 20 +++++ 9 files changed, 248 insertions(+), 46 deletions(-) create mode 100644 example.csv create mode 100644 src/cosmicqc/utils.py create mode 100644 tests/__init__.py create mode 100644 tests/test_cli.py create mode 100644 tests/utils.py diff --git a/example.csv b/example.csv new file mode 100644 index 0000000..08055b3 --- /dev/null +++ b/example.csv @@ -0,0 +1,11 @@ +,example_feature +0,1 +1,2 +2,3 +3,4 +4,5 +5,6 +6,7 +7,8 +8,9 +9,10 diff --git a/poetry.lock b/poetry.lock index a8a823e..593a1ee 100644 --- a/poetry.lock +++ b/poetry.lock @@ -50,6 +50,92 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "jinja2" +version = "3.1.4" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, + {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "markupsafe" +version = "2.1.5" +description = "Safely add untrusted strings to HTML/XML markup." +optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, + 
{file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, + {file = 
"MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, + {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, +] + [[package]] name = "numpy" version = "1.24.4" @@ -89,47 +175,47 @@ files = [ [[package]] name = "numpy" -version = "1.26.4" +version = "1.26.3" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, - {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, - {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, - {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, - {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, - {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, - {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, - {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, - {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, - {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, - {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, - {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, - {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, - {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, - {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, - {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, - {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, - {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, - {file = 
"numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, - {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, - {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, - {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, - {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, - {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, - {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, - {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, - {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, - {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, - {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, - {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, - {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, - {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, - {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, - {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, - {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, - {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, + {file = "numpy-1.26.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf"}, + {file = "numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd"}, + {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6"}, + {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b"}, + {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178"}, + {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485"}, + 
{file = "numpy-1.26.3-cp310-cp310-win32.whl", hash = "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3"}, + {file = "numpy-1.26.3-cp310-cp310-win_amd64.whl", hash = "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce"}, + {file = "numpy-1.26.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374"}, + {file = "numpy-1.26.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6"}, + {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2"}, + {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda"}, + {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e"}, + {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00"}, + {file = "numpy-1.26.3-cp311-cp311-win32.whl", hash = "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b"}, + {file = "numpy-1.26.3-cp311-cp311-win_amd64.whl", hash = "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4"}, + {file = "numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13"}, + {file = "numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e"}, + {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3"}, + {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419"}, + {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166"}, + {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36"}, + {file = "numpy-1.26.3-cp312-cp312-win32.whl", hash = "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511"}, + {file = "numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b"}, + {file = "numpy-1.26.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f"}, + {file = "numpy-1.26.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f"}, + {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b"}, + {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137"}, + {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58"}, + {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb"}, + {file = 
"numpy-1.26.3-cp39-cp39-win32.whl", hash = "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03"}, + {file = "numpy-1.26.3-cp39-cp39-win_amd64.whl", hash = "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2"}, + {file = "numpy-1.26.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e"}, + {file = "numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0"}, + {file = "numpy-1.26.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5"}, + {file = "numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"}, ] [[package]] @@ -579,4 +665,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.13" -content-hash = "d905f4221fc9dac67766e214aa716fafe85597c04a86494d3bdbbefb70f04fbf" +content-hash = "097ed443c49a9a397d357d315b58cd467ca0e90047f8c624b77a3d4569c14ef2" diff --git a/pyproject.toml b/pyproject.toml index d7ec4c6..1d1e05c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ scipy = [ pyarrow = "^16.0.0" pyyaml = "^6.0.1" fire = "^0.6.0" +jinja2 = "^3.1.4" [tool.poetry.group.dev.dependencies] pytest = "^8.2.0" diff --git a/src/cosmicqc/__init__.py b/src/cosmicqc/__init__.py index 05b2bbe..447c688 100644 --- a/src/cosmicqc/__init__.py +++ b/src/cosmicqc/__init__.py @@ -7,4 +7,4 @@ # note: version placeholder is updated during build # by poetry-dynamic-versioning. -__version__ = "0.0.0" +__version__ = "0.0.5.post12.dev0+30aae3e" diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index d616f78..4baecee 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -6,18 +6,18 @@ import pathlib from functools import reduce from typing import Dict, List, Optional, Union - +import sys import pandas as pd import yaml from scipy.stats import zscore as scipy_zscore from .scdataframe import SCDataFrame +from .utils import print_if_cli DEFAULT_QC_THRESHOLD_FILE = ( f"{pathlib.Path(__file__).parent!s}/data/qc_nuclei_thresholds_default.yml" ) - def identify_outliers( df: Union[SCDataFrame, pd.DataFrame, str], feature_thresholds: Union[Dict[str, float], str], @@ -34,8 +34,6 @@ def identify_outliers( Args: df: Union[SCDataFrame, pd.DataFrame, str] DataFrame or file with converted output from CytoTable. - metadata_columns: List[str] - List of metadata columns that should be outputted with the outlier data. feature_thresholds: Dict[str, float] One of two options: A dictionary with the feature name(s) as the key(s) and their assigned @@ -47,6 +45,9 @@ def identify_outliers( feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, An optional feature thresholds file where thresholds may be defined within a file. + include_threshold_scores: bool + Whether to include the threshold scores in addition to whether + the threshold set passes per row. Returns: Union[pd.Series, pd.DataFrame]: @@ -54,6 +55,10 @@ def identify_outliers( or not for use within other functions. 
""" + print("THRESHOOLD FILE:",) + print(df, feature_thresholds, feature_thresholds_file, include_threshold_scores) + + # interpret the df as SCDataFrame if not isinstance(df, SCDataFrame): df = SCDataFrame(data=df) @@ -95,7 +100,7 @@ def identify_outliers( condition = outlier_df[zscore_columns[feature]] < threshold conditions.append(condition) - return ( + return print_if_cli( # create a boolean pd.series identifier for dataframe # based on all conditions for use within other functions. reduce(operator.and_, conditions) @@ -175,7 +180,7 @@ def find_outliers( columns_to_include = list(feature_thresholds.keys()) + metadata_columns # Return outliers DataFrame with specified columns - return outliers_df[columns_to_include] + return print_if_cli(outliers_df[columns_to_include]) def label_outliers( @@ -265,7 +270,7 @@ def label_outliers( axis=1, ) # return a dataframe with a deduplicated columns by name - return labeled_df.loc[:, ~labeled_df.columns.duplicated()] + return print_if_cli(labeled_df.loc[:, ~labeled_df.columns.duplicated()]) def read_thresholds_set_from_file( diff --git a/src/cosmicqc/utils.py b/src/cosmicqc/utils.py new file mode 100644 index 0000000..15eb35d --- /dev/null +++ b/src/cosmicqc/utils.py @@ -0,0 +1,25 @@ +""" +Utility functions for coSMicQC +""" + +import sys +from typing import Optional +from .scdataframe import SCDataFrame + +def print_if_cli(data: SCDataFrame) -> Optional[SCDataFrame]: + """ + Prints the data if the script is invoked from the cosmicqc command-line interface. + + Args: + data (SCDataFrame): + The data to print or return. + + Returns: + Optional[SCDataFrame]: + Returns the SCDataFrame if not run through CLI + otherwise prints the SCDataFrame and returns None. + """ + if any("bin/cosmicqc" in path for path in sys.argv): + print(data) + return False + diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 0000000..e33e791 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,54 @@ +import subprocess +import pytest +import json +import os +import pathlib +from .utils import run_cli_command + + + +def test_cli_util(): + """ + Test the `identify_outliers` function of the CLI. + """ + + command = f"""echo 'hello world'""" + stdout, stderr, returncode = run_cli_command(command) + + assert returncode == 0 + +def test_cli_identify_outliers(basic_outlier_csv): + """ + Test the `identify_outliers` function of the CLI. + """ + + command = f"""cosmicqc identify_outliers --df {basic_outlier_csv} --feature_thresholds {"col1":1.0}""" + stdout, stderr, returncode = run_cli_command(command) + + print(stderr) + print(stdout) + + assert returncode == 0 + assert "outlier_custom" in stdout or "outlier_custom" in stderr + +def test_cli_find_outliers(basic_outlier_csv): + """ + Test the `find_outliers` function of the CLI. + """ + + command = f"""cosmicqc find_outliers {basic_outlier_csv} --metadata_columns '[\"col1\"]' --feature_thresholds '{"col1": 1.0}'""" + stdout, stderr, returncode = run_cli_command(command) + + assert returncode == 0 + assert "outlier_custom" in stdout or "outlier_custom" in stderr + +def test_cli_label_outliers(basic_outlier_csv): + """ + Test the `label_outliers` function of the CLI. 
+ """ + + command = f"""cosmicqc label_outliers {basic_outlier_csv} --feature_thresholds '{"col1": 1.0}'""" + stdout, stderr, returncode = run_cli_command(command) + + assert returncode == 0 + assert "outlier_custom" in stdout or "outlier_custom" in stderr diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..d7cb882 --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,20 @@ +""" +Utilities for running pytest tests in coSMicQC +""" + +import subprocess +from typing import List + +def run_cli_command(command: str): + """ + Run a CLI command using subprocess and capture the output and return code. + + Args: + command (list): The command to run as a list of strings. + + Returns: + tuple: (stdout, stderr, returncode) + """ + + result = subprocess.run(command.split(" "), capture_output=True, text=True) + return result.stdout, result.stderr, result.returncode From b5c9997aba5d4cb89034188bf721d67dcaa50602 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 11 Jun 2024 19:55:14 -0600 Subject: [PATCH 14/40] linting and test adjustment --- src/cosmicqc/analyze.py | 8 +++++--- src/cosmicqc/utils.py | 5 +++-- tests/test_cli.py | 41 +++++++++++++++++++++++++---------------- tests/utils.py | 6 ++++-- 4 files changed, 37 insertions(+), 23 deletions(-) diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index 4baecee..52e7969 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -6,7 +6,7 @@ import pathlib from functools import reduce from typing import Dict, List, Optional, Union -import sys + import pandas as pd import yaml from scipy.stats import zscore as scipy_zscore @@ -18,6 +18,7 @@ f"{pathlib.Path(__file__).parent!s}/data/qc_nuclei_thresholds_default.yml" ) + def identify_outliers( df: Union[SCDataFrame, pd.DataFrame, str], feature_thresholds: Union[Dict[str, float], str], @@ -55,10 +56,11 @@ def identify_outliers( or not for use within other functions. """ - print("THRESHOOLD FILE:",) + print( + "THRESHOOLD FILE:", + ) print(df, feature_thresholds, feature_thresholds_file, include_threshold_scores) - # interpret the df as SCDataFrame if not isinstance(df, SCDataFrame): df = SCDataFrame(data=df) diff --git a/src/cosmicqc/utils.py b/src/cosmicqc/utils.py index 15eb35d..1ec1183 100644 --- a/src/cosmicqc/utils.py +++ b/src/cosmicqc/utils.py @@ -4,8 +4,10 @@ import sys from typing import Optional + from .scdataframe import SCDataFrame + def print_if_cli(data: SCDataFrame) -> Optional[SCDataFrame]: """ Prints the data if the script is invoked from the cosmicqc command-line interface. @@ -21,5 +23,4 @@ def print_if_cli(data: SCDataFrame) -> Optional[SCDataFrame]: """ if any("bin/cosmicqc" in path for path in sys.argv): print(data) - return False - + return data diff --git a/tests/test_cli.py b/tests/test_cli.py index e33e791..2952ba5 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,29 +1,28 @@ -import subprocess -import pytest -import json -import os -import pathlib from .utils import run_cli_command - def test_cli_util(): """ Test the `identify_outliers` function of the CLI. """ - command = f"""echo 'hello world'""" + command = """echo 'hello world'""" stdout, stderr, returncode = run_cli_command(command) assert returncode == 0 -def test_cli_identify_outliers(basic_outlier_csv): + +def test_cli_identify_outliers(basic_outlier_csv: str): """ Test the `identify_outliers` function of the CLI. 
""" - command = f"""cosmicqc identify_outliers --df {basic_outlier_csv} --feature_thresholds {"col1":1.0}""" - stdout, stderr, returncode = run_cli_command(command) + stdout, stderr, returncode = run_cli_command( + ( + f"""cosmicqc identify_outliers --df {basic_outlier_csv}""" + """ --feature_thresholds {"example_feature":1.0}""" + ) + ) print(stderr) print(stdout) @@ -31,24 +30,34 @@ def test_cli_identify_outliers(basic_outlier_csv): assert returncode == 0 assert "outlier_custom" in stdout or "outlier_custom" in stderr -def test_cli_find_outliers(basic_outlier_csv): + +def test_cli_find_outliers(basic_outlier_csv: str): """ Test the `find_outliers` function of the CLI. """ - command = f"""cosmicqc find_outliers {basic_outlier_csv} --metadata_columns '[\"col1\"]' --feature_thresholds '{"col1": 1.0}'""" - stdout, stderr, returncode = run_cli_command(command) + stdout, stderr, returncode = run_cli_command( + ( + f"""cosmicqc find_outliers {basic_outlier_csv}""" + """ --metadata_columns '[\"col1\"]' --feature_thresholds '{"example_feature": 1.0}'""" + ) + ) assert returncode == 0 assert "outlier_custom" in stdout or "outlier_custom" in stderr -def test_cli_label_outliers(basic_outlier_csv): + +def test_cli_label_outliers(basic_outlier_csv: str): """ Test the `label_outliers` function of the CLI. """ - command = f"""cosmicqc label_outliers {basic_outlier_csv} --feature_thresholds '{"col1": 1.0}'""" - stdout, stderr, returncode = run_cli_command(command) + stdout, stderr, returncode = run_cli_command( + ( + f"""cosmicqc label_outliers {basic_outlier_csv}""" + """ --feature_thresholds '{"example_feature": 1.0}'""" + ) + ) assert returncode == 0 assert "outlier_custom" in stdout or "outlier_custom" in stderr diff --git a/tests/utils.py b/tests/utils.py index d7cb882..c3abfc3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -3,7 +3,7 @@ """ import subprocess -from typing import List + def run_cli_command(command: str): """ @@ -16,5 +16,7 @@ def run_cli_command(command: str): tuple: (stdout, stderr, returncode) """ - result = subprocess.run(command.split(" "), capture_output=True, text=True) + result = subprocess.run( + command.split(" "), capture_output=True, text=True, check=False + ) return result.stdout, result.stderr, result.returncode From bf40aedc90d33eac5eb9353c28f500ef0e463924 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 12 Jun 2024 06:11:33 -0600 Subject: [PATCH 15/40] attempting wrapper --- src/cosmicqc/analyze.py | 8 ++++---- src/cosmicqc/cli.py | 20 ++++++++++++++++++++ src/cosmicqc/utils.py | 15 ++++++++++++++- 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index 52e7969..8cfe8a8 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -12,7 +12,7 @@ from scipy.stats import zscore as scipy_zscore from .scdataframe import SCDataFrame -from .utils import print_if_cli +from .cli import cli_df_to_string DEFAULT_QC_THRESHOLD_FILE = ( f"{pathlib.Path(__file__).parent!s}/data/qc_nuclei_thresholds_default.yml" @@ -102,7 +102,7 @@ def identify_outliers( condition = outlier_df[zscore_columns[feature]] < threshold conditions.append(condition) - return print_if_cli( + return ( # create a boolean pd.series identifier for dataframe # based on all conditions for use within other functions. 
reduce(operator.and_, conditions) @@ -182,7 +182,7 @@ def find_outliers( columns_to_include = list(feature_thresholds.keys()) + metadata_columns # Return outliers DataFrame with specified columns - return print_if_cli(outliers_df[columns_to_include]) + return outliers_df[columns_to_include] def label_outliers( @@ -272,7 +272,7 @@ def label_outliers( axis=1, ) # return a dataframe with a deduplicated columns by name - return print_if_cli(labeled_df.loc[:, ~labeled_df.columns.duplicated()]) + return labeled_df.loc[:, ~labeled_df.columns.duplicated()] def read_thresholds_set_from_file( diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py index 454bf55..2da1cd6 100644 --- a/src/cosmicqc/cli.py +++ b/src/cosmicqc/cli.py @@ -4,8 +4,23 @@ import fire +from functools import wraps from . import analyze +def cli_df_to_string(func: object) -> object: + """ + See https://github.com/google/python-fire/issues/274 + for why we need this (for now) + """ + @wraps(func) + def wrapper(*args, **kwargs): + import sys + print(sys.argv) + if any("bin/cosmicqc" in path for path in sys.argv): + return str(func(*args, **kwargs)) + else: + return func(*args, **kwargs) + return wrapper def cli_analyze() -> None: """ @@ -14,6 +29,11 @@ def cli_analyze() -> None: This function serves as the CLI entry point for functions within the analyze module. """ + + for key, value in analyze.__dict__.items( ): + if not key.startswith('_') and hasattr(value, '__call__' ): + setattr(analyze, key, cli_df_to_string(value)) + fire.Fire(analyze) diff --git a/src/cosmicqc/utils.py b/src/cosmicqc/utils.py index 1ec1183..924b3e5 100644 --- a/src/cosmicqc/utils.py +++ b/src/cosmicqc/utils.py @@ -5,6 +5,8 @@ import sys from typing import Optional +from functools import wraps + from .scdataframe import SCDataFrame @@ -23,4 +25,15 @@ def print_if_cli(data: SCDataFrame) -> Optional[SCDataFrame]: """ if any("bin/cosmicqc" in path for path in sys.argv): print(data) - return data + else: + return data + +def df_to_string(func): + """ + See https://github.com/google/python-fire/issues/274 + for why we need this (for now) + """ + @wraps(func) + def wrapper(*args, **kwargs): + return str(func(*args, **kwargs)) + return wrapper From feec9a4897b2286b8264731ca452472a43265fe5 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 12 Jun 2024 08:54:32 -0600 Subject: [PATCH 16/40] patch python-fire; fix tests --- src/cosmicqc/analyze.py | 6 --- src/cosmicqc/cli.py | 99 +++++++++++++++++++++++++++++++++-------- src/cosmicqc/utils.py | 39 ---------------- tests/test_cli.py | 52 +++++++++++++++++----- 4 files changed, 123 insertions(+), 73 deletions(-) delete mode 100644 src/cosmicqc/utils.py diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index 8cfe8a8..08649f9 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -12,7 +12,6 @@ from scipy.stats import zscore as scipy_zscore from .scdataframe import SCDataFrame -from .cli import cli_df_to_string DEFAULT_QC_THRESHOLD_FILE = ( f"{pathlib.Path(__file__).parent!s}/data/qc_nuclei_thresholds_default.yml" @@ -56,11 +55,6 @@ def identify_outliers( or not for use within other functions. 
""" - print( - "THRESHOOLD FILE:", - ) - print(df, feature_thresholds, feature_thresholds_file, include_threshold_scores) - # interpret the df as SCDataFrame if not isinstance(df, SCDataFrame): df = SCDataFrame(data=df) diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py index 2da1cd6..fb7ebcf 100644 --- a/src/cosmicqc/cli.py +++ b/src/cosmicqc/cli.py @@ -2,25 +2,92 @@ Setup coSMicQC CLI through python-fire """ +import inspect +import sys +import types +from typing import Optional + import fire +from fire import helptext, inspectutils, value_types +from fire.core import Display, FireError, _DictAsString, _OneLineResult +from fire.trace import FireTrace -from functools import wraps from . import analyze -def cli_df_to_string(func: object) -> object: - """ - See https://github.com/google/python-fire/issues/274 - for why we need this (for now) + +# referenced from https://github.com/google/python-fire/pull/446 +# to be removed after python-fire merges changes (uncertain of timeline) +def HasCustomRepr(component: object) -> bool: + """Reproduces above HasCustomStr function to determine if component has a + custom __repr__ method. + + ... + + Args: + component: The object to check for a custom __repr__ method. + Returns: + Whether `component` has a custom __repr__ method. """ - @wraps(func) - def wrapper(*args, **kwargs): - import sys - print(sys.argv) - if any("bin/cosmicqc" in path for path in sys.argv): - return str(func(*args, **kwargs)) - else: - return func(*args, **kwargs) - return wrapper + if hasattr(component, "__repr__"): + class_attrs = inspectutils.GetClassAttrsDict(type(component)) or {} + repr_attr = class_attrs.get("__repr__") + if repr_attr and repr_attr.defining_class is not object: + return True + return False + + +# referenced with modifications from https://github.com/google/python-fire/pull/446 +# to be removed after python-fire merges changes (uncertain of timeline) +# ruff: noqa: C901 +def _PrintResult( + component_trace: FireTrace, verbose: bool = False, serialize: Optional[bool] = None +) -> None: + """Prints the result of the Fire call to stdout in a human readable way.""" + # TODO(dbieber): Design human readable deserializable serialization method + # and move serialization to its own module. + result = component_trace.GetResult() + # Allow users to modify the return value of the component and provide + # custom formatting. + if serialize: + if not callable(serialize): + raise FireError( + "The argument `serialize` must be empty or callable:", serialize + ) + result = serialize(result) + if value_types.HasCustomStr(result): + # If the object has a custom __str__ method, rather than one inherited from + # object, then we use that to serialize the object. + print(str(result)) + return + + elif HasCustomRepr(result): + # Same as above, but for __repr__. 
+ # For pandas.DataFrame, __str__ is inherited from object, but __repr__ has + # a custom implementation (see pandas.core.frame.DataFrame.__repr__) + print(str(result)) + return + + if isinstance(result, (list, set, frozenset, types.GeneratorType)): + for i in result: + print(_OneLineResult(i)) + elif inspect.isgeneratorfunction(result): + raise NotImplementedError + elif isinstance(result, dict) and value_types.IsSimpleGroup(result): + print(_DictAsString(result, verbose)) + elif isinstance(result, tuple): + print(_OneLineResult(result)) + elif isinstance(result, value_types.VALUE_TYPES): + if result is not None: + print(result) + else: + help_text = helptext.HelpText(result, trace=component_trace, verbose=verbose) + output = [help_text] + Display(output, out=sys.stdout) + + +# replace the _PrintResult function with a fix for pandas dataframes +fire.core._PrintResult = _PrintResult + def cli_analyze() -> None: """ @@ -30,10 +97,6 @@ def cli_analyze() -> None: within the analyze module. """ - for key, value in analyze.__dict__.items( ): - if not key.startswith('_') and hasattr(value, '__call__' ): - setattr(analyze, key, cli_df_to_string(value)) - fire.Fire(analyze) diff --git a/src/cosmicqc/utils.py b/src/cosmicqc/utils.py deleted file mode 100644 index 924b3e5..0000000 --- a/src/cosmicqc/utils.py +++ /dev/null @@ -1,39 +0,0 @@ -""" -Utility functions for coSMicQC -""" - -import sys -from typing import Optional - -from functools import wraps - -from .scdataframe import SCDataFrame - - -def print_if_cli(data: SCDataFrame) -> Optional[SCDataFrame]: - """ - Prints the data if the script is invoked from the cosmicqc command-line interface. - - Args: - data (SCDataFrame): - The data to print or return. - - Returns: - Optional[SCDataFrame]: - Returns the SCDataFrame if not run through CLI - otherwise prints the SCDataFrame and returns None. 
- """ - if any("bin/cosmicqc" in path for path in sys.argv): - print(data) - else: - return data - -def df_to_string(func): - """ - See https://github.com/google/python-fire/issues/274 - for why we need this (for now) - """ - @wraps(func) - def wrapper(*args, **kwargs): - return str(func(*args, **kwargs)) - return wrapper diff --git a/tests/test_cli.py b/tests/test_cli.py index 2952ba5..1513b74 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -24,11 +24,21 @@ def test_cli_identify_outliers(basic_outlier_csv: str): ) ) - print(stderr) - print(stdout) - assert returncode == 0 - assert "outlier_custom" in stdout or "outlier_custom" in stderr + assert ( + stdout.strip() + == """0 False +1 False +2 False +3 False +4 False +5 False +6 False +7 False +8 True +9 True +Name: Z_Score_example_feature, dtype: bool""".strip() + ) def test_cli_find_outliers(basic_outlier_csv: str): @@ -38,13 +48,22 @@ def test_cli_find_outliers(basic_outlier_csv: str): stdout, stderr, returncode = run_cli_command( ( - f"""cosmicqc find_outliers {basic_outlier_csv}""" - """ --metadata_columns '[\"col1\"]' --feature_thresholds '{"example_feature": 1.0}'""" + f"""cosmicqc find_outliers --df {basic_outlier_csv}""" + """ --metadata_columns [] --feature_thresholds {"example_feature":1.0}""" ) ) assert returncode == 0 - assert "outlier_custom" in stdout or "outlier_custom" in stderr + assert ( + stdout.strip() + == """Number of outliers: 2 +Outliers Range: +example_feature Min: 9 +example_feature Max: 10 + example_feature +8 9 +9 10""".strip() + ) def test_cli_label_outliers(basic_outlier_csv: str): @@ -54,10 +73,23 @@ def test_cli_label_outliers(basic_outlier_csv: str): stdout, stderr, returncode = run_cli_command( ( - f"""cosmicqc label_outliers {basic_outlier_csv}""" - """ --feature_thresholds '{"example_feature": 1.0}'""" + f"""cosmicqc label_outliers --df {basic_outlier_csv}""" + """ --feature_thresholds {"example_feature":1.0}""" ) ) assert returncode == 0 - assert "outlier_custom" in stdout or "outlier_custom" in stderr + assert ( + stdout.strip() + == """example_feature outlier_custom +0 1 False +1 2 False +2 3 False +3 4 False +4 5 False +5 6 False +6 7 False +7 8 False +8 9 True +9 10 True""".strip() + ) From d7519a8614703bbea51c7330881e361f42c24b7a Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 12 Jun 2024 09:08:10 -0600 Subject: [PATCH 17/40] add docstring to top of test --- tests/test_cli.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_cli.py b/tests/test_cli.py index 1513b74..f7cfaac 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,3 +1,7 @@ +""" +Tests cosmicqc cli module +""" + from .utils import run_cli_command From f8773b26de54a6ac306ae9ac1c1a220aa21ae427 Mon Sep 17 00:00:00 2001 From: d33bs Date: Fri, 14 Jun 2024 15:42:39 -0600 Subject: [PATCH 18/40] add csv.gz compatibility --- src/cosmicqc/scdataframe.py | 3 +++ tests/conftest.py | 15 +++++++++++++++ tests/test_qcdataframe.py | 10 ++++++++++ 3 files changed, 28 insertions(+) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index f94bdd2..a06b643 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -68,6 +68,9 @@ def __init__( if data_path.suffix == ".csv": # read as a CSV self.data = pd.read_csv(data, **kwargs) + elif data_path.suffixes == [".csv", ".gz"]: + # read as a CSV.GZ file + self.data = pd.read_csv(data, compression="gzip", **kwargs) elif data_path.suffix in (".tsv", ".txt"): # read as a TSV self.data = pd.read_csv(data, delimiter="\t", **kwargs) diff --git 
a/tests/conftest.py b/tests/conftest.py index e3fb24c..ed8591e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -41,6 +41,21 @@ def fixture_basic_outlier_csv( return csv_path +@pytest.fixture(name="basic_outlier_csv_gz") +def fixture_basic_outlier_csv_gz( + tmp_path: pathlib.Path, basic_outlier_dataframe: pd.DataFrame +): + """ + Creates basic example data csv for use in tests + """ + + basic_outlier_dataframe.to_csv( + csv_gz_path := tmp_path / "example.csv.gz", index=False, compression="gzip" + ) + + return csv_gz_path + + @pytest.fixture(name="basic_outlier_tsv") def fixture_basic_outlier_tsv( tmp_path: pathlib.Path, basic_outlier_dataframe: pd.DataFrame diff --git a/tests/test_qcdataframe.py b/tests/test_qcdataframe.py index d2ec019..75e7448 100644 --- a/tests/test_qcdataframe.py +++ b/tests/test_qcdataframe.py @@ -25,6 +25,16 @@ def test_SCDataFrame_init_with_csv(basic_outlier_csv: str): assert sc_df.equals(expected_df) +def test_SCDataFrame_init_with_csv_gz(basic_outlier_csv_gz: str): + """ + Tests SCDataFrame with CSV input. + """ + sc_df = SCDataFrame(data=basic_outlier_csv_gz) + expected_df = pd.read_csv(basic_outlier_csv_gz) + assert sc_df.data_source == basic_outlier_csv_gz + assert sc_df.equals(expected_df) + + def test_SCDataFrame_init_with_tsv(basic_outlier_tsv: str): """ Tests SCDataFrame with TSV input. From 0ad522f32b8008b2176a9f41a47f6754bde6f637 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 18 Jun 2024 09:17:31 -0600 Subject: [PATCH 19/40] add export capabilities --- src/cosmicqc/scdataframe.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index a06b643..730718a 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -81,6 +81,32 @@ def __init__( raise ValueError("Unsupported file format for SCDataFrame.") else: raise ValueError("Unsupported input type for SCDataFrame.") + + def export(self: Self_SCDataFrame, file_path: str, **kwargs: Dict[str, Any]) -> None: + """ + Exports the underlying pandas DataFrame to a file. + + Args: + file_path (str): The path where the DataFrame should be saved. + **kwargs: Additional keyword arguments to pass to the pandas to_* methods. + """ + + data_path = pathlib.Path(file_path) + + # export to csv + if data_path.suffix == ".csv": + self.data.to_csv(file_path, **kwargs) + + # export to tsv + elif data_path.suffix in (".tsv", ".txt"): + self.data.to_csv(file_path, sep='\t', **kwargs) + + # export to parquet + elif data_path.suffix == ".parquet": + self.data.to_parquet(file_path, **kwargs) + + else: + raise ValueError("Unsupported file format for export.") def __call__(self: Self_SCDataFrame) -> pd.DataFrame: """ @@ -100,10 +126,10 @@ def __repr__(self: Self_SCDataFrame) -> pd.DataFrame: """ return repr(self.data) - def __getattr__(self: Self_SCDataFrame, attr: str) -> Any: # noqa: ANN401 + def __getattr__(self, attr: str) -> Any: # noqa: ANN401 """ Intercept attribute accesses and delegate them to the underlying - pandas DataFrame. + pandas DataFrame, except for custom methods. Args: attr (str): The name of the attribute being accessed. @@ -111,6 +137,8 @@ def __getattr__(self: Self_SCDataFrame, attr: str) -> Any: # noqa: ANN401 Returns: Any: The value of the attribute from the pandas DataFrame. 
""" + if attr in self.__dict__: + return self.__dict__[attr] return getattr(self.data, attr) def __getitem__(self: Self_SCDataFrame, key: Union[int, str]) -> Any: # noqa: ANN401 From 6d33de4e8571bfb780fa64ce277814b56c84adbf Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 18 Jun 2024 09:18:00 -0600 Subject: [PATCH 20/40] rename file to correct module name --- tests/{test_qcdataframe.py => test_scdataframe.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_qcdataframe.py => test_scdataframe.py} (100%) diff --git a/tests/test_qcdataframe.py b/tests/test_scdataframe.py similarity index 100% rename from tests/test_qcdataframe.py rename to tests/test_scdataframe.py From 34cf9bfc796f084a205a0e74075c41f2236e8c81 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 18 Jun 2024 09:58:03 -0600 Subject: [PATCH 21/40] add export capabilities --- src/cosmicqc/scdataframe.py | 17 +++++----- tests/test_scdataframe.py | 62 ++++++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 14 deletions(-) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index 730718a..4a4acae 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -81,8 +81,10 @@ def __init__( raise ValueError("Unsupported file format for SCDataFrame.") else: raise ValueError("Unsupported input type for SCDataFrame.") - - def export(self: Self_SCDataFrame, file_path: str, **kwargs: Dict[str, Any]) -> None: + + def export( + self: Self_SCDataFrame, file_path: str, **kwargs: Dict[str, Any] + ) -> None: """ Exports the underlying pandas DataFrame to a file. @@ -94,17 +96,14 @@ def export(self: Self_SCDataFrame, file_path: str, **kwargs: Dict[str, Any]) -> data_path = pathlib.Path(file_path) # export to csv - if data_path.suffix == ".csv": + if ".csv" in data_path.suffixes: self.data.to_csv(file_path, **kwargs) - # export to tsv - elif data_path.suffix in (".tsv", ".txt"): - self.data.to_csv(file_path, sep='\t', **kwargs) - + elif any(elem in data_path.suffixes for elem in (".tsv", ".txt")): + self.data.to_csv(file_path, sep="\t", **kwargs) # export to parquet elif data_path.suffix == ".parquet": self.data.to_parquet(file_path, **kwargs) - else: raise ValueError("Unsupported file format for export.") @@ -126,7 +125,7 @@ def __repr__(self: Self_SCDataFrame) -> pd.DataFrame: """ return repr(self.data) - def __getattr__(self, attr: str) -> Any: # noqa: ANN401 + def __getattr__(self: Self_SCDataFrame, attr: str) -> Any: # noqa: ANN401 """ Intercept attribute accesses and delegate them to the underlying pandas DataFrame, except for custom methods. diff --git a/tests/test_scdataframe.py b/tests/test_scdataframe.py index 75e7448..78e5dd9 100644 --- a/tests/test_scdataframe.py +++ b/tests/test_scdataframe.py @@ -2,54 +2,106 @@ Tests cosmicqc SCDataFrame module """ +import pathlib + import pandas as pd from cosmicqc.scdataframe import SCDataFrame +from pyarrow import parquet -def test_SCDataFrame_init_with_dataframe(basic_outlier_dataframe: pd.DataFrame): +def test_SCDataFrame_with_dataframe( + tmp_path: pathlib.Path, basic_outlier_dataframe: pd.DataFrame +): """ Tests SCDataFrame with pd.DataFrame input. 
""" + sc_df = SCDataFrame(data=basic_outlier_dataframe) + + # test that we ingested the data properly assert sc_df.data_source == "pd.DataFrame" assert sc_df.equals(basic_outlier_dataframe) + # test export + basic_outlier_dataframe.to_parquet( + control_path := f"{tmp_path}/df_input_example.parquet" + ) + sc_df.export(test_path := f"{tmp_path}/df_input_example1.parquet") -def test_SCDataFrame_init_with_csv(basic_outlier_csv: str): + assert parquet.read_table(control_path).equals(parquet.read_table(test_path)) + + +def test_SCDataFrame_with_csv(tmp_path: pathlib.Path, basic_outlier_csv: str): """ Tests SCDataFrame with CSV input. """ + sc_df = SCDataFrame(data=basic_outlier_csv) expected_df = pd.read_csv(basic_outlier_csv) + + # test that we ingested the data properly assert sc_df.data_source == basic_outlier_csv assert sc_df.equals(expected_df) + # test export + sc_df.export(test_path := f"{tmp_path}/df_input_example.csv", index=False) + + pd.testing.assert_frame_equal(expected_df, pd.read_csv(test_path)) + -def test_SCDataFrame_init_with_csv_gz(basic_outlier_csv_gz: str): +def test_SCDataFrame_with_csv_gz(tmp_path: pathlib.Path, basic_outlier_csv_gz: str): """ Tests SCDataFrame with CSV input. """ + sc_df = SCDataFrame(data=basic_outlier_csv_gz) expected_df = pd.read_csv(basic_outlier_csv_gz) + + # test that we ingested the data properly assert sc_df.data_source == basic_outlier_csv_gz assert sc_df.equals(expected_df) + # test export + sc_df.export(test_path := f"{tmp_path}/df_input_example.csv.gz", index=False) -def test_SCDataFrame_init_with_tsv(basic_outlier_tsv: str): + pd.testing.assert_frame_equal( + expected_df, pd.read_csv(test_path, compression="gzip") + ) + + +def test_SCDataFrame_with_tsv(tmp_path: pathlib.Path, basic_outlier_tsv: str): """ Tests SCDataFrame with TSV input. """ + sc_df = SCDataFrame(data=basic_outlier_tsv) expected_df = pd.read_csv(basic_outlier_tsv, delimiter="\t") + + # test that we ingested the data properly assert sc_df.data_source == basic_outlier_tsv assert sc_df.equals(expected_df) + # test export + sc_df.export(test_path := f"{tmp_path}/df_input_example.tsv", index=False) + + pd.testing.assert_frame_equal(expected_df, pd.read_csv(test_path, sep="\t")) -def test_SCDataFrame_init_with_parquet(basic_outlier_parquet: str): + +def test_SCDataFrame_with_parquet(tmp_path: pathlib.Path, basic_outlier_parquet: str): """ Tests SCDataFrame with TSV input. 
""" + sc_df = SCDataFrame(data=basic_outlier_parquet) expected_df = pd.read_parquet(basic_outlier_parquet) + + # test that we ingested the data properly assert sc_df.data_source == basic_outlier_parquet assert sc_df.equals(expected_df) + + # test export + sc_df.export(test_path := f"{tmp_path}/df_input_example2.parquet") + + assert parquet.read_table(basic_outlier_parquet).equals( + parquet.read_table(test_path) + ) From 09d815c384d6e52ffe47c1ce97bc0349fb420b7f Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 18 Jun 2024 15:23:07 -0600 Subject: [PATCH 22/40] add output capabilities --- src/cosmicqc/analyze.py | 37 +++++++++++++++++++++++++++++++++---- src/cosmicqc/cli.py | 2 +- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index 08649f9..da43fcc 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -23,6 +23,7 @@ def identify_outliers( feature_thresholds: Union[Dict[str, float], str], feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, include_threshold_scores: bool = False, + export_path: Optional[str] = None, ) -> Union[pd.Series, pd.DataFrame]: """ This function uses z-scoring to format the data for detecting outlier @@ -48,6 +49,9 @@ def identify_outliers( include_threshold_scores: bool Whether to include the threshold scores in addition to whether the threshold set passes per row. + export_path: Optional[str] = None + An optional path to export the data using SCDataFrame export + capabilities. If None no export is performed. Returns: Union[pd.Series, pd.DataFrame]: @@ -96,7 +100,7 @@ def identify_outliers( condition = outlier_df[zscore_columns[feature]] < threshold conditions.append(condition) - return ( + result = ( # create a boolean pd.series identifier for dataframe # based on all conditions for use within other functions. reduce(operator.and_, conditions) @@ -112,12 +116,18 @@ def identify_outliers( ) ) + if export_path is not None: + SCDataFrame(data=result).export(file_path=export_path) + + return result + def find_outliers( df: Union[SCDataFrame, pd.DataFrame, str], metadata_columns: List[str], feature_thresholds: Union[Dict[str, float], str], feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, + export_path: Optional[str] = None, ) -> pd.DataFrame: """ This function uses identify_outliers to return a dataframe @@ -139,6 +149,9 @@ def find_outliers( feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, An optional feature thresholds file where thresholds may be defined within a file. + export_path: Optional[str] = None + An optional path to export the data using SCDataFrame export + capabilities. If None no export is performed. 
Returns: pd.DataFrame: @@ -175,8 +188,14 @@ def find_outliers( # Include metadata columns in the output DataFrame columns_to_include = list(feature_thresholds.keys()) + metadata_columns + result = outliers_df[columns_to_include] + + # export the file if specified + if export_path is not None: + SCDataFrame(data=result).export(file_path=export_path) + # Return outliers DataFrame with specified columns - return outliers_df[columns_to_include] + return result def label_outliers( @@ -184,6 +203,7 @@ def label_outliers( feature_thresholds: Optional[Union[Dict[str, float], str]] = None, feature_thresholds_file: Optional[str] = DEFAULT_QC_THRESHOLD_FILE, include_threshold_scores: bool = False, + export_path: Optional[str] = None, ) -> pd.DataFrame: """ Use identify_outliers to label the original dataset for @@ -206,6 +226,9 @@ def label_outliers( include_threshold_scores: bool = False Whether to include the scores in addition to whether an outlier was detected or not. + export_path: Optional[str] = None + An optional path to export the data using SCDataFrame export + capabilities. If None no export is performed. Returns: pd.DataFrame: @@ -225,7 +248,7 @@ def label_outliers( feature_thresholds_file=feature_thresholds_file, include_threshold_scores=include_threshold_scores, ) - return pd.concat( + result = pd.concat( [ df, ( @@ -266,7 +289,13 @@ def label_outliers( axis=1, ) # return a dataframe with a deduplicated columns by name - return labeled_df.loc[:, ~labeled_df.columns.duplicated()] + result = labeled_df.loc[:, ~labeled_df.columns.duplicated()] + + # export the file if specified + if export_path is not None: + SCDataFrame(data=result).export(file_path=export_path) + + return result def read_thresholds_set_from_file( diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py index fb7ebcf..b290ef1 100644 --- a/src/cosmicqc/cli.py +++ b/src/cosmicqc/cli.py @@ -108,4 +108,4 @@ def cli_analyze() -> None: and label_outliers from the command line interface through analyze """ - fire.Fire(analyze) + cli_analyze() From fbedb7dfa5302d1ae5dd436cc052ebc6ede116b8 Mon Sep 17 00:00:00 2001 From: Dave Bunten Date: Tue, 18 Jun 2024 16:53:09 -0600 Subject: [PATCH 23/40] Apply suggestions from code review Co-authored-by: Gregory Way --- src/cosmicqc/scdataframe.py | 6 +++--- tests/conftest.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index 4a4acae..d364764 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -31,9 +31,9 @@ class SCDataFrame: __call__(): Returns the underlying pandas DataFrame. __repr__(): - Returns representation of underlying pandas DataFrame. + Returns a representation of the underlying pandas DataFrame. __getattr__(): - Returns underlying attributes of pandas DataFrame. + Returns the underlying attributes of the pandas DataFrame. __getitem__(): Returns slice of data from pandas DataFrame. """ @@ -118,7 +118,7 @@ def __call__(self: Self_SCDataFrame) -> pd.DataFrame: def __repr__(self: Self_SCDataFrame) -> pd.DataFrame: """ - Returns the representation of underlying pandas DataFrame. + Returns the representation of the underlying pandas DataFrame. Returns: pd.DataFrame: The data in a pandas DataFrame. 
diff --git a/tests/conftest.py b/tests/conftest.py index ed8591e..bc2883d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -36,7 +36,7 @@ def fixture_basic_outlier_csv( Creates basic example data csv for use in tests """ - basic_outlier_dataframe.to_csv(csv_path := tmp_path / "example.csv", index=False) + basic_outlier_dataframe.to_csv(csv_path := tmp_path / "basic_example.csv", index=False) return csv_path From 025701497204e9bae941a5ccdf1b4123741c3290 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 18 Jun 2024 17:00:49 -0600 Subject: [PATCH 24/40] update tests and docs --- pyproject.toml | 2 +- src/cosmicqc/scdataframe.py | 20 +++++++++----------- tests/conftest.py | 4 +++- tests/test_scdataframe.py | 15 ++++++--------- 4 files changed, 19 insertions(+), 22 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f3231ed..67af76e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ select = [ # Ignore `E402` and `F401` (unused imports) in all `__init__.py` files "__init__.py" = ["E402", "F401"] # ignore typing rules for tests -"tests/*" = ["ANN201"] +"tests/*" = ["ANN201", "PLR0913"] # set dynamic versioning capabilities for project [tool.poetry-dynamic-versioning] diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index d364764..17dd0ce 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -31,7 +31,7 @@ class SCDataFrame: __call__(): Returns the underlying pandas DataFrame. __repr__(): - Returns a representation of the underlying pandas DataFrame. + Returns a representational string of the underlying pandas DataFrame. __getattr__(): Returns the underlying attributes of the pandas DataFrame. __getitem__(): @@ -53,10 +53,10 @@ def __init__( if isinstance(data, pd.DataFrame): # if data is a pd.DataFrame, remember this within the data_source attr - self.data_source = "pd.DataFrame" + self.data_source = "pandas.DataFrame" self.data = data - elif isinstance(data, pathlib.Path) or isinstance(data, str): # noqa: PLR1701, SIM101 + elif isinstance(data, pathlib.Path | str): # if the data is a string, remember the original source # through a data_source attr self.data_source = data @@ -65,15 +65,13 @@ def __init__( data_path = pathlib.Path(data) # Read the data from the file based on its extension - if data_path.suffix == ".csv": - # read as a CSV + if ( + data_path.suffix == ".csv" + or data_path.suffix in (".tsv", ".txt") + or data_path.suffixes == [".csv", ".gz"] + ): + # read as a CSV, CSV.GZ, .TSV, or .TXT file self.data = pd.read_csv(data, **kwargs) - elif data_path.suffixes == [".csv", ".gz"]: - # read as a CSV.GZ file - self.data = pd.read_csv(data, compression="gzip", **kwargs) - elif data_path.suffix in (".tsv", ".txt"): - # read as a TSV - self.data = pd.read_csv(data, delimiter="\t", **kwargs) elif data_path.suffix == ".parquet": # read as a Parquet file self.data = pd.read_parquet(data, **kwargs) diff --git a/tests/conftest.py b/tests/conftest.py index bc2883d..8f97176 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -36,7 +36,9 @@ def fixture_basic_outlier_csv( Creates basic example data csv for use in tests """ - basic_outlier_dataframe.to_csv(csv_path := tmp_path / "basic_example.csv", index=False) + basic_outlier_dataframe.to_csv( + csv_path := tmp_path / "basic_example.csv", index=False + ) return csv_path diff --git a/tests/test_scdataframe.py b/tests/test_scdataframe.py index 78e5dd9..713fc97 100644 --- a/tests/test_scdataframe.py +++ b/tests/test_scdataframe.py @@ -10,7 +10,12 @@ def 
test_SCDataFrame_with_dataframe( - tmp_path: pathlib.Path, basic_outlier_dataframe: pd.DataFrame + tmp_path: pathlib.Path, + basic_outlier_dataframe: pd.DataFrame, + basic_outlier_csv: str, + basic_outlier_csv_gz: str, + basic_outlier_tsv: str, + basic_outlier_parquet: str, ): """ Tests SCDataFrame with pd.DataFrame input. @@ -30,8 +35,6 @@ def test_SCDataFrame_with_dataframe( assert parquet.read_table(control_path).equals(parquet.read_table(test_path)) - -def test_SCDataFrame_with_csv(tmp_path: pathlib.Path, basic_outlier_csv: str): """ Tests SCDataFrame with CSV input. """ @@ -48,8 +51,6 @@ def test_SCDataFrame_with_csv(tmp_path: pathlib.Path, basic_outlier_csv: str): pd.testing.assert_frame_equal(expected_df, pd.read_csv(test_path)) - -def test_SCDataFrame_with_csv_gz(tmp_path: pathlib.Path, basic_outlier_csv_gz: str): """ Tests SCDataFrame with CSV input. """ @@ -68,8 +69,6 @@ def test_SCDataFrame_with_csv_gz(tmp_path: pathlib.Path, basic_outlier_csv_gz: s expected_df, pd.read_csv(test_path, compression="gzip") ) - -def test_SCDataFrame_with_tsv(tmp_path: pathlib.Path, basic_outlier_tsv: str): """ Tests SCDataFrame with TSV input. """ @@ -86,8 +85,6 @@ def test_SCDataFrame_with_tsv(tmp_path: pathlib.Path, basic_outlier_tsv: str): pd.testing.assert_frame_equal(expected_df, pd.read_csv(test_path, sep="\t")) - -def test_SCDataFrame_with_parquet(tmp_path: pathlib.Path, basic_outlier_parquet: str): """ Tests SCDataFrame with TSV input. """ From 985a6dda77d5d57fe894f262ba1aa6e3538eb6a6 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 19 Jun 2024 08:31:14 -0600 Subject: [PATCH 25/40] fix tests --- pyproject.toml | 2 +- src/cosmicqc/scdataframe.py | 6 ++++-- tests/test_scdataframe.py | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 67af76e..5adde66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ profile = "black" exclude_dirs = ["tests"] [tool.ruff] -target-version = "py311" +target-version = "py38" line-length = 88 fix = true diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index 17dd0ce..d96cbee 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -39,7 +39,9 @@ class SCDataFrame: """ def __init__( - self: Self_SCDataFrame, data: Union[pd.DataFrame, str], **kwargs: Dict[str, Any] + self: Self_SCDataFrame, + data: Union[pd.DataFrame, str, pathlib.Path], + **kwargs: Dict[str, Any], ) -> None: """ Initializes the SCDataFrame with either a DataFrame or a file path. 
@@ -56,7 +58,7 @@ def __init__( self.data_source = "pandas.DataFrame" self.data = data - elif isinstance(data, pathlib.Path | str): + elif isinstance(data, (pathlib.Path, str)): # if the data is a string, remember the original source # through a data_source attr self.data_source = data diff --git a/tests/test_scdataframe.py b/tests/test_scdataframe.py index 713fc97..bcb520c 100644 --- a/tests/test_scdataframe.py +++ b/tests/test_scdataframe.py @@ -24,7 +24,7 @@ def test_SCDataFrame_with_dataframe( sc_df = SCDataFrame(data=basic_outlier_dataframe) # test that we ingested the data properly - assert sc_df.data_source == "pd.DataFrame" + assert sc_df.data_source == "pandas.DataFrame" assert sc_df.equals(basic_outlier_dataframe) # test export From 5034a0727473fd382944761e4ccce8754d628795 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 19 Jun 2024 11:01:36 -0600 Subject: [PATCH 26/40] update tests; add constructor path for scdataframe --- src/cosmicqc/analyze.py | 9 +++------ src/cosmicqc/scdataframe.py | 25 +++++++++++++++---------- tests/test_scdataframe.py | 31 +++++++++++-------------------- 3 files changed, 29 insertions(+), 36 deletions(-) diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index d616f78..abe43d6 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -55,8 +55,7 @@ def identify_outliers( """ # interpret the df as SCDataFrame - if not isinstance(df, SCDataFrame): - df = SCDataFrame(data=df) + df = SCDataFrame(data=df) # create a copy of the dataframe to ensure # we don't modify the supplied dataframe inplace. @@ -145,8 +144,7 @@ def find_outliers( """ # interpret the df as SCDataFrame - if not isinstance(df, SCDataFrame): - df = SCDataFrame(data=df) + df = SCDataFrame(data=df) if isinstance(feature_thresholds, str): feature_thresholds = read_thresholds_set_from_file( @@ -212,8 +210,7 @@ def label_outliers( """ # interpret the df as SCDataFrame - if not isinstance(df, SCDataFrame): - df = SCDataFrame(data=df) + df = SCDataFrame(data=df) # for single outlier processing if isinstance(feature_thresholds, (str, dict)): diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index d96cbee..bbe29db 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -9,7 +9,7 @@ # provide backwards compatibility for Self type in earlier Python versions. # see: https://peps.python.org/pep-0484/#annotating-instance-and-class-methods -Self_SCDataFrame = TypeVar("Self_SCDataFrame", bound="SCDataFrame") +SCDataFrame_type = TypeVar("SCDataFrame_type", bound="SCDataFrame") class SCDataFrame: @@ -39,8 +39,8 @@ class SCDataFrame: """ def __init__( - self: Self_SCDataFrame, - data: Union[pd.DataFrame, str, pathlib.Path], + self: SCDataFrame_type, + data: Union[SCDataFrame_type, pd.DataFrame, str, pathlib.Path], **kwargs: Dict[str, Any], ) -> None: """ @@ -53,7 +53,12 @@ def __init__( Additional keyword arguments to pass to the pandas read functions. 
""" - if isinstance(data, pd.DataFrame): + if isinstance(data, SCDataFrame): + # if data is an instance of SCDataFrame, use its data_source and data + self.data_source = data.data_source + self.data = data.data + + elif isinstance(data, pd.DataFrame): # if data is a pd.DataFrame, remember this within the data_source attr self.data_source = "pandas.DataFrame" self.data = data @@ -80,10 +85,10 @@ def __init__( else: raise ValueError("Unsupported file format for SCDataFrame.") else: - raise ValueError("Unsupported input type for SCDataFrame.") + raise ValueError("Unsupported data type for SCDataFrame.") def export( - self: Self_SCDataFrame, file_path: str, **kwargs: Dict[str, Any] + self: SCDataFrame_type, file_path: str, **kwargs: Dict[str, Any] ) -> None: """ Exports the underlying pandas DataFrame to a file. @@ -107,7 +112,7 @@ def export( else: raise ValueError("Unsupported file format for export.") - def __call__(self: Self_SCDataFrame) -> pd.DataFrame: + def __call__(self: SCDataFrame_type) -> pd.DataFrame: """ Returns the underlying pandas DataFrame. @@ -116,7 +121,7 @@ def __call__(self: Self_SCDataFrame) -> pd.DataFrame: """ return self.data - def __repr__(self: Self_SCDataFrame) -> pd.DataFrame: + def __repr__(self: SCDataFrame_type) -> pd.DataFrame: """ Returns the representation of the underlying pandas DataFrame. @@ -125,7 +130,7 @@ def __repr__(self: Self_SCDataFrame) -> pd.DataFrame: """ return repr(self.data) - def __getattr__(self: Self_SCDataFrame, attr: str) -> Any: # noqa: ANN401 + def __getattr__(self: SCDataFrame_type, attr: str) -> Any: # noqa: ANN401 """ Intercept attribute accesses and delegate them to the underlying pandas DataFrame, except for custom methods. @@ -140,7 +145,7 @@ def __getattr__(self: Self_SCDataFrame, attr: str) -> Any: # noqa: ANN401 return self.__dict__[attr] return getattr(self.data, attr) - def __getitem__(self: Self_SCDataFrame, key: Union[int, str]) -> Any: # noqa: ANN401 + def __getitem__(self: SCDataFrame_type, key: Union[int, str]) -> Any: # noqa: ANN401 """ Returns an element or a slice of the underlying pandas DataFrame. diff --git a/tests/test_scdataframe.py b/tests/test_scdataframe.py index bcb520c..2627b88 100644 --- a/tests/test_scdataframe.py +++ b/tests/test_scdataframe.py @@ -17,10 +17,8 @@ def test_SCDataFrame_with_dataframe( basic_outlier_tsv: str, basic_outlier_parquet: str, ): - """ - Tests SCDataFrame with pd.DataFrame input. - """ - + + # Tests SCDataFrame with pd.DataFrame input. sc_df = SCDataFrame(data=basic_outlier_dataframe) # test that we ingested the data properly @@ -35,10 +33,7 @@ def test_SCDataFrame_with_dataframe( assert parquet.read_table(control_path).equals(parquet.read_table(test_path)) - """ - Tests SCDataFrame with CSV input. - """ - + # Tests SCDataFrame with CSV input. sc_df = SCDataFrame(data=basic_outlier_csv) expected_df = pd.read_csv(basic_outlier_csv) @@ -51,10 +46,7 @@ def test_SCDataFrame_with_dataframe( pd.testing.assert_frame_equal(expected_df, pd.read_csv(test_path)) - """ - Tests SCDataFrame with CSV input. - """ - + # Tests SCDataFrame with CSV input. sc_df = SCDataFrame(data=basic_outlier_csv_gz) expected_df = pd.read_csv(basic_outlier_csv_gz) @@ -69,10 +61,7 @@ def test_SCDataFrame_with_dataframe( expected_df, pd.read_csv(test_path, compression="gzip") ) - """ - Tests SCDataFrame with TSV input. - """ - + # Tests SCDataFrame with TSV input. 
     sc_df = SCDataFrame(data=basic_outlier_tsv)
     expected_df = pd.read_csv(basic_outlier_tsv, delimiter="\t")
@@ -85,10 +74,7 @@ def test_SCDataFrame_with_dataframe(
     pd.testing.assert_frame_equal(expected_df, pd.read_csv(test_path, sep="\t"))
-
-    """
-    Tests SCDataFrame with TSV input.
-    """
-
+    # Tests SCDataFrame with parquet input.
     sc_df = SCDataFrame(data=basic_outlier_parquet)
     expected_df = pd.read_parquet(basic_outlier_parquet)
@@ -102,3 +88,8 @@ def test_SCDataFrame_with_dataframe(
     assert parquet.read_table(basic_outlier_parquet).equals(
         parquet.read_table(test_path)
     )
+
+    # test SCDataFrame with SCDataFrame input
+    copy_sc_df = SCDataFrame(data=sc_df)
+
+    pd.testing.assert_frame_equal(copy_sc_df.data, sc_df.data)

From fd818685c9b3d4ae5dea88d7ca3a40a5e1c3615f Mon Sep 17 00:00:00 2001
From: d33bs
Date: Wed, 19 Jun 2024 11:01:48 -0600
Subject: [PATCH 27/40] linting

---
 tests/test_scdataframe.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_scdataframe.py b/tests/test_scdataframe.py
index 2627b88..2781c8c 100644
--- a/tests/test_scdataframe.py
+++ b/tests/test_scdataframe.py
@@ -17,7 +17,6 @@ def test_SCDataFrame_with_dataframe(
     basic_outlier_tsv: str,
     basic_outlier_parquet: str,
 ):
-
     # Tests SCDataFrame with pd.DataFrame input.
     sc_df = SCDataFrame(data=basic_outlier_dataframe)

From 3bab60f581e93c23360bfa2506c014f7dc87d201 Mon Sep 17 00:00:00 2001
From: d33bs
Date: Wed, 19 Jun 2024 11:34:41 -0600
Subject: [PATCH 28/40] modify tests

---
 src/cosmicqc/scdataframe.py | 5 +++++
 tests/test_cli.py | 12 ++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py
index bbe29db..9ec6bf7 100644
--- a/src/cosmicqc/scdataframe.py
+++ b/src/cosmicqc/scdataframe.py
@@ -62,6 +62,11 @@ def __init__(
             # if data is a pd.DataFrame, remember this within the data_source attr
             self.data_source = "pandas.DataFrame"
             self.data = data
+
+        elif isinstance(data, pd.Series):
+            # if data is a pd.Series, remember this within the data_source attr
+            self.data_source = "pandas.Series"
+            self.data = pd.DataFrame(data)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index f7cfaac..f3ebf7f 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -3,7 +3,8 @@
 """
 from .utils import run_cli_command
-
+import pathlib
+from pyarrow import parquet
 def test_cli_util():
     """
     Test the `identify_outliers` function of the CLI.
@@ -16,7 +17,7 @@ def test_cli_util():
     assert returncode == 0
-def test_cli_identify_outliers(basic_outlier_csv: str):
+def test_cli_identify_outliers(tmp_path: pathlib.Path, basic_outlier_csv: str):
     """
     Test the `identify_outliers` function of the CLI.
""" @@ -25,9 +26,12 @@ def test_cli_identify_outliers(basic_outlier_csv: str): ( f"""cosmicqc identify_outliers --df {basic_outlier_csv}""" """ --feature_thresholds {"example_feature":1.0}""" + f" --export_path {tmp_path}/identify_outliers_output.parquet" + ) ) + print(stderr) assert returncode == 0 assert ( stdout.strip() @@ -44,6 +48,10 @@ def test_cli_identify_outliers(basic_outlier_csv: str): Name: Z_Score_example_feature, dtype: bool""".strip() ) + + print(parquet.read_table(f"{tmp_path}/identify_outliers_output.parquet").to_pydict()) + assert parquet.read_table(f"{tmp_path}/identify_outliers_output.parquet").to_pydict() == {} + def test_cli_find_outliers(basic_outlier_csv: str): """ From 6d61bf3d0cec43780dea125c14bae4a9f27e8724 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 19 Jun 2024 11:39:05 -0600 Subject: [PATCH 29/40] enable pd.series compatibility --- src/cosmicqc/scdataframe.py | 6 ++++++ tests/test_scdataframe.py | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index bbe29db..50eb0f8 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -58,6 +58,12 @@ def __init__( self.data_source = data.data_source self.data = data.data + elif isinstance(data, pd.Series): + # if data is a pd.Series, remember this within the data_source attr + self.data_source = "pandas.Series" + # also cast the series to a dataframe + self.data = pd.DataFrame(data) + elif isinstance(data, pd.DataFrame): # if data is a pd.DataFrame, remember this within the data_source attr self.data_source = "pandas.DataFrame" diff --git a/tests/test_scdataframe.py b/tests/test_scdataframe.py index 2781c8c..2ddcb03 100644 --- a/tests/test_scdataframe.py +++ b/tests/test_scdataframe.py @@ -32,6 +32,13 @@ def test_SCDataFrame_with_dataframe( assert parquet.read_table(control_path).equals(parquet.read_table(test_path)) + # Tests SCDataFrame with pd.Series input. + sc_df = SCDataFrame(data=basic_outlier_dataframe.loc[0]) + + # test that we ingested the data properly + assert sc_df.data_source == "pandas.Series" + assert sc_df.equals(pd.DataFrame(basic_outlier_dataframe.loc[0])) + # Tests SCDataFrame with CSV input. sc_df = SCDataFrame(data=basic_outlier_csv) expected_df = pd.read_csv(basic_outlier_csv) From 90c2088d65c58ff10adfa24f5238a6874ea38104 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 19 Jun 2024 13:25:19 -0600 Subject: [PATCH 30/40] update for exports via cli --- src/cosmicqc/scdataframe.py | 2 +- tests/test_cli.py | 57 +++++++++++++++++++++++++++++++------ 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index 27de981..7030745 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -68,7 +68,7 @@ def __init__( # if data is a pd.DataFrame, remember this within the data_source attr self.data_source = "pandas.DataFrame" self.data = data - + elif isinstance(data, pd.Series): # if data is a pd.DataFrame, remember this within the data_source attr self.data_source = "pandas.Series" diff --git a/tests/test_cli.py b/tests/test_cli.py index f3ebf7f..95b1151 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,10 +2,13 @@ Tests cosmicqc cli module """ -from .utils import run_cli_command import pathlib + from pyarrow import parquet +from .utils import run_cli_command + + def test_cli_util(): """ Test the `identify_outliers` function of the CLI. 
@@ -27,7 +30,6 @@ def test_cli_identify_outliers(tmp_path: pathlib.Path, basic_outlier_csv: str): f"""cosmicqc identify_outliers --df {basic_outlier_csv}""" """ --feature_thresholds {"example_feature":1.0}""" f" --export_path {tmp_path}/identify_outliers_output.parquet" - ) ) @@ -48,12 +50,25 @@ def test_cli_identify_outliers(tmp_path: pathlib.Path, basic_outlier_csv: str): Name: Z_Score_example_feature, dtype: bool""".strip() ) - - print(parquet.read_table(f"{tmp_path}/identify_outliers_output.parquet").to_pydict()) - assert parquet.read_table(f"{tmp_path}/identify_outliers_output.parquet").to_pydict() == {} - - -def test_cli_find_outliers(basic_outlier_csv: str): + assert parquet.read_table( + f"{tmp_path}/identify_outliers_output.parquet" + ).to_pydict() == { + "Z_Score_example_feature": [ + False, + False, + False, + False, + False, + False, + False, + False, + True, + True, + ] + } + + +def test_cli_find_outliers(tmp_path: pathlib.Path, basic_outlier_csv: str): """ Test the `find_outliers` function of the CLI. """ @@ -62,6 +77,7 @@ def test_cli_find_outliers(basic_outlier_csv: str): ( f"""cosmicqc find_outliers --df {basic_outlier_csv}""" """ --metadata_columns [] --feature_thresholds {"example_feature":1.0}""" + f" --export_path {tmp_path}/find_outliers_output.parquet" ) ) @@ -77,8 +93,12 @@ def test_cli_find_outliers(basic_outlier_csv: str): 9 10""".strip() ) + assert parquet.read_table( + f"{tmp_path}/find_outliers_output.parquet" + ).to_pydict() == {"example_feature": [9, 10], "__index_level_0__": [8, 9]} -def test_cli_label_outliers(basic_outlier_csv: str): + +def test_cli_label_outliers(tmp_path: pathlib.Path, basic_outlier_csv: str): """ Test the `label_outliers` function of the CLI. """ @@ -87,6 +107,7 @@ def test_cli_label_outliers(basic_outlier_csv: str): ( f"""cosmicqc label_outliers --df {basic_outlier_csv}""" """ --feature_thresholds {"example_feature":1.0}""" + f" --export_path {tmp_path}/label_outliers_output.parquet" ) ) @@ -105,3 +126,21 @@ def test_cli_label_outliers(basic_outlier_csv: str): 8 9 True 9 10 True""".strip() ) + + assert parquet.read_table( + f"{tmp_path}/label_outliers_output.parquet" + ).to_pydict() == { + "example_feature": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + "outlier_custom": [ + False, + False, + False, + False, + False, + False, + False, + False, + True, + True, + ], + } From c966ce872638d7905f9ccf955ee98238c5d96dfe Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 25 Jun 2024 13:36:44 -0600 Subject: [PATCH 31/40] fix docstring --- src/cosmicqc/scdataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py index 799afab..32f60a3 100644 --- a/src/cosmicqc/scdataframe.py +++ b/src/cosmicqc/scdataframe.py @@ -142,7 +142,7 @@ def __repr__(self: SCDataFrame_type) -> str: Returns the representation of the underlying pandas DataFrame. Returns: - pd.DataFrame: The data in a pandas DataFrame. + str: The string-based representation of a pandas DataFrame. 
""" return repr(self.data) From ac734ca47ddefc2582c079f05931907a7fa395f3 Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 25 Jun 2024 13:41:28 -0600 Subject: [PATCH 32/40] add return types for test util --- tests/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index c3abfc3..a3d6373 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -3,9 +3,10 @@ """ import subprocess +from typing import Tuple -def run_cli_command(command: str): +def run_cli_command(command: str) -> Tuple[str, str, int]: """ Run a CLI command using subprocess and capture the output and return code. @@ -13,7 +14,7 @@ def run_cli_command(command: str): command (list): The command to run as a list of strings. Returns: - tuple: (stdout, stderr, returncode) + tuple: (str: stdout, str: stderr, int: returncode) """ result = subprocess.run( From 812b171a22e84c079d15a722daf8ea8f3f40d0a8 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 26 Jun 2024 15:23:38 -0600 Subject: [PATCH 33/40] fix deps --- poetry.lock | 171 ++++++++++++++----------------------------------- pyproject.toml | 1 - 2 files changed, 47 insertions(+), 125 deletions(-) diff --git a/poetry.lock b/poetry.lock index 593a1ee..d12396d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -50,92 +50,6 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "jinja2" -version = "3.1.4" -description = "A very fast and expressive template engine." -optional = false -python-versions = ">=3.7" -files = [ - {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, - {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, -] - -[package.dependencies] -MarkupSafe = ">=2.0" - -[package.extras] -i18n = ["Babel (>=2.7)"] - -[[package]] -name = "markupsafe" -version = "2.1.5" -description = "Safely add untrusted strings to HTML/XML markup." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, - {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, - {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, - {file = 
"MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, - {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, - {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, - {file = 
"MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, - {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, - {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, - {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, -] - [[package]] name = "numpy" version = "1.24.4" @@ -175,47 +89,56 @@ files = [ [[package]] name = "numpy" -version = "1.26.3" +version = "2.0.0" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-1.26.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:806dd64230dbbfaca8a27faa64e2f414bf1c6622ab78cc4264f7f5f028fee3bf"}, - {file = "numpy-1.26.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02f98011ba4ab17f46f80f7f8f1c291ee7d855fcef0a5a98db80767a468c85cd"}, - {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d45b3ec2faed4baca41c76617fcdcfa4f684ff7a151ce6fc78ad3b6e85af0a6"}, - {file = "numpy-1.26.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd2b45bf079d9ad90377048e2747a0c82351989a2165821f0c96831b4a2a54b"}, - {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:211ddd1e94817ed2d175b60b6374120244a4dd2287f4ece45d49228b4d529178"}, - {file = "numpy-1.26.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:b1240f767f69d7c4c8a29adde2310b871153df9b26b5cb2b54a561ac85146485"}, - {file = "numpy-1.26.3-cp310-cp310-win32.whl", hash = "sha256:21a9484e75ad018974a2fdaa216524d64ed4212e418e0a551a2d83403b0531d3"}, - {file = "numpy-1.26.3-cp310-cp310-win_amd64.whl", hash = "sha256:9e1591f6ae98bcfac2a4bbf9221c0b92ab49762228f38287f6eeb5f3f55905ce"}, - {file = "numpy-1.26.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b831295e5472954104ecb46cd98c08b98b49c69fdb7040483aff799a755a7374"}, - {file = "numpy-1.26.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e87562b91f68dd8b1c39149d0323b42e0082db7ddb8e934ab4c292094d575d6"}, - {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c66d6fec467e8c0f975818c1796d25c53521124b7cfb760114be0abad53a0a2"}, - {file = "numpy-1.26.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25e2811a9c932e43943a2615e65fc487a0b6b49218899e62e426e7f0a57eeda"}, - {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:af36e0aa45e25c9f57bf684b1175e59ea05d9a7d3e8e87b7ae1a1da246f2767e"}, - {file = "numpy-1.26.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:51c7f1b344f302067b02e0f5b5d2daa9ed4a721cf49f070280ac202738ea7f00"}, - {file = "numpy-1.26.3-cp311-cp311-win32.whl", hash = "sha256:7ca4f24341df071877849eb2034948459ce3a07915c2734f1abb4018d9c49d7b"}, - {file = "numpy-1.26.3-cp311-cp311-win_amd64.whl", hash = "sha256:39763aee6dfdd4878032361b30b2b12593fb445ddb66bbac802e2113eb8a6ac4"}, - {file = "numpy-1.26.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a7081fd19a6d573e1a05e600c82a1c421011db7935ed0d5c483e9dd96b99cf13"}, - {file = "numpy-1.26.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12c70ac274b32bc00c7f61b515126c9205323703abb99cd41836e8125ea0043e"}, - {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f784e13e598e9594750b2ef6729bcd5a47f6cfe4a12cca13def35e06d8163e3"}, - {file = "numpy-1.26.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f24750ef94d56ce6e33e4019a8a4d68cfdb1ef661a52cdaee628a56d2437419"}, - {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:77810ef29e0fb1d289d225cabb9ee6cf4d11978a00bb99f7f8ec2132a84e0166"}, - {file = "numpy-1.26.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8ed07a90f5450d99dad60d3799f9c03c6566709bd53b497eb9ccad9a55867f36"}, - {file = "numpy-1.26.3-cp312-cp312-win32.whl", hash = "sha256:f73497e8c38295aaa4741bdfa4fda1a5aedda5473074369eca10626835445511"}, - {file = "numpy-1.26.3-cp312-cp312-win_amd64.whl", hash = "sha256:da4b0c6c699a0ad73c810736303f7fbae483bcb012e38d7eb06a5e3b432c981b"}, - {file = "numpy-1.26.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1666f634cb3c80ccbd77ec97bc17337718f56d6658acf5d3b906ca03e90ce87f"}, - {file = "numpy-1.26.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18c3319a7d39b2c6a9e3bb75aab2304ab79a811ac0168a671a62e6346c29b03f"}, - {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b7e807d6888da0db6e7e75838444d62495e2b588b99e90dd80c3459594e857b"}, - {file = "numpy-1.26.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4d362e17bcb0011738c2d83e0a65ea8ce627057b2fdda37678f4374a382a137"}, - {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b8c275f0ae90069496068c714387b4a0eba5d531aace269559ff2b43655edd58"}, - {file = "numpy-1.26.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:cc0743f0302b94f397a4a65a660d4cd24267439eb16493fb3caad2e4389bccbb"}, - {file = "numpy-1.26.3-cp39-cp39-win32.whl", hash = "sha256:9bc6d1a7f8cedd519c4b7b1156d98e051b726bf160715b769106661d567b3f03"}, - {file = "numpy-1.26.3-cp39-cp39-win_amd64.whl", hash = "sha256:867e3644e208c8922a3be26fc6bbf112a035f50f0a86497f98f228c50c607bb2"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3c67423b3703f8fbd90f5adaa37f85b5794d3366948efe9a5190a5f3a83fc34e"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46f47ee566d98849323f01b349d58f2557f02167ee301e5e28809a8c0e27a2d0"}, - {file = "numpy-1.26.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a8474703bffc65ca15853d5fd4d06b18138ae90c17c8d12169968e998e448bb5"}, - {file = "numpy-1.26.3.tar.gz", hash = "sha256:697df43e2b6310ecc9d95f05d5ef20eacc09c7c4ecc9da3f235d39e71b7da1e4"}, + {file = "numpy-2.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f"}, + {file = "numpy-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2"}, + {file = "numpy-2.0.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238"}, + {file = "numpy-2.0.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514"}, + {file = "numpy-2.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196"}, + {file = "numpy-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1"}, + {file = "numpy-2.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc"}, + {file = "numpy-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787"}, + {file = "numpy-2.0.0-cp310-cp310-win32.whl", hash = "sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98"}, + {file = "numpy-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b"}, + {file = "numpy-2.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5"}, + {file = "numpy-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289"}, + {file = "numpy-2.0.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609"}, + {file = "numpy-2.0.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871"}, + {file = "numpy-2.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4"}, + {file = "numpy-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581"}, + {file = "numpy-2.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995"}, + {file = "numpy-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f"}, + {file = "numpy-2.0.0-cp311-cp311-win32.whl", hash = "sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f"}, + {file = "numpy-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c"}, + {file = "numpy-2.0.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f"}, + {file = "numpy-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85"}, + {file = "numpy-2.0.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2"}, + {file = "numpy-2.0.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e"}, + {file = "numpy-2.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2"}, + {file = "numpy-2.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a"}, + {file = "numpy-2.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95"}, + {file = "numpy-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9"}, + {file = "numpy-2.0.0-cp312-cp312-win32.whl", hash = "sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54"}, + {file = "numpy-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df"}, + {file = "numpy-2.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de"}, + {file = "numpy-2.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb"}, + {file = "numpy-2.0.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f"}, + {file = "numpy-2.0.0-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86"}, + {file = "numpy-2.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a"}, + {file = "numpy-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d"}, + {file = "numpy-2.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4"}, + {file = "numpy-2.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44"}, + {file = "numpy-2.0.0-cp39-cp39-win32.whl", hash = "sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275"}, + {file = "numpy-2.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65"}, + {file = "numpy-2.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6"}, + {file = "numpy-2.0.0-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a"}, + {file = 
"numpy-2.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad"}, + {file = "numpy-2.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9"}, + {file = "numpy-2.0.0.tar.gz", hash = "sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864"}, ] [[package]] @@ -665,4 +588,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.13" -content-hash = "097ed443c49a9a397d357d315b58cd467ca0e90047f8c624b77a3d4569c14ef2" +content-hash = "d905f4221fc9dac67766e214aa716fafe85597c04a86494d3bdbbefb70f04fbf" diff --git a/pyproject.toml b/pyproject.toml index b6dbb5b..f99302b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,6 @@ scipy = [ pyarrow = "^16.0.0" pyyaml = "^6.0.1" fire = "^0.6.0" -jinja2 = "^3.1.4" [tool.poetry.group.dev.dependencies] pytest = "^8.2.0" From 2e18455e13723f28b000e485302157e3f9acd0ee Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 26 Jun 2024 15:25:42 -0600 Subject: [PATCH 34/40] add to docs on exports --- src/cosmicqc/analyze.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cosmicqc/analyze.py b/src/cosmicqc/analyze.py index eb54de7..4c649e4 100644 --- a/src/cosmicqc/analyze.py +++ b/src/cosmicqc/analyze.py @@ -53,6 +53,7 @@ def identify_outliers( export_path: Optional[str] = None An optional path to export the data using SCDataFrame export capabilities. If None no export is performed. + Note: compatible exports are CSV's, TSV's, and parquet. Returns: Union[pd.Series, pd.DataFrame]: @@ -153,6 +154,7 @@ def find_outliers( export_path: Optional[str] = None An optional path to export the data using SCDataFrame export capabilities. If None no export is performed. + Note: compatible exports are CSV's, TSV's, and parquet. Returns: pd.DataFrame: @@ -230,6 +232,7 @@ def label_outliers( export_path: Optional[str] = None An optional path to export the data using SCDataFrame export capabilities. If None no export is performed. + Note: compatible exports are CSV's, TSV's, and parquet. Returns: pd.DataFrame: From 273acf1f52f46db0b14cb6838200377f4e6d99fb Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 26 Jun 2024 15:38:12 -0600 Subject: [PATCH 35/40] add docs for context --- src/cosmicqc/cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py index b290ef1..ec306f2 100644 --- a/src/cosmicqc/cli.py +++ b/src/cosmicqc/cli.py @@ -15,6 +15,7 @@ from . import analyze +# used to avoid bugs with python-fire and pandas string-based repr compatibility. # referenced from https://github.com/google/python-fire/pull/446 # to be removed after python-fire merges changes (uncertain of timeline) def HasCustomRepr(component: object) -> bool: @@ -35,11 +36,10 @@ def HasCustomRepr(component: object) -> bool: return True return False - +# used to avoid bugs with python-fire and pandas string-based repr compatibility. 
# referenced with modifications from https://github.com/google/python-fire/pull/446 # to be removed after python-fire merges changes (uncertain of timeline) -# ruff: noqa: C901 -def _PrintResult( +def _PrintResult( # noqa: C901 component_trace: FireTrace, verbose: bool = False, serialize: Optional[bool] = None ) -> None: """Prints the result of the Fire call to stdout in a human readable way.""" From 77c27e138ba5942c6774a67e482cc42cbbc3d725 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 26 Jun 2024 15:39:20 -0600 Subject: [PATCH 36/40] note about ignore rule --- src/cosmicqc/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py index ec306f2..9713789 100644 --- a/src/cosmicqc/cli.py +++ b/src/cosmicqc/cli.py @@ -36,10 +36,12 @@ def HasCustomRepr(component: object) -> bool: return True return False + # used to avoid bugs with python-fire and pandas string-based repr compatibility. # referenced with modifications from https://github.com/google/python-fire/pull/446 # to be removed after python-fire merges changes (uncertain of timeline) -def _PrintResult( # noqa: C901 +# ignore rule below added to help avoid triggering ruff linting checks on temporary fix. +def _PrintResult( # noqa: C901 component_trace: FireTrace, verbose: bool = False, serialize: Optional[bool] = None ) -> None: """Prints the result of the Fire call to stdout in a human readable way.""" From 4d57177ff61da2d76611efa719a9531c5c28e889 Mon Sep 17 00:00:00 2001 From: d33bs Date: Wed, 26 Jun 2024 15:39:46 -0600 Subject: [PATCH 37/40] remove todo --- src/cosmicqc/cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py index 9713789..1fd7599 100644 --- a/src/cosmicqc/cli.py +++ b/src/cosmicqc/cli.py @@ -45,8 +45,6 @@ def _PrintResult( # noqa: C901 component_trace: FireTrace, verbose: bool = False, serialize: Optional[bool] = None ) -> None: """Prints the result of the Fire call to stdout in a human readable way.""" - # TODO(dbieber): Design human readable deserializable serialization method - # and move serialization to its own module. result = component_trace.GetResult() # Allow users to modify the return value of the component and provide # custom formatting. 
From 1900c65a9f34378923c809cc5029ab84544728b5 Mon Sep 17 00:00:00 2001
From: d33bs
Date: Wed, 26 Jun 2024 15:43:01 -0600
Subject: [PATCH 38/40] minor comment about display

---
 src/cosmicqc/cli.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/cosmicqc/cli.py b/src/cosmicqc/cli.py
index 1fd7599..e0d0a51 100644
--- a/src/cosmicqc/cli.py
+++ b/src/cosmicqc/cli.py
@@ -82,6 +82,7 @@ def _PrintResult( # noqa: C901
     else:
         help_text = helptext.HelpText(result, trace=component_trace, verbose=verbose)
         output = [help_text]
+    # used for displaying output through python-fire
     Display(output, out=sys.stdout)

From 0a46f408ea2423b412882959764c6057b29662bd Mon Sep 17 00:00:00 2001
From: d33bs
Date: Wed, 26 Jun 2024 15:43:44 -0600
Subject: [PATCH 39/40] retain code comment

---
 src/cosmicqc/scdataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cosmicqc/scdataframe.py b/src/cosmicqc/scdataframe.py
index 32f60a3..177683a 100644
--- a/src/cosmicqc/scdataframe.py
+++ b/src/cosmicqc/scdataframe.py
@@ -77,7 +77,7 @@ def __init__(
             self.data = pd.DataFrame(data)
         elif isinstance(data, (pathlib.Path, str)):
-            # if the data is a string, remember the original source
+            # if the data is a string or a pathlib path, remember the original source
             # through a data_source attr
             self.data_source = data

From 5d5257bae935d444808a10a98633d22d29df8d7a Mon Sep 17 00:00:00 2001
From: d33bs
Date: Wed, 26 Jun 2024 15:45:04 -0600
Subject: [PATCH 40/40] correct code comment

---
 tests/test_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 95b1151..622bd7e 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -11,7 +11,7 @@ def test_cli_util():
     """
-    Test the `identify_outliers` function of the CLI.
+    Test the `run_cli_command` utility for successful output.
     """
     command = """echo 'hello world'"""
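
Taken together, the patches above route coSMicQC data handling through SCDataFrame and add an optional export_path argument to identify_outliers, find_outliers, and label_outliers, mirrored by the CLI tests. A rough usage sketch of the resulting Python API follows; the input path "profiles.parquet" and the threshold value are illustrative placeholders rather than values taken from the patches:

    from cosmicqc.analyze import find_outliers

    # find_outliers accepts a pandas DataFrame, an SCDataFrame, or a path to a
    # CSV, TSV, or parquet file, and can export the outlier rows directly to a file.
    outliers = find_outliers(
        df="profiles.parquet",  # placeholder input path
        metadata_columns=[],
        feature_thresholds={"example_feature": 1.0},
        export_path="find_outliers_output.parquet",
    )

The CLI form mirrors what tests/test_cli.py exercises, for example: cosmicqc find_outliers --df data.csv --metadata_columns [] --feature_thresholds {"example_feature":1.0} --export_path find_outliers_output.parquet (where data.csv is a placeholder input file).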