Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci(mypy): add mypy check and adjust code for types #439

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
22 changes: 22 additions & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,28 @@ jobs:
uses: pre-commit/action@v3.0.0
with:
extra_args: --all-files
python-type-checks:
# This job is used to check Python types
name: Python type checks
# Avoid fail-fast to retain output
strategy:
fail-fast: false
runs-on: ubuntu-22.04
if: github.event_name != 'schedule'
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: Setup python, and check pre-commit cache
uses: ./.github/actions/setup-env
with:
python-version: ${{ env.TARGET_PYTHON_VERSION }}
cache-pre-commit: false
cache-venv: true
setup-poetry: true
install-deps: true
- name: Run mypy
run: |
poetry run mypy .
integration-test:
name: Pytest (Python ${{ matrix.python-version }} on ${{ matrix.os }})
# Runs pytest on all tested versions of python and OSes
Expand Down
74 changes: 66 additions & 8 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pycytominer/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def aggregate(
population_df = population_df.drop([columns_to_drop], axis="columns")

if output_file is not None:
output(
return output(
df=population_df,
output_filename=output_file,
output_type=output_type,
Expand Down
5 changes: 3 additions & 2 deletions pycytominer/cyto_utils/DeepProfiler_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import pandas as pd
import warnings

from pycytominer import aggregate, normalize
from pycytominer.cyto_utils import (
# use mypy ignores below to avoid duplicate import warnings
from pycytominer import aggregate, normalize # type: ignore[no-redef]
from pycytominer.cyto_utils import ( # type: ignore[no-redef]
load_npz_features,
load_npz_locations,
infer_cp_features,
Expand Down
12 changes: 8 additions & 4 deletions pycytominer/cyto_utils/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ def merge_single_cells(
"""

# Load the single cell dataframe by merging on the specific linking columns
sc_df = ""
left_compartment_loaded = False
linking_check_cols = []
merge_suffix_rename = []
for left_compartment in self.compartment_linking_cols:
Expand All @@ -737,7 +737,7 @@ def merge_single_cells(
left_compartment
]

if isinstance(sc_df, str):
if not left_compartment_loaded:
sc_df = self.load_compartment(compartment=left_compartment)

if compute_subsample:
Expand All @@ -752,6 +752,8 @@ def merge_single_cells(
sc_df, how="left", on=subset_logic_df.columns.tolist()
).reindex(sc_df.columns, axis="columns")

left_compartment_loaded = True

sc_df = sc_df.merge(
self.load_compartment(compartment=right_compartment),
left_on=[*self.merge_cols, left_link_col],
Expand Down Expand Up @@ -804,11 +806,13 @@ def merge_single_cells(

normalize_args["features"] = features

sc_df = normalize(profiles=sc_df, **normalize_args)
# ignore mypy warnings below as these reference root package imports
sc_df = normalize(profiles=sc_df, **normalize_args) # type: ignore[operator]

# In case platemap metadata is provided, use pycytominer.annotate for metadata
if platemap is not None:
sc_df = annotate(
# ignore mypy warnings below as these reference root package imports
sc_df = annotate( # type: ignore[operator]
profiles=sc_df, platemap=platemap, output_file=None, **kwargs
)

Expand Down
2 changes: 1 addition & 1 deletion pycytominer/cyto_utils/collate.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def collate(
with sqlite3.connect(cache_backend_file, isolation_level=None) as connection:
cursor = connection.cursor()
if column:
if print:
if printtoscreen:
print(f"Adding a Metadata_Plate column based on column {column}")
cursor.execute("ALTER TABLE Image ADD COLUMN Metadata_Plate TEXT;")
cursor.execute(f"UPDATE image SET Metadata_Plate ={column};")
Expand Down
10 changes: 7 additions & 3 deletions pycytominer/cyto_utils/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
def output(
df: pd.DataFrame,
output_filename: str,
output_type: str = "csv",
output_type: Optional[str] = "csv",
sep: str = ",",
float_format: Optional[str] = None,
compression_options: Union[str, Dict] = {"method": "gzip", "mtime": 1},
compression_options: Optional[Union[str, Dict]] = {"method": "gzip", "mtime": 1},
**kwargs,
):
"""Given an output file and compression options, write file to disk
Expand Down Expand Up @@ -79,6 +79,10 @@ def output(
)
"""

# ensure a default output type
if output_type is None:
output_type = "csv"
Comment on lines +86 to +87
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels dangerous - perhaps we could throw a warning that None is not supported?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is standard way of checking if a default none argument has been passed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, I should clarify what I mean by "dangerous" - if a user explicitly sets output_type=None and expects certain behavior (e.g., outputting to variable and not writing), they may be surprised that we overwrite it to output_type="csv". Perhaps the solution isn't to throw a warning (maybe add to docstring)? Or, we could elect to do nothing if this is standard expected behavior.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for these comments! I had to make this change in order to integrate aggregate with output properly given its default parameter types. I recognize we could change the defaults one direction or the other (with aggregate or output) but I didn't want to move too much outside of the scope of the changes here in order to get a first pass with mypy. I wasn't sure about rationalizing a default value for aggregate's output_type if it wasn't going to use output. At the same time, I didn't want to change too much of output's default capabilities due to how it integrates with many other functions (could get complex for the scope of this PR).

My feeling here was that if output was used at all that we'd expect to be "outputting" something, and that if None were passed to it for output_type (especially for nested function calls) that it should revert to the default string value for the parameter in output.


if output_type == "csv":
compression_options = set_compression_method(compression=compression_options)

Expand All @@ -98,7 +102,7 @@ def output(
return output_filename


def set_compression_method(compression: Union[str, Dict]):
def set_compression_method(compression: Optional[Union[str, Dict]]):
"""Set the compression options

Parameters
Expand Down
13 changes: 13 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ pytest-cov = "^4.1.0"
pre-commit = ">=3.3.2"
commitizen = "^3.12.0"
ruff = "^0.3.4"
mypy = "^1.11.2"

[tool.poetry.group.docs]
optional = true
Expand Down Expand Up @@ -177,6 +178,18 @@ preview = true
[tool.pytest.ini_options]
testpaths = "tests"

[tool.mypy]
# ignores optionally added type packages
ignore_missing_imports = true
# ignores redefinition of variable labels to new types
allow_redefinition = true
exclude = [
# ignore notebook-based walkthroughs
"walkthroughs",
# ignore tests dir
"tests"
]

[build-system]
requires = ["poetry-core>=1.7.0", "poetry-dynamic-versioning>=1.1.0"]
build-backend = "poetry_dynamic_versioning.backend"
Expand Down
14 changes: 14 additions & 0 deletions tests/test_cyto_utils/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,20 @@ def test_output_default():
result, DATA_DF, check_names=False, check_exact=False, atol=1e-3
)

# test with an output_type of None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice!

output_result = output(
df=DATA_DF,
output_filename=output_filename,
compression_options=TEST_COMPRESSION_OPTIONS,
float_format=None,
output_type=None,
)
result = pd.read_csv(output_result)

pd.testing.assert_frame_equal(
result, DATA_DF, check_names=False, check_exact=False, atol=1e-3
)


def test_output_tsv():
# Test input filename of writing a tab separated file
Expand Down
Loading