Skip to content

Commit

Permalink
enable parallel testing using pytest-xdist
Browse files Browse the repository at this point in the history
This PR adds [pytest-xdist](https://pytest-xdist.readthedocs.io/en/latest/index.html) as a development dependency to run the tests in parallel and speed up the test suite.

To make parallel testing work, some session-scoped pytest fixtures that generate data or files used by multiple test files were updated (see this [example](https://pytest-xdist.readthedocs.io/en/latest/how-to.html#making-session-scoped-fixtures-execute-only-once)).


Results:
- sequential testing: 380s
- parallel testing (8 cores): 85s

Running `pytest --durations=0` reports the most time-consuming tests. It shows that the downloader tests are the most expensive, and ignoring them reduces the testing time from 85s to 15s!
  • Loading branch information
CunliangGeng authored Dec 19, 2023
1 parent 4cfce6d commit 095dcec
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 58 deletions.
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ coverage.xml
*.cover
.hypothesis/
.pytest_cache/
pytest.ini

# Sphinx documentation
docs/_build/
Expand All @@ -61,6 +62,3 @@ tags
webapp/npapp/static/css/bokeh*.css
webapp/npapp/static/js/bokeh*.js
src/nplinker/scoring/iokr/data/SPEC/

tests/data/ProteoSAFe-METABOLOMICS-SNETS-c22f44b1-download_clustered_spectra
tests/data/ProteoSAFe-FEATURE-BASED-MOLECULAR-NETWORKING-92036537-download_cytoscape_data
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ namespaces = true # enable data directory to be identified

[tool.pytest.ini_options]
minversion = "6.0"
addopts = "-ra -q"
addopts = "-ra -n auto" # -ra: show summary info for all test outcomes; -n auto: run tests in parallel
testpaths = ["tests"]

[tool.coverage.run]
Expand Down
1 change: 1 addition & 0 deletions requirements.dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ bump2version
coverage[toml]
pytest
pytest-cov
pytest-xdist
ruff
sphinx
sphinx_rtd_theme
Expand Down
19 changes: 0 additions & 19 deletions tests/conftest.py

This file was deleted.

13 changes: 6 additions & 7 deletions tests/genomics/test_mibig_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,15 @@


class TestMibigBGCLoader:
@pytest.fixture
def data_dir(self, tmp_path):
download_root = tmp_path / "download"
extract_path = tmp_path / "metadata"
download_root.mkdir()
extract_path.mkdir()
@pytest.fixture(scope="session")
def data_dir(self, tmp_path_factory):
# create session-level temp directories (note: under pytest-xdist each worker has its own base temp dir, so these are per-worker, not shared)
download_root = tmp_path_factory.mktemp("download")
extract_path = tmp_path_factory.mktemp("metadata")
download_and_extract_mibig_metadata(download_root, extract_path)
yield str(extract_path)

@pytest.fixture
@pytest.fixture(scope="session")
def loader(self, data_dir):
loader = MibigLoader(data_dir)
yield loader
Expand Down
66 changes: 38 additions & 28 deletions tests/metabolomics/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import shutil
from os import PathLike
import httpx
import pytest
Expand All @@ -7,8 +6,9 @@
from .. import GNPS_DATA_DIR


@pytest.fixture(scope="session", autouse=True)
@pytest.fixture(scope="session")
def gnps_website_is_down():
"""Check if the GNPS website is down."""
gnps_url = "https://gnps.ucsd.edu"
try:
_ = httpx.get(gnps_url)
Expand All @@ -17,9 +17,9 @@ def gnps_website_is_down():
return True


@pytest.fixture(scope="session", autouse=True)
@pytest.fixture(scope="session")
def gnps_zip_files() -> dict[GNPSFormat, PathLike]:
"""Get the GNPS zip archives as a dictionary.
"""Get the paths of the GNPS zip archives as a dict.
The dict keys are the workflow short names taken from the GNPSFormat enum.
The dict values are the paths to the zip archives.
Expand All @@ -41,81 +41,91 @@ def gnps_zip_files() -> dict[GNPSFormat, PathLike]:
}


@pytest.fixture(scope="session")
def tmp_gnps_dir(tmp_path_factory):
    """Create the temporary root directory used by the GNPS tests.

    The directory is created once per test session via ``tmp_path_factory``
    with the base name "gnps", and its path is returned.
    """
    gnps_root = tmp_path_factory.mktemp("gnps")
    return gnps_root


@pytest.fixture(scope="session", autouse=True)
def prepare_data(gnps_zip_files):
"""Extract GNPS zip archive to a temporary directory and delete it after the test run.
def prepare_data(tmp_gnps_dir, gnps_zip_files):
"""Extract GNPS zip archives to the "tmp_gnps_dir" directory.
The extracted archive is named after the workflow, e.g. "SNETS", "SNETSV2", "FBMN", so for
example the SNETS archive is extracted to the "SNETS" directory in the "tmp_gnps_dir" directory.
The temporary directory is named after the workflow, e.g. "SNETS", "SNETSV2", "FBMN".
Note that these directory names are also used in other fixtures.
Note that the `autouse` must be set to `True` so that the fixture is executed before any other
test function.
"""
for workflow, zip_file in gnps_zip_files.items():
extract_archive(zip_file, GNPS_DATA_DIR / workflow.name)
yield
for workflow in gnps_zip_files:
shutil.rmtree(GNPS_DATA_DIR / workflow.name)
extract_archive(zip_file, tmp_gnps_dir / workflow.name)


@pytest.fixture(scope="session")
def gnps_file_mappings_files() -> dict[GNPSFormat, PathLike]:
def gnps_file_mappings_files(tmp_gnps_dir) -> dict[GNPSFormat, PathLike]:
"""Get the paths of the GNPS file mappings."""
return {
GNPSFormat.SNETS: GNPS_DATA_DIR
GNPSFormat.SNETS: tmp_gnps_dir
/ GNPSFormat.SNETS.name
/ "clusterinfosummarygroup_attributes_withIDs_withcomponentID"
/ "d69356c8e5044c2a9fef3dd2a2f991e1.tsv",
GNPSFormat.SNETSV2: GNPS_DATA_DIR
GNPSFormat.SNETSV2: tmp_gnps_dir
/ GNPSFormat.SNETSV2.name
/ "clusterinfosummarygroup_attributes_withIDs_withcomponentID"
/ "16f782af01bc4f50a23ed163566072f9.clustersummary",
GNPSFormat.FBMN: GNPS_DATA_DIR
GNPSFormat.FBMN: tmp_gnps_dir
/ GNPSFormat.FBMN.name
/ "quantification_table_reformatted"
/ "1a12f6fbd2ca4e099ec56bdaea56368f.csv",
}


@pytest.fixture(scope="session")
def gnps_spectra_files() -> dict[GNPSFormat, PathLike]:
def gnps_spectra_files(tmp_gnps_dir) -> dict[GNPSFormat, PathLike]:
"""Get the paths of the GNPS spectra."""
return {
GNPSFormat.SNETS: GNPS_DATA_DIR
GNPSFormat.SNETS: tmp_gnps_dir
/ GNPSFormat.SNETS.name
/ "METABOLOMICS-SNETS-c22f44b1-download_clustered_spectra-main.mgf",
GNPSFormat.SNETSV2: GNPS_DATA_DIR
GNPSFormat.SNETSV2: tmp_gnps_dir
/ GNPSFormat.SNETSV2.name
/ "METABOLOMICS-SNETS-V2-189e8bf1-download_clustered_spectra-main.mgf",
GNPSFormat.FBMN: GNPS_DATA_DIR / GNPSFormat.FBMN.name / "spectra" / "specs_ms.mgf",
GNPSFormat.FBMN: tmp_gnps_dir / GNPSFormat.FBMN.name / "spectra" / "specs_ms.mgf",
}


@pytest.fixture(scope="session")
def gnps_mf_files() -> dict[GNPSFormat, PathLike]:
def gnps_mf_files(tmp_gnps_dir) -> dict[GNPSFormat, PathLike]:
"""Get the paths of the GNPS molecular formula files."""
return {
GNPSFormat.SNETS: GNPS_DATA_DIR
GNPSFormat.SNETS: tmp_gnps_dir
/ GNPSFormat.SNETS.name
/ "networkedges_selfloop"
/ "6da5be36f5b14e878860167fa07004d6.pairsinfo",
GNPSFormat.SNETSV2: GNPS_DATA_DIR
GNPSFormat.SNETSV2: tmp_gnps_dir
/ GNPSFormat.SNETSV2.name
/ "networkedges_selfloop"
/ "06dd31e28bb547ba852859219db9298c..selfloop",
GNPSFormat.FBMN: GNPS_DATA_DIR
GNPSFormat.FBMN: tmp_gnps_dir
/ GNPSFormat.FBMN.name
/ "networkedges_selfloop"
/ "c74fec018736475483e9c8b05e230cce..selfloop",
}


@pytest.fixture(scope="session")
def gnps_annotations_files() -> dict[GNPSFormat, PathLike]:
def gnps_annotations_files(tmp_gnps_dir) -> dict[GNPSFormat, PathLike]:
"""Get the paths of the GNPS annotations."""
return {
GNPSFormat.SNETS: GNPS_DATA_DIR
GNPSFormat.SNETS: tmp_gnps_dir
/ GNPSFormat.SNETS.name
/ "result_specnets_DB"
/ "885e4c5485ba42569e4876d1fe90d759.tsv",
GNPSFormat.SNETSV2: GNPS_DATA_DIR
GNPSFormat.SNETSV2: tmp_gnps_dir
/ GNPSFormat.SNETSV2.name
/ "result_specnets_DB"
/ "017fadadf6744c10b5d39f109e1438dc.tsv",
GNPSFormat.FBMN: GNPS_DATA_DIR
GNPSFormat.FBMN: tmp_gnps_dir
/ GNPSFormat.FBMN.name
/ "DB_result"
/ "7dc5b46b50d94246a1de12ef485d0f75.tsv",
Expand Down
9 changes: 9 additions & 0 deletions tests/test_strain.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
import pytest
from nplinker.strain import Strain


@pytest.fixture
def strain() -> Strain:
    """Build a Strain with id "strain_1" carrying the single alias "strain_1_a"."""
    sut = Strain("strain_1")
    sut.add_alias("strain_1_a")
    return sut


def test_default():
sut = Strain("strain_1")
assert sut.id == "strain_1"
Expand Down
8 changes: 8 additions & 0 deletions tests/test_strain_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
from nplinker.strain_collection import StrainCollection


@pytest.fixture
def strain() -> Strain:
    """Provide a fresh Strain for each test.

    The strain has the id "strain_1" and exactly one alias, "strain_1_a".
    """
    aliased_strain = Strain("strain_1")
    aliased_strain.add_alias("strain_1_a")
    return aliased_strain


@pytest.fixture
def collection(strain: Strain) -> StrainCollection:
sut = StrainCollection()
Expand Down

0 comments on commit 095dcec

Please sign in to comment.