diff --git a/src/nplinker/__init__.py b/src/nplinker/__init__.py index 43ace5fe..853775c7 100644 --- a/src/nplinker/__init__.py +++ b/src/nplinker/__init__.py @@ -1,5 +1,6 @@ import logging -from pathlib import Path +from .logger import setup_logging +from .nplinker import NPLinker logging.getLogger(__name__).addHandler(logging.NullHandler()) @@ -9,48 +10,4 @@ __version__ = "2.0.0-alpha.1" -# The path to the NPLinker application database directory -NPLINKER_APP_DATA_DIR = Path(__file__).parent / "data" -del Path - - -def setup_logging(level: str = "INFO", file: str = "", use_console: bool = True) -> None: - """Setup logging configuration for the ancestor logger "nplinker". - - Args: - level: The log level, use the logging module's log level constants. Valid levels are: - "NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". - file: The file to write the log to. If the file does not exist, it will be created. The log - will be written to the file in append mode. If the file is an empty string (by default), - the log will not be written to a file. - use_console: Whether to log to the console. - """ - from rich.console import Console - from rich.logging import RichHandler - - # Get the ancestor logger "nplinker" - logger = logging.getLogger(__name__) - logger.setLevel(level) - - # File handler - if file: - logger.addHandler( - RichHandler( - console=Console(file=open(file, "a"), width=120), # force the line width to 120 - omit_repeated_times=False, - rich_tracebacks=True, - tracebacks_show_locals=True, - log_time_format="[%Y-%m-%d %X]", - ) - ) - - # Console handler - if use_console: - logger.addHandler( - RichHandler( - omit_repeated_times=False, - rich_tracebacks=True, - tracebacks_show_locals=True, - log_time_format="[%Y-%m-%d %X]", - ) - ) +__all__ = ["NPLinker", "setup_logging"] diff --git a/src/nplinker/arranger.py b/src/nplinker/arranger.py index 06df9ac5..3a88be7a 100644 --- a/src/nplinker/arranger.py +++ b/src/nplinker/arranger.py @@ -1,7 +1,9 @@ +from __future__ import annotations import fnmatch import json import shutil from glob import glob +from os import PathLike from pathlib import Path from dynaconf import Dynaconf from jsonschema import validate @@ -369,7 +371,7 @@ def arrange_strains_selected(self) -> None: validate(instance=json_data, schema=USER_STRAINS_SCHEMA) -def validate_gnps(gnps_dir: Path) -> None: +def validate_gnps(gnps_dir: str | PathLike) -> None: """Validate the GNPS data directory and its contents. The GNPS data directory must contain the following files: @@ -387,6 +389,7 @@ def validate_gnps(gnps_dir: Path) -> None: is not found. ValueError: If both file_mappings.tsv and file_mapping.csv are found. """ + gnps_dir = Path(gnps_dir) if not gnps_dir.exists(): raise FileNotFoundError(f"GNPS data directory not found at {gnps_dir}") @@ -415,7 +418,7 @@ def validate_gnps(gnps_dir: Path) -> None: ) -def validate_antismash(antismash_dir: Path) -> None: +def validate_antismash(antismash_dir: str | PathLike) -> None: """Validate the antiSMASH data directory and its contents. The validation only checks the structure of the antiSMASH data directory and file names. @@ -438,6 +441,7 @@ def validate_antismash(antismash_dir: Path) -> None: sub-directory. ValueError: If any sub-directory name contains a space. """ + antismash_dir = Path(antismash_dir) if not antismash_dir.exists(): raise FileNotFoundError(f"antiSMASH data directory not found at {antismash_dir}") @@ -460,7 +464,7 @@ def validate_antismash(antismash_dir: Path) -> None: raise FileNotFoundError(f"No BGC files found in antiSMASH sub-directory {sub_dir}") -def validate_bigscape(bigscape_dir: Path, cutoff: str) -> None: +def validate_bigscape(bigscape_dir: str | PathLike, cutoff: str) -> None: """Validate the BiG-SCAPE data directory and its contents. The BiG-SCAPE data directory must exist and contain the clustering file @@ -478,6 +482,7 @@ def validate_bigscape(bigscape_dir: Path, cutoff: str) -> None: Raises: FileNotFoundError: If the BiG-SCAPE data directory or the clustering file is not found. """ + bigscape_dir = Path(bigscape_dir) if not bigscape_dir.exists(): raise FileNotFoundError(f"BiG-SCAPE data directory not found at {bigscape_dir}") diff --git a/src/nplinker/defaults.py b/src/nplinker/defaults.py index a6d53050..65ef08a1 100644 --- a/src/nplinker/defaults.py +++ b/src/nplinker/defaults.py @@ -1,18 +1,26 @@ -STRAIN_MAPPINGS_FILENAME = "strain_mappings.json" -GENOME_BGC_MAPPINGS_FILENAME = "genome_bgc_mappings.json" -GENOME_STATUS_FILENAME = "genome_status.json" -GNPS_SPECTRA_FILENAME = "spectra.mgf" -GNPS_MOLECULAR_FAMILY_FILENAME = "molecular_families.tsv" -GNPS_ANNOTATIONS_FILENAME = "annotations.tsv" -GNPS_FILE_MAPPINGS_TSV = "file_mappings.tsv" -GNPS_FILE_MAPPINGS_CSV = "file_mappings.csv" -STRAINS_SELECTED_FILENAME = "strains_selected.json" +from pathlib import Path +from typing import Final -DOWNLOADS_DIRNAME = "downloads" -MIBIG_DIRNAME = "mibig" -GNPS_DIRNAME = "gnps" -ANTISMASH_DIRNAME = "antismash" -BIGSCAPE_DIRNAME = "bigscape" -BIGSCAPE_RUNNING_OUTPUT_DIRNAME = "bigscape_running_output" -OUTPUT_DIRNAME = "output" +# The path to the NPLinker application database directory +NPLINKER_APP_DATA_DIR: Final = Path(__file__).parent / "data" + + +STRAIN_MAPPINGS_FILENAME: Final = "strain_mappings.json" +GENOME_BGC_MAPPINGS_FILENAME: Final = "genome_bgc_mappings.json" +GENOME_STATUS_FILENAME: Final = "genome_status.json" +GNPS_SPECTRA_FILENAME: Final = "spectra.mgf" +GNPS_MOLECULAR_FAMILY_FILENAME: Final = "molecular_families.tsv" +GNPS_ANNOTATIONS_FILENAME: Final = "annotations.tsv" +GNPS_FILE_MAPPINGS_TSV: Final = "file_mappings.tsv" +GNPS_FILE_MAPPINGS_CSV: Final = "file_mappings.csv" +STRAINS_SELECTED_FILENAME: Final = "strains_selected.json" + + +DOWNLOADS_DIRNAME: Final = "downloads" +MIBIG_DIRNAME: Final = "mibig" +GNPS_DIRNAME: Final = "gnps" +ANTISMASH_DIRNAME: Final = "antismash" +BIGSCAPE_DIRNAME: Final = "bigscape" +BIGSCAPE_RUNNING_OUTPUT_DIRNAME: Final = "bigscape_running_output" +OUTPUT_DIRNAME: Final = "output" diff --git a/src/nplinker/genomics/abc.py b/src/nplinker/genomics/abc.py index daeb3bef..53dc0e7e 100644 --- a/src/nplinker/genomics/abc.py +++ b/src/nplinker/genomics/abc.py @@ -1,5 +1,7 @@ +from __future__ import annotations from abc import ABC from abc import abstractmethod +from os import PathLike from .bgc import BGC from .gcf import GCF @@ -7,14 +9,14 @@ class BGCLoaderBase(ABC): """Abstract base class for BGC loader.""" - def __init__(self, data_dir: str) -> None: + def __init__(self, data_dir: str | PathLike) -> None: """Initialize the BGC loader. Args: data_dir: Path to directory that contains BGC metadata files (.json) or full data genbank files (.gbk). """ - self.data_dir = data_dir + self.data_dir = str(data_dir) @abstractmethod def get_files(self) -> dict[str, str]: diff --git a/src/nplinker/genomics/antismash/antismash_loader.py b/src/nplinker/genomics/antismash/antismash_loader.py index 4c6bd991..0994a212 100644 --- a/src/nplinker/genomics/antismash/antismash_loader.py +++ b/src/nplinker/genomics/antismash/antismash_loader.py @@ -2,7 +2,9 @@ import fnmatch import logging import os -from typing import Mapping +from collections.abc import Mapping +from os import PathLike +from pathlib import Path from Bio import SeqIO from Bio import SeqRecord from nplinker.genomics import BGC @@ -15,7 +17,7 @@ logger = logging.getLogger(__name__) -class AntismashBGCLoader: +class AntismashBGCLoader(BGCLoaderBase): """Build a loader for AntiSMASH BGC genbank (.gbk) files. Note: @@ -32,14 +34,14 @@ class AntismashBGCLoader: ``` """ - def __init__(self, data_dir: str) -> None: + def __init__(self, data_dir: str | PathLike) -> None: """Initialize the AntiSMASH BGC loader. Args: data_dir: Path to AntiSMASH directory that contains a collection of AntiSMASH outputs. """ - self.data_dir = data_dir + self.data_dir = str(data_dir) self._file_dict = self._parse_data_dir(self.data_dir) self._bgcs = self._parse_bgcs(self._file_dict) @@ -111,7 +113,7 @@ def _parse_bgcs(bgc_files: Mapping[str, str]) -> list[BGC]: return [parse_bgc_genbank(file) for file in bgc_files.values()] -def parse_bgc_genbank(file: str) -> BGC: +def parse_bgc_genbank(file: str | PathLike) -> BGC: """Parse a single BGC gbk file to BGC object. Args: @@ -124,7 +126,8 @@ def parse_bgc_genbank(file: str) -> BGC: >>> bgc = AntismashBGCLoader.parse_bgc( ... "/data/antismash/GCF_000016425.1/NC_009380.1.region001.gbk") """ - fname = os.path.splitext(os.path.basename(file))[0] + file = Path(file) + fname = file.stem record = SeqIO.read(file, format="genbank") description = record.description # "DEFINITION" in gbk file @@ -138,7 +141,7 @@ def parse_bgc_genbank(file: str) -> BGC: bgc = BGC(fname, *product_prediction) bgc.description = description bgc.antismash_id = antismash_id - bgc.antismash_file = file + bgc.antismash_file = str(file) bgc.antismash_region = features.get("region_number") bgc.smiles = features.get("smiles") bgc.strain = Strain(fname) @@ -160,7 +163,3 @@ def _parse_antismash_genbank(record: SeqRecord.SeqRecord) -> dict: smiles = tuple(i.replace(" ", "") for i in smiles) features["smiles"] = smiles return features - - -# register as virtual class to prevent metaclass conflicts -BGCLoaderBase.register(AntismashBGCLoader) diff --git a/src/nplinker/genomics/antismash/podp_antismash_downloader.py b/src/nplinker/genomics/antismash/podp_antismash_downloader.py index 2a77cc20..1d1bf0b6 100644 --- a/src/nplinker/genomics/antismash/podp_antismash_downloader.py +++ b/src/nplinker/genomics/antismash/podp_antismash_downloader.py @@ -3,10 +3,10 @@ import logging import re import time +from collections.abc import Mapping +from collections.abc import Sequence from os import PathLike from pathlib import Path -from typing import Mapping -from typing import Sequence import httpx from bs4 import BeautifulSoup from bs4 import NavigableString diff --git a/src/nplinker/genomics/bigscape/bigscape_loader.py b/src/nplinker/genomics/bigscape/bigscape_loader.py index c4e7637b..f38f47af 100644 --- a/src/nplinker/genomics/bigscape/bigscape_loader.py +++ b/src/nplinker/genomics/bigscape/bigscape_loader.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -class BigscapeGCFLoader: +class BigscapeGCFLoader(GCFLoaderBase): """Build a loader for BiG-SCAPE GCF cluster file. Attributes: @@ -61,11 +61,7 @@ def _parse_gcf(cluster_file: str) -> list[GCF]: return list(gcf_dict.values()) -# register as virtual class to prevent metaclass conflicts -GCFLoaderBase.register(BigscapeGCFLoader) - - -class BigscapeV2GCFLoader: +class BigscapeV2GCFLoader(GCFLoaderBase): """Build a loader for BiG-SCAPE v2 database file. Attributes: @@ -137,7 +133,3 @@ def _parse_gcf(db_file: str) -> list[GCF]: gcf_dict[family_id].bgc_ids.add(bgc_id) return list(gcf_dict.values()) - - -# register as virtual class to prevent metaclass conflicts -GCFLoaderBase.register(BigscapeV2GCFLoader) diff --git a/src/nplinker/genomics/mibig/mibig_loader.py b/src/nplinker/genomics/mibig/mibig_loader.py index 3d8eab6b..b588a189 100644 --- a/src/nplinker/genomics/mibig/mibig_loader.py +++ b/src/nplinker/genomics/mibig/mibig_loader.py @@ -1,5 +1,7 @@ +from __future__ import annotations import logging -import os.path +from os import PathLike +from pathlib import Path from nplinker.strain import Strain from nplinker.utils import list_files from ..abc import BGCLoaderBase @@ -10,7 +12,7 @@ logger = logging.getLogger(__name__) -class MibigLoader: +class MibigLoader(BGCLoaderBase): """Parse MIBiG metadata files and return BGC objects. MIBiG metadata file (json) contains annotations/metadata information @@ -20,13 +22,13 @@ class MibigLoader: objects have Strain object as their strain attribute (i.e. `BGC.strain`). """ - def __init__(self, data_dir: str): + def __init__(self, data_dir: str | PathLike): """Initialize the MIBiG metadata loader. Args: data_dir: Path to the directory of MIBiG metadata json files """ - self.data_dir = data_dir + self.data_dir = str(data_dir) self._file_dict = self.parse_data_dir(self.data_dir) self._metadata_dict = self._parse_metadata() self._bgcs = self._parse_bgcs() @@ -41,7 +43,7 @@ def get_files(self) -> dict[str, str]: return self._file_dict @staticmethod - def parse_data_dir(data_dir: str) -> dict[str, str]: + def parse_data_dir(data_dir: str | PathLike) -> dict[str, str]: """Parse metadata directory and return paths to all metadata json files. Args: @@ -54,7 +56,7 @@ def parse_data_dir(data_dir: str) -> dict[str, str]: file_dict = {} json_files = list_files(data_dir, prefix="BGC", suffix=".json") for file in json_files: - fname = os.path.splitext(os.path.basename(file))[0] + fname = Path(file).stem file_dict[fname] = file return file_dict @@ -99,7 +101,7 @@ def _parse_bgcs(self) -> list[BGC]: return [parse_bgc_metadata_json(file) for file in self._file_dict.values()] -def parse_bgc_metadata_json(file: str) -> BGC: +def parse_bgc_metadata_json(file: str | PathLike) -> BGC: """Parse MIBiG metadata file and return BGC object. Note that the MiBIG accession is used as the BGC id and strain name. The BGC @@ -111,12 +113,8 @@ def parse_bgc_metadata_json(file: str) -> BGC: Returns: BGC object """ - metadata = MibigMetadata(file) + metadata = MibigMetadata(str(file)) mibig_bgc = BGC(metadata.mibig_accession, *metadata.biosyn_class) mibig_bgc.mibig_bgc_class = metadata.biosyn_class mibig_bgc.strain = Strain(metadata.mibig_accession) return mibig_bgc - - -# register as virtual class to prevent metaclass conflicts -BGCLoaderBase.register(MibigLoader) diff --git a/src/nplinker/genomics/mibig/mibig_metadata.py b/src/nplinker/genomics/mibig/mibig_metadata.py index 4abcbdf8..ebef8685 100644 --- a/src/nplinker/genomics/mibig/mibig_metadata.py +++ b/src/nplinker/genomics/mibig/mibig_metadata.py @@ -1,4 +1,6 @@ +from __future__ import annotations import json +from os import PathLike class MibigMetadata: @@ -9,7 +11,7 @@ class MibigMetadata: https://mibig.secondarymetabolites.org/download. """ - def __init__(self, file: str) -> None: + def __init__(self, file: str | PathLike) -> None: """Initialize the MIBiG metadata object. Args: @@ -18,7 +20,7 @@ def __init__(self, file: str) -> None: Examples: >>> metadata = MibigMetadata("/data/BGC0000001.json") """ - self.file = file + self.file = str(file) with open(self.file, "rb") as f: self.metadata = json.load(f) diff --git a/src/nplinker/genomics/utils.py b/src/nplinker/genomics/utils.py index 41b65316..fd861b9f 100644 --- a/src/nplinker/genomics/utils.py +++ b/src/nplinker/genomics/utils.py @@ -1,10 +1,10 @@ from __future__ import annotations import json import logging +from collections.abc import Mapping +from collections.abc import Sequence from os import PathLike from pathlib import Path -from typing import Mapping -from typing import Sequence from jsonschema import validate from nplinker.defaults import GENOME_BGC_MAPPINGS_FILENAME from nplinker.schemas import GENOME_BGC_MAPPINGS_SCHEMA diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index 3f248bec..db817f79 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -3,8 +3,8 @@ import os from deprecated import deprecated from dynaconf import Dynaconf -from nplinker import NPLINKER_APP_DATA_DIR from nplinker import defaults +from nplinker.defaults import NPLINKER_APP_DATA_DIR from nplinker.genomics import BGC from nplinker.genomics import GCF from nplinker.genomics.antismash import AntismashBGCLoader diff --git a/src/nplinker/logger.py b/src/nplinker/logger.py new file mode 100644 index 00000000..b02491ee --- /dev/null +++ b/src/nplinker/logger.py @@ -0,0 +1,42 @@ +import logging +from rich.console import Console +from rich.logging import RichHandler + + +def setup_logging(level: str = "INFO", file: str = "", use_console: bool = True) -> None: + """Setup logging configuration for the ancestor logger "nplinker". + + Args: + level: The log level, use the logging module's log level constants. Valid levels are: + "NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL". + file: The file to write the log to. If the file does not exist, it will be created. The log + will be written to the file in append mode. If the file is an empty string (by default), + the log will not be written to a file. + use_console: Whether to log to the console. + """ + # Get the ancestor logger "nplinker" + logger = logging.getLogger("nplinker") + logger.setLevel(level) + + # File handler + if file: + logger.addHandler( + RichHandler( + console=Console(file=open(file, "a"), width=120), # force the line width to 120 + omit_repeated_times=False, + rich_tracebacks=True, + tracebacks_show_locals=True, + log_time_format="[%Y-%m-%d %X]", + ) + ) + + # Console handler + if use_console: + logger.addHandler( + RichHandler( + omit_repeated_times=False, + rich_tracebacks=True, + tracebacks_show_locals=True, + log_time_format="[%Y-%m-%d %X]", + ) + ) diff --git a/src/nplinker/metabolomics/abc.py b/src/nplinker/metabolomics/abc.py index 6af3052e..dced9252 100644 --- a/src/nplinker/metabolomics/abc.py +++ b/src/nplinker/metabolomics/abc.py @@ -1,3 +1,4 @@ +from __future__ import annotations from abc import ABC from abc import abstractmethod from .molecular_family import MolecularFamily @@ -9,7 +10,7 @@ class SpectrumLoaderBase(ABC): @property @abstractmethod - def spectra(self) -> list["Spectrum"]: + def spectra(self) -> list[Spectrum]: """Get Spectrum objects. Returns: @@ -21,7 +22,7 @@ class MolecularFamilyLoaderBase(ABC): """Abstract base class for MolecularFamilyLoader.""" @abstractmethod - def get_mfs(self, keep_singleton: bool) -> list["MolecularFamily"]: + def get_mfs(self, keep_singleton: bool) -> list[MolecularFamily]: """Get MolecularFamily objects. Args: diff --git a/src/nplinker/metabolomics/utils.py b/src/nplinker/metabolomics/utils.py index 1110fb7d..0f55ef12 100644 --- a/src/nplinker/metabolomics/utils.py +++ b/src/nplinker/metabolomics/utils.py @@ -1,10 +1,10 @@ from __future__ import annotations import json import logging +from collections.abc import Mapping +from collections.abc import Sequence from os import PathLike from pathlib import Path -from typing import Mapping -from typing import Sequence from nplinker.schemas import validate_podp_json from nplinker.strain import StrainCollection from .gnps.gnps_file_mapping_loader import GNPSFileMappingLoader diff --git a/src/nplinker/nplinker.py b/src/nplinker/nplinker.py index e0455b62..80752633 100644 --- a/src/nplinker/nplinker.py +++ b/src/nplinker/nplinker.py @@ -1,18 +1,18 @@ from __future__ import annotations import logging import pickle +from collections.abc import Sequence from os import PathLike from pprint import pformat -from typing import Sequence -from typing import TypeVar +from typing import Any from typing import overload -from . import setup_logging from .arranger import DatasetArranger from .config import load_config from .defaults import OUTPUT_DIRNAME from .genomics import BGC from .genomics import GCF from .loader import DatasetLoader +from .logger import setup_logging from .metabolomics import MolecularFamily from .metabolomics import Spectrum from .scoring.link_graph import LinkGraph @@ -22,8 +22,6 @@ logger = logging.getLogger(__name__) -ObjectType = TypeVar("ObjectType", BGC, GCF, Spectrum, MolecularFamily) - class NPLinker: """Main class for the NPLinker application. @@ -191,22 +189,27 @@ def load_data(self): @overload def get_links( - self, objects: Sequence[BGC], scoring_method: str, **scoring_params + self, objects: Sequence[BGC], scoring_method: str, **scoring_params: Any ) -> LinkGraph: ... @overload def get_links( - self, objects: Sequence[GCF], scoring_method: str, **scoring_params + self, objects: Sequence[GCF], scoring_method: str, **scoring_params: Any ) -> LinkGraph: ... @overload def get_links( - self, objects: Sequence[Spectrum], scoring_method: str, **scoring_params + self, objects: Sequence[Spectrum], scoring_method: str, **scoring_params: Any ) -> LinkGraph: ... @overload def get_links( - self, objects: Sequence[MolecularFamily], scoring_method: str, **scoring_params + self, objects: Sequence[MolecularFamily], scoring_method: str, **scoring_params: Any ) -> LinkGraph: ... - def get_links(self, objects, scoring_method, **scoring_params): + def get_links( + self, + objects: Sequence[BGC] | Sequence[GCF] | Sequence[Spectrum] | Sequence[MolecularFamily], + scoring_method: str, + **scoring_params: Any, + ) -> LinkGraph: """Get the links for the given objects using the specified scoring method and parameters. Args: @@ -214,7 +217,7 @@ def get_links(self, objects, scoring_method, **scoring_params): type, i.e. `BGC`, `GCF`, `Spectrum` or `MolecularFamily` type. For scoring method `metcalf`, the BGC objects are not supported. scoring_method: The scoring method to use. Must be one of the valid scoring methods - `self.scoring_methods`. + `self.scoring_methods`, such as "metcalf". scoring_params: Parameters to pass to the scoring method. If not provided, the default parameters for the scoring method will be used. diff --git a/src/nplinker/scoring/__init__.py b/src/nplinker/scoring/__init__.py index 597fa4f9..6fcaac9c 100644 --- a/src/nplinker/scoring/__init__.py +++ b/src/nplinker/scoring/__init__.py @@ -1,4 +1,3 @@ -from .abc import ScoringBase from .link_graph import LinkGraph from .metcalf_scoring import MetcalfScoring from .score import Score @@ -9,6 +8,5 @@ "LinkGraph", "MetcalfScoring", "Score", - "ScoringBase", "ScoringMethod", ] diff --git a/src/nplinker/scoring/link_graph.py b/src/nplinker/scoring/link_graph.py index ce1a06b6..316ec450 100644 --- a/src/nplinker/scoring/link_graph.py +++ b/src/nplinker/scoring/link_graph.py @@ -1,5 +1,6 @@ from __future__ import annotations from functools import wraps +from typing import Union from networkx import Graph from nplinker.genomics import GCF from nplinker.metabolomics import MolecularFamily @@ -8,11 +9,17 @@ from .scoring_method import ScoringMethod +# Type aliases +Entity = Union[GCF, Spectrum, MolecularFamily] # using Union to ensure python 3.9 compatibility +LINK_DATA = dict[str, Score] +LINK = tuple[Entity, Entity, LINK_DATA] + + def validate_u(func): """A decorator to validate the type of the u object.""" @wraps(func) - def wrapper(self, u: GCF | Spectrum | MolecularFamily, *args, **kwargs): + def wrapper(self, u: Entity, *args, **kwargs): if not isinstance(u, (GCF, Spectrum, MolecularFamily)): raise TypeError(f"{u} is not a GCF, Spectrum, or MolecularFamily object.") @@ -27,8 +34,8 @@ def validate_uv(func): @wraps(func) def wrapper( self, - u: GCF | Spectrum | MolecularFamily, - v: GCF | Spectrum | MolecularFamily, + u: Entity, + v: Entity, *args, **kwargs, ): @@ -94,9 +101,7 @@ def __len__(self) -> int: return len(self._g) @validate_u - def __getitem__( - self, u: GCF | Spectrum | MolecularFamily - ) -> dict[GCF | Spectrum | MolecularFamily, dict[str, Score]]: + def __getitem__(self, u: Entity) -> dict[Entity, LINK_DATA]: """Get all links for a given object. Args: @@ -118,9 +123,7 @@ def __getitem__( @property def links( self, - ) -> list[ - tuple[GCF | Spectrum | MolecularFamily, GCF | Spectrum | MolecularFamily, dict[str, Score]] - ]: + ) -> list[LINK]: """Get all links. Returns: @@ -131,8 +134,8 @@ def links( @validate_uv def add_link( self, - u: GCF | Spectrum | MolecularFamily, - v: GCF | Spectrum | MolecularFamily, + u: Entity, + v: Entity, **data: Score, ) -> None: """Add a link between two objects. @@ -161,9 +164,7 @@ def add_link( self._g.add_edge(u, v, **data) @validate_uv - def has_link( - self, u: GCF | Spectrum | MolecularFamily, v: GCF | Spectrum | MolecularFamily - ) -> bool: + def has_link(self, u: Entity, v: Entity) -> bool: """Check if there is a link between two objects. Args: @@ -178,9 +179,9 @@ def has_link( @validate_uv def get_link_data( self, - u: GCF | Spectrum | MolecularFamily, - v: GCF | Spectrum | MolecularFamily, - ) -> dict[str, Score] | None: + u: Entity, + v: Entity, + ) -> LINK_DATA | None: """Get the data for a link between two objects. Args: diff --git a/src/nplinker/scoring/metcalf_scoring.py b/src/nplinker/scoring/metcalf_scoring.py index bc5f31b9..05a6718b 100644 --- a/src/nplinker/scoring/metcalf_scoring.py +++ b/src/nplinker/scoring/metcalf_scoring.py @@ -2,7 +2,8 @@ import logging from enum import Enum from typing import TYPE_CHECKING -from typing import TypeVar +from typing import Union +from typing import Any from typing import overload import numpy as np import pandas as pd @@ -33,7 +34,7 @@ class LinkType(Enum): MF_GCF = "mf-gcf" -ObjectType = TypeVar("ObjectType", GCF, Spectrum, MolecularFamily) +Entity = Union[GCF, Spectrum, MolecularFamily] class MetcalfScoring(ScoringBase): @@ -135,19 +136,19 @@ def setup(cls, npl: NPLinker): logger.info("MetcalfScoring.setup completed") @overload - def get_links(self, *objects: GCF, **parameters) -> LinkGraph: ... + def get_links(self, *objects: GCF, **parameters: Any) -> LinkGraph: ... @overload - def get_links(self, *objects: Spectrum, **parameters) -> LinkGraph: ... + def get_links(self, *objects: Spectrum, **parameters: Any) -> LinkGraph: ... @overload - def get_links(self, *objects: MolecularFamily, **parameters) -> LinkGraph: ... + def get_links(self, *objects: MolecularFamily, **parameters: Any) -> LinkGraph: ... def get_links(self, *objects, **parameters): """Get links for the given objects. Args: objects: The objects to get links for. All objects must be of the same type, i.e. `GCF`, - `Spectrum` or `MolecularFamily` type. - If no objects are provided, all detected objects (`npl.gcfs`) will be used. + `Spectrum` or `MolecularFamily` type. + If no objects are provided, all detected objects (`npl.gcfs`) will be used. parameters: The scoring parameters to use for the links. The parameters are: - cutoff: The minimum score to consider a link (≥cutoff). Default is 0. @@ -155,7 +156,7 @@ def get_links(self, *objects, **parameters): Returns: The `LinkGraph` object containing the links involving the input objects with the Metcalf - scores. + scores. Raises: TypeError: If the input objects are not of the same type or the object type is invalid. @@ -293,8 +294,8 @@ def _calc_mean_std( def _get_links( self, - *objects: ObjectType, - obj_type: GCF | Spectrum | MolecularFamily, + *objects: Entity, + obj_type: Entity, score_cutoff: float = 0, ) -> list[pd.DataFrame]: """Get links and scores for the given objects. diff --git a/src/nplinker/scoring/rosetta/rosetta.py b/src/nplinker/scoring/rosetta/rosetta.py index 524b7a42..91dfde10 100644 --- a/src/nplinker/scoring/rosetta/rosetta.py +++ b/src/nplinker/scoring/rosetta/rosetta.py @@ -14,6 +14,7 @@ import csv import logging import os +from nplinker.defaults import NPLINKER_APP_DATA_DIR from nplinker.scoring.rosetta.rosetta_hit import RosettaHit from ...genomics import BGC from ...parsers.kcb import KCBJSONParser @@ -21,7 +22,6 @@ from ...pickler import load_pickled_data from ...pickler import save_pickled_data from .spec_lib import SpecLib -from nplinker import NPLINKER_APP_DATA_DIR logger = logging.getLogger(__name__) diff --git a/src/nplinker/scoring/utils.py b/src/nplinker/scoring/utils.py index 35b2fbff..0b0cc9de 100644 --- a/src/nplinker/scoring/utils.py +++ b/src/nplinker/scoring/utils.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import Sequence +from collections.abc import Sequence import pandas as pd from nplinker.genomics import GCF from nplinker.metabolomics import MolecularFamily diff --git a/src/nplinker/strain/strain_collection.py b/src/nplinker/strain/strain_collection.py index 26cd8852..50ca1a3f 100644 --- a/src/nplinker/strain/strain_collection.py +++ b/src/nplinker/strain/strain_collection.py @@ -1,8 +1,8 @@ from __future__ import annotations import json import logging +from collections.abc import Iterator from os import PathLike -from typing import Iterator from jsonschema import validate from nplinker.schemas import STRAIN_MAPPINGS_SCHEMA from .strain import Strain diff --git a/src/nplinker/utils.py b/src/nplinker/utils.py index c28d14fd..a2777355 100644 --- a/src/nplinker/utils.py +++ b/src/nplinker/utils.py @@ -24,10 +24,11 @@ import sys import tarfile import zipfile +from collections.abc import Callable +from collections.abc import Sequence from os import PathLike from pathlib import Path from typing import IO -from typing import Callable, Sequence import httpx from rich.progress import BarColumn from rich.progress import DownloadColumn diff --git a/tests/unit/genomics/test_mibig_metadata.py b/tests/unit/genomics/test_mibig_metadata.py index 5474f865..738db8d3 100644 --- a/tests/unit/genomics/test_mibig_metadata.py +++ b/tests/unit/genomics/test_mibig_metadata.py @@ -8,7 +8,7 @@ class TestMibigMetadata: @pytest.fixture def json_file(self, version): json_file = DATA_DIR / "mibig" / f"BGC0000001_{version}.json" - yield json_file + yield str(json_file) @pytest.fixture def metadata(self, json_file):