From 17f8ac3b9a6c26bc44c5029ecd525a5d7d3fd81f Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Fri, 17 Nov 2023 14:08:04 +0100 Subject: [PATCH] change the returned type of `AntismashBGCLoader.get_bgcs` - change returned value from dict to list for `AntismashBGCLoader.get_bgcs` method and the underlying method `._parse_bgcs` - update the use of the changed methods - update unit tests --- .../genomics/antismash/antismash_loader.py | 19 +++++++------------ src/nplinker/loader.py | 4 ++-- .../antismash/test_antismash_loader.py | 7 ++----- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/nplinker/genomics/antismash/antismash_loader.py b/src/nplinker/genomics/antismash/antismash_loader.py index f8ff239b..d176fd92 100644 --- a/src/nplinker/genomics/antismash/antismash_loader.py +++ b/src/nplinker/genomics/antismash/antismash_loader.py @@ -35,7 +35,7 @@ def __init__(self, data_dir: str) -> None: """ self.data_dir = data_dir self._file_dict = self._parse_data_dir(self.data_dir) - self._bgc_dict = self._parse_bgcs(self._file_dict) + self._bgcs = self._parse_bgcs(self._file_dict) def get_bgc_genome_mapping(self) -> dict[str, str]: """Get the mapping from BGC to genome. @@ -85,17 +85,16 @@ def _parse_data_dir(data_dir: str) -> dict[str, str]: return bgc_files - def get_bgcs(self) -> dict[str, BGC]: + def get_bgcs(self) -> list[BGC]: """Get all BGC objects. Returns: - dict[str, BGC]: key is BGC name and value is - :class:`~nplinker.genomic.BGC` objects + list[BGC]: a list of :class:`~nplinker.genomic.BGC` objects """ - return self._bgc_dict + return self._bgcs @staticmethod - def _parse_bgcs(bgc_files: dict[str, str]) -> dict[str, BGC]: + def _parse_bgcs(bgc_files: dict[str, str]) -> list[BGC]: """Load given BGC files as BGC objects. Args: @@ -103,13 +102,9 @@ def _parse_bgcs(bgc_files: dict[str, str]) -> dict[str, BGC]: BGC gbk file, see method :meth:`.bgc_files`. Returns: - dict[str, BGC]: key is BGC name and value is :class:`~nplinker.genomic.BGC` objects + list[BGC]: a list of :class:`~nplinker.genomic.BGC` objects """ - bgcs = {} - for bgc_id in bgc_files: - bgc = parse_bgc_genbank(bgc_files[bgc_id]) - bgcs[bgc_id] = bgc - return bgcs + return [parse_bgc_genbank(file) for file in bgc_files.values()] def parse_bgc_genbank(file: str) -> BGC: diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index 521ec458..18f3d94a 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -436,8 +436,8 @@ def _load_genomics(self): # Step 1: load all BGC objects logger.debug("Parsing AntiSMASH directory...") - antismash_bgc_dict = AntismashBGCLoader(self.antismash_dir).get_bgcs() - raw_bgcs = list(antismash_bgc_dict.values()) + self.mibig_bgcs) + antismash_bgcs = AntismashBGCLoader(self.antismash_dir).get_bgcs() + raw_bgcs = antismash_bgcs + self.mibig_bgcs # Step 2: load all GCF objects bigscape_cluster_file = ( diff --git a/tests/genomics/antismash/test_antismash_loader.py b/tests/genomics/antismash/test_antismash_loader.py index d1e1694d..be6b7408 100644 --- a/tests/genomics/antismash/test_antismash_loader.py +++ b/tests/genomics/antismash/test_antismash_loader.py @@ -53,12 +53,9 @@ def test_parse_data_dir(self): def test_get_bgcs(self, loader): bgcs = loader.get_bgcs() - assert isinstance(bgcs, dict) + assert isinstance(bgcs, list) assert len(bgcs) == 44 - assert isinstance(bgcs["NZ_AZWB01000005.region001"], BGC) - assert isinstance(bgcs["NZ_AZWS01000001.region001"], BGC) - assert bgcs.get("GCF_000514855.1", "NotExist") == "NotExist" - assert bgcs.get("GCF_000514515.1", "NotExist") == "NotExist" + assert isinstance(bgcs[0], BGC) def test_parse_bgc_genbank():