From 3b4610741edfc9c83f94ed4d1e86ea3b8022ed2a Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Mon, 20 Nov 2023 16:00:53 +0100 Subject: [PATCH 1/5] rename `genomics.py` to `utils.py` --- src/nplinker/genomics/__init__.py | 10 +++++----- src/nplinker/genomics/{genomics.py => utils.py} | 0 src/nplinker/loader.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) rename src/nplinker/genomics/{genomics.py => utils.py} (100%) diff --git a/src/nplinker/genomics/__init__.py b/src/nplinker/genomics/__init__.py index cffff7c5..ffe0a287 100644 --- a/src/nplinker/genomics/__init__.py +++ b/src/nplinker/genomics/__init__.py @@ -2,11 +2,11 @@ from .abc import BGCLoaderBase from .bgc import BGC from .gcf import GCF -from .genomics import generate_mappings_genome_id_bgc_id -from .genomics import get_bgcs_from_gcfs -from .genomics import get_strains_from_bgcs -from .genomics import map_bgc_to_gcf -from .genomics import map_strain_to_bgc +from .utils import generate_mappings_genome_id_bgc_id +from .utils import get_bgcs_from_gcfs +from .utils import get_strains_from_bgcs +from .utils import map_bgc_to_gcf +from .utils import map_strain_to_bgc logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git a/src/nplinker/genomics/genomics.py b/src/nplinker/genomics/utils.py similarity index 100% rename from src/nplinker/genomics/genomics.py rename to src/nplinker/genomics/utils.py diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index fa66e2cb..9d31066a 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -6,11 +6,11 @@ from nplinker.class_info.class_matches import ClassMatches from nplinker.class_info.runcanopus import run_canopus from nplinker.genomics import generate_mappings_genome_id_bgc_id +from nplinker.genomics import get_bgcs_from_gcfs +from nplinker.genomics import map_bgc_to_gcf +from nplinker.genomics import map_strain_to_bgc from nplinker.genomics.antismash import AntismashBGCLoader from nplinker.genomics.bigscape import BigscapeGCFLoader -from nplinker.genomics.genomics import get_bgcs_from_gcfs -from nplinker.genomics.genomics import map_bgc_to_gcf -from nplinker.genomics.genomics import map_strain_to_bgc from nplinker.genomics.mibig import MibigLoader from nplinker.globals import GENOME_BGC_MAPPINGS_FILENAME from nplinker.globals import GENOME_STATUS_FILENAME From 412c7c6e2dd3f77cbdcad726d1fd64f8c43273d3 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Tue, 5 Dec 2023 16:42:47 +0100 Subject: [PATCH 2/5] rename `test_genomics.py` to `test_utils.py` --- tests/genomics/{test_genomics.py => test_utils.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/genomics/{test_genomics.py => test_utils.py} (100%) diff --git a/tests/genomics/test_genomics.py b/tests/genomics/test_utils.py similarity index 100% rename from tests/genomics/test_genomics.py rename to tests/genomics/test_utils.py From c2fd644593762e681e9cc0f4d8ac7a69097f43c6 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Tue, 5 Dec 2023 16:43:48 +0100 Subject: [PATCH 3/5] rename function `map_strain_to_bgc` to `add_strain_to_bgc` - rename function - update typing and docstring --- src/nplinker/genomics/__init__.py | 4 ++-- src/nplinker/genomics/utils.py | 9 ++++++--- src/nplinker/loader.py | 4 ++-- tests/genomics/test_utils.py | 8 ++++---- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/nplinker/genomics/__init__.py b/src/nplinker/genomics/__init__.py index ffe0a287..f7a7c0d0 100644 --- a/src/nplinker/genomics/__init__.py +++ b/src/nplinker/genomics/__init__.py @@ -2,11 +2,11 @@ from .abc import BGCLoaderBase from .bgc import BGC from .gcf import GCF +from .utils import add_strain_to_bgc from .utils import generate_mappings_genome_id_bgc_id from .utils import get_bgcs_from_gcfs from .utils import get_strains_from_bgcs from .utils import map_bgc_to_gcf -from .utils import map_strain_to_bgc logging.getLogger(__name__).addHandler(logging.NullHandler()) @@ -19,5 +19,5 @@ "get_bgcs_from_gcfs", "get_strains_from_bgcs", "map_bgc_to_gcf", - "map_strain_to_bgc", + "add_strain_to_bgc", ] diff --git a/src/nplinker/genomics/utils.py b/src/nplinker/genomics/utils.py index babadfe7..eef5ea91 100644 --- a/src/nplinker/genomics/utils.py +++ b/src/nplinker/genomics/utils.py @@ -62,10 +62,13 @@ def generate_mappings_genome_id_bgc_id( logger.info("Generated genome-BGC mappings file: %s", output_file) -def map_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]): - """To set BGC object's strain with representative strain object. +def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> None: + """Assign a Strain object to `BGC.strain` for input BGCs. - This method changes the list `bgcs` in place. + BGC id is used to find the corresponding Strain object. It's possible that + no Strain object is found for a BGC id. + + Note that the input list `bgcs` will be changed in place. Args: strains(StrainCollection): A collection of all strain objects. diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index 9d31066a..c61bdb46 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -5,10 +5,10 @@ from nplinker.class_info.chem_classes import ChemClassPredictions from nplinker.class_info.class_matches import ClassMatches from nplinker.class_info.runcanopus import run_canopus +from nplinker.genomics import add_strain_to_bgc from nplinker.genomics import generate_mappings_genome_id_bgc_id from nplinker.genomics import get_bgcs_from_gcfs from nplinker.genomics import map_bgc_to_gcf -from nplinker.genomics import map_strain_to_bgc from nplinker.genomics.antismash import AntismashBGCLoader from nplinker.genomics.bigscape import BigscapeGCFLoader from nplinker.genomics.mibig import MibigLoader @@ -444,7 +444,7 @@ def _load_genomics(self): raw_gcfs = BigscapeGCFLoader(bigscape_cluster_file).get_gcfs() # Step 3: assign Strain object to BGC.strain - map_strain_to_bgc(self.strains, raw_bgcs) + add_strain_to_bgc(self.strains, raw_bgcs) # Step 4: assign BGC objects to GCF.bgcs map_bgc_to_gcf(raw_bgcs, raw_gcfs) diff --git a/tests/genomics/test_utils.py b/tests/genomics/test_utils.py index 394f5a36..febe48ff 100644 --- a/tests/genomics/test_utils.py +++ b/tests/genomics/test_utils.py @@ -3,11 +3,11 @@ import pytest from nplinker.genomics import BGC from nplinker.genomics import GCF +from nplinker.genomics import add_strain_to_bgc from nplinker.genomics import generate_mappings_genome_id_bgc_id from nplinker.genomics import get_bgcs_from_gcfs from nplinker.genomics import get_strains_from_bgcs from nplinker.genomics import map_bgc_to_gcf -from nplinker.genomics import map_strain_to_bgc from nplinker.globals import GENOME_BGC_MAPPINGS_FILENAME from nplinker.strain import Strain from nplinker.strain_collection import StrainCollection @@ -111,7 +111,7 @@ def gcf_list_error() -> list[GCF]: def test_map_strain_to_bgc(strain_collection, bgc_list): for bgc in bgc_list: assert bgc.strain is None - map_strain_to_bgc(strain_collection, bgc_list) + add_strain_to_bgc(strain_collection, bgc_list) for bgc in bgc_list: assert bgc.strain is not None assert bgc_list[0].strain.id == "STRAIN_01" @@ -122,7 +122,7 @@ def test_map_strain_to_bgc(strain_collection, bgc_list): def test_map_strain_to_bgc_error(strain_collection): bgcs = [BGC("BGC_04", "NPR")] with pytest.raises(ValueError) as e: - map_strain_to_bgc(strain_collection, bgcs) + add_strain_to_bgc(strain_collection, bgcs) assert "Strain id 'BGC_04' from BGC object 'BGC_04' not found" in e.value.args[0] @@ -158,7 +158,7 @@ def test_get_bgcs_from_gcfs(bgc_list, gcf_list): def test_get_strains_from_bgcs(strain_collection, bgc_list): - map_strain_to_bgc(strain_collection, bgc_list) + add_strain_to_bgc(strain_collection, bgc_list) strains = get_strains_from_bgcs(bgc_list) assert isinstance(strains, StrainCollection) assert strains == strain_collection From cbb1089e2bb615f61972125afaa7bf94b54b89ce Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Tue, 5 Dec 2023 16:44:55 +0100 Subject: [PATCH 4/5] rename function `map_bgc_to_gcf` to `add_bgc_to_gcf` - rename function - update typing --- src/nplinker/genomics/__init__.py | 4 ++-- src/nplinker/genomics/utils.py | 2 +- src/nplinker/loader.py | 4 ++-- tests/genomics/test_utils.py | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/nplinker/genomics/__init__.py b/src/nplinker/genomics/__init__.py index f7a7c0d0..d90272b0 100644 --- a/src/nplinker/genomics/__init__.py +++ b/src/nplinker/genomics/__init__.py @@ -2,11 +2,11 @@ from .abc import BGCLoaderBase from .bgc import BGC from .gcf import GCF +from .utils import add_bgc_to_gcf from .utils import add_strain_to_bgc from .utils import generate_mappings_genome_id_bgc_id from .utils import get_bgcs_from_gcfs from .utils import get_strains_from_bgcs -from .utils import map_bgc_to_gcf logging.getLogger(__name__).addHandler(logging.NullHandler()) @@ -18,6 +18,6 @@ "generate_mappings_genome_id_bgc_id", "get_bgcs_from_gcfs", "get_strains_from_bgcs", - "map_bgc_to_gcf", + "add_bgc_to_gcf", "add_strain_to_bgc", ] diff --git a/src/nplinker/genomics/utils.py b/src/nplinker/genomics/utils.py index eef5ea91..b285d412 100644 --- a/src/nplinker/genomics/utils.py +++ b/src/nplinker/genomics/utils.py @@ -94,7 +94,7 @@ def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> None: bgc.strain = strain_list[0] -def map_bgc_to_gcf(bgcs: list[BGC], gcfs: list[GCF]): +def add_bgc_to_gcf(bgcs: list[BGC], gcfs: list[GCF]) -> None: """To add BGC objects to GCF object based on GCF's BGC ids. This method changes the lists `bgcs` and `gcfs` in place. diff --git a/src/nplinker/loader.py b/src/nplinker/loader.py index c61bdb46..abaa5e5f 100644 --- a/src/nplinker/loader.py +++ b/src/nplinker/loader.py @@ -5,10 +5,10 @@ from nplinker.class_info.chem_classes import ChemClassPredictions from nplinker.class_info.class_matches import ClassMatches from nplinker.class_info.runcanopus import run_canopus +from nplinker.genomics import add_bgc_to_gcf from nplinker.genomics import add_strain_to_bgc from nplinker.genomics import generate_mappings_genome_id_bgc_id from nplinker.genomics import get_bgcs_from_gcfs -from nplinker.genomics import map_bgc_to_gcf from nplinker.genomics.antismash import AntismashBGCLoader from nplinker.genomics.bigscape import BigscapeGCFLoader from nplinker.genomics.mibig import MibigLoader @@ -447,7 +447,7 @@ def _load_genomics(self): add_strain_to_bgc(self.strains, raw_bgcs) # Step 4: assign BGC objects to GCF.bgcs - map_bgc_to_gcf(raw_bgcs, raw_gcfs) + add_bgc_to_gcf(raw_bgcs, raw_gcfs) # Step 5: get GCF objects and their BGC members self.gcfs = raw_gcfs diff --git a/tests/genomics/test_utils.py b/tests/genomics/test_utils.py index febe48ff..bcd5371b 100644 --- a/tests/genomics/test_utils.py +++ b/tests/genomics/test_utils.py @@ -3,11 +3,11 @@ import pytest from nplinker.genomics import BGC from nplinker.genomics import GCF +from nplinker.genomics import add_bgc_to_gcf from nplinker.genomics import add_strain_to_bgc from nplinker.genomics import generate_mappings_genome_id_bgc_id from nplinker.genomics import get_bgcs_from_gcfs from nplinker.genomics import get_strains_from_bgcs -from nplinker.genomics import map_bgc_to_gcf from nplinker.globals import GENOME_BGC_MAPPINGS_FILENAME from nplinker.strain import Strain from nplinker.strain_collection import StrainCollection @@ -131,7 +131,7 @@ def test_map_bgc_to_gcf(bgc_list, gcf_list): assert gcf_list[1].bgc_ids == {"BGC_02", "SAMPLE_BGC_03"} assert len(gcf_list[0].bgcs) == 0 assert len(gcf_list[1].bgcs) == 0 - map_bgc_to_gcf(bgc_list, gcf_list) + add_bgc_to_gcf(bgc_list, gcf_list) assert gcf_list[0].bgc_ids == {"BGC_01"} assert gcf_list[1].bgc_ids == {"BGC_02", "SAMPLE_BGC_03"} assert len(gcf_list[0].bgcs) == 1 @@ -144,12 +144,12 @@ def test_map_bgc_to_gcf_error(bgc_list, gcf_list_error): assert gcf_list_error[0].bgc_ids == {"SAMPLE_BGC_03", "BGC_04"} assert len(gcf_list_error[0].bgcs) == 0 with pytest.raises(KeyError) as e: - map_bgc_to_gcf(bgc_list, gcf_list_error) + add_bgc_to_gcf(bgc_list, gcf_list_error) assert "BGC id 'BGC_04' from GCF object '1' not found" in e.value.args[0] def test_get_bgcs_from_gcfs(bgc_list, gcf_list): - map_bgc_to_gcf(bgc_list, gcf_list) + add_bgc_to_gcf(bgc_list, gcf_list) bgcs = get_bgcs_from_gcfs(gcf_list) assert isinstance(bgcs, list) assert len(bgcs) == 3 From 13ee5bbb527e605d35874a9316cb1eda9dbde7d1 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Tue, 5 Dec 2023 16:51:30 +0100 Subject: [PATCH 5/5] Update test_utils.py - add docstrings - rename test function names --- tests/genomics/test_utils.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/genomics/test_utils.py b/tests/genomics/test_utils.py index bcd5371b..15f07a52 100644 --- a/tests/genomics/test_utils.py +++ b/tests/genomics/test_utils.py @@ -15,6 +15,7 @@ def test_generate_mappings_genome_id_bgc_id(tmp_path): + """Test generate_mappings_genome_id_bgc_id function.""" bgc_dir = DATA_DIR / "antismash" # using default output file path @@ -50,6 +51,7 @@ def test_generate_mappings_genome_id_bgc_id(tmp_path): def test_generate_mappings_genome_id_bgc_id_empty_dir(tmp_path, caplog): + """Test generate_mappings_genome_id_bgc_id function with empty dir.""" # prepare dir and file bgc_dir = tmp_path / "GCF_1" bgc_file = bgc_dir / "BGC_1.gbk" @@ -70,6 +72,7 @@ def test_generate_mappings_genome_id_bgc_id_empty_dir(tmp_path, caplog): @pytest.fixture def strain_collection() -> StrainCollection: + """Return a StrainCollection object.""" sc = StrainCollection() strain = Strain("STRAIN_01") @@ -89,11 +92,13 @@ def strain_collection() -> StrainCollection: @pytest.fixture def bgc_list() -> list[BGC]: + """Return a list of BGC objects.""" return [BGC("BGC_01", "NPR"), BGC("BGC_02", "Alkaloid"), BGC("SAMPLE_BGC_03", "Polyketide")] @pytest.fixture def gcf_list() -> list[GCF]: + """Return a list of GCF objects.""" gcf1 = GCF("1") gcf1.bgc_ids |= {"BGC_01"} gcf2 = GCF("2") @@ -103,12 +108,14 @@ def gcf_list() -> list[GCF]: @pytest.fixture def gcf_list_error() -> list[GCF]: + """Return a list of GCF objects for testing errors.""" gcf1 = GCF("1") gcf1.bgc_ids |= {"SAMPLE_BGC_03", "BGC_04"} return [gcf1] -def test_map_strain_to_bgc(strain_collection, bgc_list): +def test_add_strain_to_bgc(strain_collection, bgc_list): + """Test add_strain_to_bgc function.""" for bgc in bgc_list: assert bgc.strain is None add_strain_to_bgc(strain_collection, bgc_list) @@ -119,14 +126,16 @@ def test_map_strain_to_bgc(strain_collection, bgc_list): assert bgc_list[2].strain.id == "SAMPLE_BGC_03" -def test_map_strain_to_bgc_error(strain_collection): +def test_add_strain_to_bgc_error(strain_collection): + """Test add_strain_to_bgc function error.""" bgcs = [BGC("BGC_04", "NPR")] with pytest.raises(ValueError) as e: add_strain_to_bgc(strain_collection, bgcs) assert "Strain id 'BGC_04' from BGC object 'BGC_04' not found" in e.value.args[0] -def test_map_bgc_to_gcf(bgc_list, gcf_list): +def test_add_bgc_to_gcf(bgc_list, gcf_list): + """Test add_bgc_to_gcf function.""" assert gcf_list[0].bgc_ids == {"BGC_01"} assert gcf_list[1].bgc_ids == {"BGC_02", "SAMPLE_BGC_03"} assert len(gcf_list[0].bgcs) == 0 @@ -140,7 +149,8 @@ def test_map_bgc_to_gcf(bgc_list, gcf_list): assert gcf_list[1].bgcs == set(bgc_list[1:]) -def test_map_bgc_to_gcf_error(bgc_list, gcf_list_error): +def test_add_bgc_to_gcf_error(bgc_list, gcf_list_error): + """Test add_bgc_to_gcf function error.""" assert gcf_list_error[0].bgc_ids == {"SAMPLE_BGC_03", "BGC_04"} assert len(gcf_list_error[0].bgcs) == 0 with pytest.raises(KeyError) as e: @@ -149,6 +159,7 @@ def test_map_bgc_to_gcf_error(bgc_list, gcf_list_error): def test_get_bgcs_from_gcfs(bgc_list, gcf_list): + """Test get_bgcs_from_gcfs function.""" add_bgc_to_gcf(bgc_list, gcf_list) bgcs = get_bgcs_from_gcfs(gcf_list) assert isinstance(bgcs, list) @@ -158,6 +169,7 @@ def test_get_bgcs_from_gcfs(bgc_list, gcf_list): def test_get_strains_from_bgcs(strain_collection, bgc_list): + """Test get_strains_from_bgcs function.""" add_strain_to_bgc(strain_collection, bgc_list) strains = get_strains_from_bgcs(bgc_list) assert isinstance(strains, StrainCollection)