Skip to content

Commit

Permalink
rename genomics.py to utils.py
Browse files Browse the repository at this point in the history
The `genomics.py` module contains several helper functions for handling genomics data, so it is better to rename this file to `utils.py`, which is now located at `src/nplinker/genomics/utils.py`.

Major changes:
- rename files 
- rename utility functions
- add docstrings and typings
  • Loading branch information
CunliangGeng authored Dec 14, 2023
1 parent 9d0b127 commit e431295
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 28 deletions.
14 changes: 7 additions & 7 deletions src/nplinker/genomics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
from .abc import BGCLoaderBase
from .bgc import BGC
from .gcf import GCF
from .genomics import generate_mappings_genome_id_bgc_id
from .genomics import get_bgcs_from_gcfs
from .genomics import get_strains_from_bgcs
from .genomics import map_bgc_to_gcf
from .genomics import map_strain_to_bgc
from .utils import add_bgc_to_gcf
from .utils import add_strain_to_bgc
from .utils import generate_mappings_genome_id_bgc_id
from .utils import get_bgcs_from_gcfs
from .utils import get_strains_from_bgcs


logging.getLogger(__name__).addHandler(logging.NullHandler())
Expand All @@ -18,6 +18,6 @@
"generate_mappings_genome_id_bgc_id",
"get_bgcs_from_gcfs",
"get_strains_from_bgcs",
"map_bgc_to_gcf",
"map_strain_to_bgc",
"add_bgc_to_gcf",
"add_strain_to_bgc",
]
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,13 @@ def generate_mappings_genome_id_bgc_id(
logger.info("Generated genome-BGC mappings file: %s", output_file)


def map_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]):
"""To set BGC object's strain with representative strain object.
def add_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]) -> None:
"""Assign a Strain object to `BGC.strain` for input BGCs.
This method changes the list `bgcs` in place.
BGC id is used to find the corresponding Strain object. It's possible that
no Strain object is found for a BGC id.
Note that the input list `bgcs` will be changed in place.
Args:
strains(StrainCollection): A collection of all strain objects.
Expand All @@ -91,7 +94,7 @@ def map_strain_to_bgc(strains: StrainCollection, bgcs: list[BGC]):
bgc.strain = strain_list[0]


def map_bgc_to_gcf(bgcs: list[BGC], gcfs: list[GCF]):
def add_bgc_to_gcf(bgcs: list[BGC], gcfs: list[GCF]) -> None:
"""To add BGC objects to GCF object based on GCF's BGC ids.
This method changes the lists `bgcs` and `gcfs` in place.
Expand Down
10 changes: 5 additions & 5 deletions src/nplinker/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from nplinker.class_info.chem_classes import ChemClassPredictions
from nplinker.class_info.class_matches import ClassMatches
from nplinker.class_info.runcanopus import run_canopus
from nplinker.genomics import add_bgc_to_gcf
from nplinker.genomics import add_strain_to_bgc
from nplinker.genomics import generate_mappings_genome_id_bgc_id
from nplinker.genomics import get_bgcs_from_gcfs
from nplinker.genomics.antismash import AntismashBGCLoader
from nplinker.genomics.bigscape import BigscapeGCFLoader
from nplinker.genomics.genomics import get_bgcs_from_gcfs
from nplinker.genomics.genomics import map_bgc_to_gcf
from nplinker.genomics.genomics import map_strain_to_bgc
from nplinker.genomics.mibig import MibigLoader
from nplinker.globals import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.globals import GENOME_STATUS_FILENAME
Expand Down Expand Up @@ -444,10 +444,10 @@ def _load_genomics(self):
raw_gcfs = BigscapeGCFLoader(bigscape_cluster_file).get_gcfs()

# Step 3: assign Strain object to BGC.strain
map_strain_to_bgc(self.strains, raw_bgcs)
add_strain_to_bgc(self.strains, raw_bgcs)

# Step 4: assign BGC objects to GCF.bgcs
map_bgc_to_gcf(raw_bgcs, raw_gcfs)
add_bgc_to_gcf(raw_bgcs, raw_gcfs)

# Step 5: get GCF objects and their BGC members
self.gcfs = raw_gcfs
Expand Down
36 changes: 24 additions & 12 deletions tests/genomics/test_genomics.py → tests/genomics/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
import pytest
from nplinker.genomics import BGC
from nplinker.genomics import GCF
from nplinker.genomics import add_bgc_to_gcf
from nplinker.genomics import add_strain_to_bgc
from nplinker.genomics import generate_mappings_genome_id_bgc_id
from nplinker.genomics import get_bgcs_from_gcfs
from nplinker.genomics import get_strains_from_bgcs
from nplinker.genomics import map_bgc_to_gcf
from nplinker.genomics import map_strain_to_bgc
from nplinker.globals import GENOME_BGC_MAPPINGS_FILENAME
from nplinker.strain import Strain
from nplinker.strain_collection import StrainCollection
from .. import DATA_DIR


def test_generate_mappings_genome_id_bgc_id(tmp_path):
"""Test generate_mappings_genome_id_bgc_id function."""
bgc_dir = DATA_DIR / "antismash"

# using default output file path
Expand Down Expand Up @@ -50,6 +51,7 @@ def test_generate_mappings_genome_id_bgc_id(tmp_path):


def test_generate_mappings_genome_id_bgc_id_empty_dir(tmp_path, caplog):
"""Test generate_mappings_genome_id_bgc_id function with empty dir."""
# prepare dir and file
bgc_dir = tmp_path / "GCF_1"
bgc_file = bgc_dir / "BGC_1.gbk"
Expand All @@ -70,6 +72,7 @@ def test_generate_mappings_genome_id_bgc_id_empty_dir(tmp_path, caplog):

@pytest.fixture
def strain_collection() -> StrainCollection:
"""Return a StrainCollection object."""
sc = StrainCollection()

strain = Strain("STRAIN_01")
Expand All @@ -89,11 +92,13 @@ def strain_collection() -> StrainCollection:

@pytest.fixture
def bgc_list() -> list[BGC]:
    """Provide three BGC objects built from fixed constructor arguments.

    The objects are created in a stable order so tests can index into the
    returned list.
    """
    # Each tuple holds the positional arguments passed straight to BGC().
    bgc_args = (
        ("BGC_01", "NPR"),
        ("BGC_02", "Alkaloid"),
        ("SAMPLE_BGC_03", "Polyketide"),
    )
    return [BGC(*args) for args in bgc_args]


@pytest.fixture
def gcf_list() -> list[GCF]:
"""Return a list of GCF objects."""
gcf1 = GCF("1")
gcf1.bgc_ids |= {"BGC_01"}
gcf2 = GCF("2")
Expand All @@ -103,35 +108,39 @@ def gcf_list() -> list[GCF]:

@pytest.fixture
def gcf_list_error() -> list[GCF]:
    """Provide a single-element GCF list for the error-path tests.

    The GCF's `bgc_ids` contains "BGC_04", an id that is absent from the
    `bgc_list` fixture.
    """
    broken_gcf = GCF("1")
    # In-place union keeps whatever ids the GCF already holds.
    broken_gcf.bgc_ids |= {"SAMPLE_BGC_03", "BGC_04"}
    return [broken_gcf]


def test_map_strain_to_bgc(strain_collection, bgc_list):
def test_add_strain_to_bgc(strain_collection, bgc_list):
"""Test add_strain_to_bgc function."""
for bgc in bgc_list:
assert bgc.strain is None
map_strain_to_bgc(strain_collection, bgc_list)
add_strain_to_bgc(strain_collection, bgc_list)
for bgc in bgc_list:
assert bgc.strain is not None
assert bgc_list[0].strain.id == "STRAIN_01"
assert bgc_list[1].strain.id == "STRAIN_02"
assert bgc_list[2].strain.id == "SAMPLE_BGC_03"


def test_map_strain_to_bgc_error(strain_collection):
def test_add_strain_to_bgc_error(strain_collection):
"""Test add_strain_to_bgc function error."""
bgcs = [BGC("BGC_04", "NPR")]
with pytest.raises(ValueError) as e:
map_strain_to_bgc(strain_collection, bgcs)
add_strain_to_bgc(strain_collection, bgcs)
assert "Strain id 'BGC_04' from BGC object 'BGC_04' not found" in e.value.args[0]


def test_map_bgc_to_gcf(bgc_list, gcf_list):
def test_add_bgc_to_gcf(bgc_list, gcf_list):
"""Test add_bgc_to_gcf function."""
assert gcf_list[0].bgc_ids == {"BGC_01"}
assert gcf_list[1].bgc_ids == {"BGC_02", "SAMPLE_BGC_03"}
assert len(gcf_list[0].bgcs) == 0
assert len(gcf_list[1].bgcs) == 0
map_bgc_to_gcf(bgc_list, gcf_list)
add_bgc_to_gcf(bgc_list, gcf_list)
assert gcf_list[0].bgc_ids == {"BGC_01"}
assert gcf_list[1].bgc_ids == {"BGC_02", "SAMPLE_BGC_03"}
assert len(gcf_list[0].bgcs) == 1
Expand All @@ -140,16 +149,18 @@ def test_map_bgc_to_gcf(bgc_list, gcf_list):
assert gcf_list[1].bgcs == set(bgc_list[1:])


def test_map_bgc_to_gcf_error(bgc_list, gcf_list_error):
def test_add_bgc_to_gcf_error(bgc_list, gcf_list_error):
"""Test add_bgc_to_gcf function error."""
assert gcf_list_error[0].bgc_ids == {"SAMPLE_BGC_03", "BGC_04"}
assert len(gcf_list_error[0].bgcs) == 0
with pytest.raises(KeyError) as e:
map_bgc_to_gcf(bgc_list, gcf_list_error)
add_bgc_to_gcf(bgc_list, gcf_list_error)
assert "BGC id 'BGC_04' from GCF object '1' not found" in e.value.args[0]


def test_get_bgcs_from_gcfs(bgc_list, gcf_list):
map_bgc_to_gcf(bgc_list, gcf_list)
"""Test get_bgcs_from_gcfs function."""
add_bgc_to_gcf(bgc_list, gcf_list)
bgcs = get_bgcs_from_gcfs(gcf_list)
assert isinstance(bgcs, list)
assert len(bgcs) == 3
Expand All @@ -158,7 +169,8 @@ def test_get_bgcs_from_gcfs(bgc_list, gcf_list):


def test_get_strains_from_bgcs(strain_collection, bgc_list):
map_strain_to_bgc(strain_collection, bgc_list)
"""Test get_strains_from_bgcs function."""
add_strain_to_bgc(strain_collection, bgc_list)
strains = get_strains_from_bgcs(bgc_list)
assert isinstance(strains, StrainCollection)
assert strains == strain_collection

0 comments on commit e431295

Please sign in to comment.