Skip to content

Commit

Permalink
update the loading of genomics data
Browse files Browse the repository at this point in the history
  • Loading branch information
CunliangGeng committed Dec 11, 2023
1 parent 58ccf13 commit f502279
Showing 1 changed file with 4 additions and 7 deletions.
11 changes: 4 additions & 7 deletions src/nplinker/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from nplinker.genomics.antismash import AntismashBGCLoader
from nplinker.genomics.bigscape import BigscapeGCFLoader
from nplinker.genomics.genomics import get_bgcs_from_gcfs
- from nplinker.genomics.genomics import get_strains_from_bgcs
from nplinker.genomics.genomics import map_bgc_to_gcf
from nplinker.genomics.genomics import map_strain_to_bgc
from nplinker.genomics.mibig import MibigLoader
Expand Down Expand Up @@ -428,9 +427,8 @@ def _load_metabolomics(self):
)
return True

- # TODO CG: self.strains will be overwritten by this method, rename it?
def _load_genomics(self):
- """Loads all genomics data (BGCs and GCFs) into the object."""
+ """Loads genomics data to BGC and GCF objects."""
logger.debug("\nLoading genomics data starts...")

# Step 1: load all BGC objects
Expand All @@ -439,22 +437,21 @@ def _load_genomics(self):
raw_bgcs = antismash_bgcs + self.mibig_bgcs

# Step 2: load all GCF objects
+ # TODO: create a config for "bigscape_cluster_file" and discard "bigscape_dir" and "bigscape_cutoff"?
bigscape_cluster_file = (
Path(self.bigscape_dir) / "mix" / f"mix_clustering_c0.{self._bigscape_cutoff:02d}.tsv"
)
- bigscape_gcf_list = BigscapeGCFLoader(bigscape_cluster_file).get_gcfs()
- raw_gcfs = bigscape_gcf_list
+ raw_gcfs = BigscapeGCFLoader(bigscape_cluster_file).get_gcfs()

# Step 3: assign Strain object to BGC.strain
map_strain_to_bgc(self.strains, raw_bgcs)

# Step 4: assign BGC objects to GCF.bgcs
map_bgc_to_gcf(raw_bgcs, raw_gcfs)

- # Step 5: get clean GCF objects, BGC objects and Strain objects
+ # Step 5: get GCF objects and their BGC members
self.gcfs = raw_gcfs
self.bgcs = get_bgcs_from_gcfs(self.gcfs)
- self.strains = get_strains_from_bgcs(self.bgcs)

logger.debug("Loading genomics data completed\n")
return True
Expand Down

0 comments on commit f502279

Please sign in to comment.