Skip to content

Commit

Permalink
Enable filtering of singleton molecular families
Browse files Browse the repository at this point in the history
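Similar to PR #181, this PR adds a new parameter `keep_singleton` to the `get_mfs` method of the MolecularFamily loader, so that singleton molecular families (families that contain only one spectrum) can be filtered out.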

Major changes:
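- replace the class property `families` with the method `get_mfs`
- add the parameter `keep_singleton` to `get_mfs` (defaults to `False` in `GNPSMolecularFamilyLoader`, so singleton families are dropped unless explicitly requested)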
  • Loading branch information
CunliangGeng authored Dec 19, 2023
1 parent 095dcec commit 0d86b08
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 16 deletions.
14 changes: 11 additions & 3 deletions src/nplinker/metabolomics/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,18 @@ def spectra(self) -> Sequence[Spectrum]:


class MolecularFamilyLoaderBase(ABC):
@property
@abstractmethod
def families(self) -> Sequence[MolecularFamily]:
...
def get_mfs(self, keep_singleton: bool) -> Sequence[MolecularFamily]:
"""Get MolecularFamily objects.
Args:
keep_singleton(bool): True to keep singleton molecular families. A
singleton molecular family is a molecular family that contains
only one spectrum.
Returns:
Sequence[MolecularFamily]: a list of MolecularFamily objects.
"""


class FileMappingLoaderBase(ABC):
Expand Down
25 changes: 16 additions & 9 deletions src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,21 +32,28 @@ def __init__(self, file: str | PathLike):
>>> print(loader.get_mfs(keep_singleton=True)[0].spectra_ids)
{'1', '3', '7', ...}
"""
self._families: list[MolecularFamily | SingletonFamily] = []
self._mfs: list[MolecularFamily | SingletonFamily] = []
self._file = file

self._validate()
self._load()

@property
def families(self) -> list[MolecularFamily]:
"""Get all molecular families.
def get_mfs(self, keep_singleton: bool = False) -> list[MolecularFamily]:
"""Get MolecularFamily objects.
Args:
keep_singleton(bool): True to keep singleton molecular families. A
singleton molecular family is a molecular family that contains
only one spectrum.
Returns:
list[MolecularFamily]: List of all molecular family objects with
their spectra ids.
list[MolecularFamily]: A list of MolecularFamily objects with their
spectra ids.
"""
return self._families
mfs = self._mfs
if not keep_singleton:
mfs = [mf for mf in mfs if not mf.is_singleton()]
return mfs

def _validate(self):
"""Validate the GNPS molecular family file."""
Expand Down Expand Up @@ -93,8 +100,8 @@ def _load(self) -> None:
for spectrum_id in spectra_ids:
family = SingletonFamily() ## uuid as family id
family.spectra_ids = set([spectrum_id])
self._families.append(family)
self._mfs.append(family)
else:
family = MolecularFamily(family_id)
family.spectra_ids = spectra_ids
self._families.append(family)
self._mfs.append(family)
18 changes: 14 additions & 4 deletions tests/metabolomics/test_gnps_molecular_family_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@


@pytest.mark.parametrize(
"workflow, num_families, num_spectra",
[(GNPSFormat.SNETS, 25769, 19), (GNPSFormat.SNETSV2, 6902, 10), (GNPSFormat.FBMN, 1105, 5)],
"workflow, num_families, num_spectra, keep_singleton",
[
(GNPSFormat.SNETS, 25769, 19, True),
(GNPSFormat.SNETSV2, 6902, 10, True),
(GNPSFormat.FBMN, 1105, 5, True),
(GNPSFormat.SNETS, 29, 19, False),
(GNPSFormat.SNETSV2, 72, 10, False),
(GNPSFormat.FBMN, 60, 5, False),
],
)
def test_has_molecular_families(workflow, num_families, num_spectra, gnps_mf_files):
def test_gnps_molecular_family_loader(
workflow, num_families, num_spectra, keep_singleton, gnps_mf_files
):
"""Test GNPSMolecularFamilyLoader class."""
loader = GNPSMolecularFamilyLoader(gnps_mf_files[workflow])
actual = loader.families
actual = loader.get_mfs(keep_singleton=keep_singleton)
assert len(actual) == num_families
# test molecular family with id "1" has correct number of spectra ids
mf = [mf for mf in actual if mf.family_id == "1"][0]
Expand Down

0 comments on commit 0d86b08

Please sign in to comment.