Skip to content

Commit

Permalink
add single_anchor filter fix #35
Browse files Browse the repository at this point in the history
  • Loading branch information
felipeZ committed Feb 4, 2021
1 parent 854f147 commit 3a44b2e
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 3 deletions.
5 changes: 5 additions & 0 deletions flamingo/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@

#: Schema to validate the filters to apply for screening
SCHEMA_FILTERS = Schema({
# Filter out molecules with more than one anchoring group
Optional("single_anchor", default=True): bool,

# Include or exclude one or more functional group using smiles
Optional("include_functional_groups"): Schema([str]),
Optional("exclude_functional_groups"): Schema([str]),
Expand All @@ -39,6 +42,7 @@
Optional("bulkiness"): SCHEMA_BULKINESS,

Optional("scscore"): SCHEMA_ORDERING

})

#: Schema to validate the input for screening
Expand All @@ -52,6 +56,7 @@
# Functional group used as anchor
Optional("anchor", default="O(C=O)[H]"): str,


# path to the molecular coordinates of the Core to attach the ligands
Optional("core"): str,

Expand Down
20 changes: 18 additions & 2 deletions flamingo/screen.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Fragments

from .features.featurizer import generate_fingerprints
from .cat_interface import compute_bulkiness
Expand Down Expand Up @@ -45,7 +46,6 @@ def split_filter_in_batches(opts: Options) -> None:
result_path = Path(opts.output_path)
result_path.mkdir(exist_ok=True, parents=True)


# Compute the number of batches to split
nbatches = len(molecules) // 1000
nbatches = nbatches if nbatches > 0 else 1
Expand Down Expand Up @@ -98,7 +98,9 @@ def apply_filters(molecules: pd.DataFrame, opts: Options, output_file: Path) ->
"include_functional_groups": include_functional_groups,
"exclude_functional_groups": exclude_functional_groups,
"bulkiness": filter_by_bulkiness,
"scscore": filter_by_scscore}
"scscore": filter_by_scscore,
"single_anchor": filter_single_anchor,
}

for key in opts.filters.keys():
if key in available_filters:
Expand Down Expand Up @@ -148,6 +150,20 @@ def has_substructure(patterns: FrozenSet, mol: Chem.Mol) -> bool:
return False if mol is None else any(mol.HasSubstructMatch(p) for p in patterns)


def filter_single_anchor(molecules: pd.DataFrame, opts: Options) -> pd.DataFrame:
    """Exclude molecules that do not contain exactly one anchoring functional group.

    Parameters
    ----------
    molecules
        DataFrame with a ``rdkit_molecules`` column holding the molecules to screen.
    opts
        Options whose ``anchor`` attribute is the SMILES of the anchoring group.

    Returns
    -------
    pd.DataFrame
        The rows of ``molecules`` whose molecule has exactly one anchoring group.

    Raises
    ------
    ValueError
        If ``opts.anchor`` cannot be parsed as SMILES.
    NotImplementedError
        If the anchor is not a carboxylic acid, the only group supported so far.

    """
    logger.debug(f"exclude molecules with more than a single {opts.anchor} anchoring group")
    anchor = Chem.MolFromSmiles(opts.anchor)
    if anchor is None:
        # MolFromSmiles signals a parsing failure by returning None
        raise ValueError(f"invalid SMILES for the anchor: {opts.anchor}")

    if anchor.HasSubstructMatch(Chem.MolFromSmarts("[OH]C=O")):
        count_anchors = Fragments.fr_COO
    else:
        msg = f"single_anchor_filter not implemented for anchor: {opts.anchor}"
        raise NotImplementedError(msg)

    # A plain comprehension (instead of np.vectorize) also works for an empty frame
    # and lets molecules that failed to parse (None) be discarded rather than crash.
    counts = np.array(
        [-1 if mol is None else count_anchors(mol) for mol in molecules["rdkit_molecules"]],
        dtype=int,
    )
    return molecules[counts == 1]


def filter_by_bulkiness(molecules: pd.DataFrame, opts: Options) -> pd.DataFrame:
"""Filter the ligands that have a given bulkiness.
Expand Down
3 changes: 2 additions & 1 deletion tests/files/smiles_carboxylic.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
0,CCCCCCCCC=CCCCCCCCC(=O)O
1,CC(=O)O
2,O=C(O)Cc1ccccc1
3,CC(C(=O)O)O
3,CC(C(=O)O)O
4,C1=CC(=CC(=C1)C(=O)O)C(=O)O
12 changes: 12 additions & 0 deletions tests/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,15 @@ def test_filter_scscore_greater(tmp_path: Path) -> None:

expected = {"O=C(O)C1CNC2C3CC4C2N4C13"}
check_expected(opts, expected)


def test_single_anchor(tmp_path: Path) -> None:
    """Check that only molecules with a single carboxylic acid group pass the filter."""
    opts = create_options({"single_anchor": True}, "smiles_carboxylic.csv", tmp_path)
    opts.anchor = "O(C=O)[H]"

    # The molecule of the input file carrying two carboxylic groups is not expected
    single_acids = {
        "CCCCCCCCC=CCCCCCCCC(=O)O",
        "CC(=O)O",
        "O=C(O)Cc1ccccc1",
        "CC(O)C(=O)O",
    }
    check_expected(opts, single_acids)

0 comments on commit 3a44b2e

Please sign in to comment.