From 2a567e0de503b12f083b3bf018918cc356557bb3 Mon Sep 17 00:00:00 2001 From: felipez Date: Thu, 11 Feb 2021 15:33:29 +0100 Subject: [PATCH 1/2] replace set by frozenset --- flamingo/screen.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/flamingo/screen.py b/flamingo/screen.py index cbb93bf..bc04d93 100644 --- a/flamingo/screen.py +++ b/flamingo/screen.py @@ -14,7 +14,7 @@ from functools import partial from multiprocessing import Pool from pathlib import Path -from typing import Any, Callable, Iterator, List, Mapping, Optional, Set, Tuple +from typing import Any, Callable, FrozenSet, Iterator, List, Mapping, Optional, Tuple import numpy as np import pandas as pd @@ -155,7 +155,7 @@ def filter_single_group( # Now remove the molecules that have more than 2 different functional groups # or the same functional group more than once - patterns = {Chem.MolFromSmarts(f) for f in groups} + patterns = frozenset(Chem.MolFromSmarts(f) for f in groups) pattern_check = np.vectorize(partial(has_single_substructure, patterns)) return molecules[pattern_check(molecules["rdkit_molecules"])] @@ -164,7 +164,7 @@ def filter_by_functional_group( molecules: pd.DataFrame, groups: List[str], exclude: bool) -> pd.DataFrame: """Search for a set of functional_groups.""" # Transform functional_groups to rkdit molecules - patterns = {Chem.MolFromSmarts(f) for f in groups} + patterns = frozenset(Chem.MolFromSmarts(f) for f in groups) # Function to apply predicate pattern_check = np.vectorize(partial(has_substructure, patterns)) @@ -177,12 +177,12 @@ def filter_by_functional_group( return molecules[has_pattern] -def has_substructure(patterns: Set[Chem.rdchem.Mol], mol: Chem.Mol) -> bool: +def has_substructure(patterns: FrozenSet[Chem.rdchem.Mol], mol: Chem.Mol) -> bool: """Check if there is any element of `pattern` in `mol`.""" return False if mol is None else any(mol.HasSubstructMatch(p) for p in patterns) -def has_single_substructure(patterns: Set, mol: Chem.Mol) -> bool: +def has_single_substructure(patterns: FrozenSet[Chem.rdchem.Mol], mol: Chem.Mol) -> bool: """Check if there a single functional pattern in mol.""" acc = 0 for pat in patterns: From 9d85ba3b331819a7863592474d405969b79928f4 Mon Sep 17 00:00:00 2001 From: felipez Date: Thu, 11 Feb 2021 15:37:46 +0100 Subject: [PATCH 2/2] replace set by frozenset --- tests/test_filter.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/tests/test_filter.py b/tests/test_filter.py index 8334b13..cef440c 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -3,7 +3,7 @@ import argparse import shutil from pathlib import Path -from typing import Any, Iterable, Mapping, Set +from typing import Any, Iterable, Mapping, FrozenSet import numpy as np import pandas as pd @@ -49,7 +49,7 @@ def remove_output(output_path: str) -> None: shutil.rmtree(path) -def check_expected(opts: Options, expected: Set[str]) -> None: +def check_expected(opts: Options, expected: FrozenSet[str]) -> None: """Run a filter workflow using `opts` and check the results.""" try: computed = run_workflow(opts) @@ -94,9 +94,9 @@ def test_contain_functional_groups(tmp_path: Path) -> None: smiles_file = "smiles_functional_groups.csv" filters = {"include_functional_groups": {"groups": ["[CX3](=O)[OX2H1]"],"maximum": 1}} opts = create_options(filters, smiles_file, tmp_path) - expected = {"O=C(O)C1CNC2C3CC4C2N4C13", "C#CC12CC(CO1)NCC2C(=O)O", + expected = frozenset({"O=C(O)C1CNC2C3CC4C2N4C13", "C#CC12CC(CO1)NCC2C(=O)O", "CCCCCCCCC=CCCCCCCCC(=O)O", "CC(=O)O", - "O=C(O)Cc1ccccc1", "CC(O)C(=O)O"} + "O=C(O)Cc1ccccc1", "CC(O)C(=O)O"}) check_expected(opts, expected) @@ -107,8 +107,8 @@ def test_exclude_functional_groups(tmp_path: Path) -> None: filters = {"exclude_functional_groups": {"groups": [ "[#7][#6](=[OX1])", "C#C", "[#6](=[OX1])[OX2][#6]", "[NX3]"], "maximum": 1}} opts = create_options(filters, smiles_file, tmp_path) - expected = {"c1ccccc1", "CCO", "CCCCCCCCC=CCCCCCCCC(=O)O", - "CC(=O)O", "O=C(O)Cc1ccccc1", "CC(O)C(=O)O"} + expected = frozenset({"c1ccccc1", "CCO", "CCCCCCCCC=CCCCCCCCC(=O)O", + "CC(=O)O", "O=C(O)Cc1ccccc1", "CC(O)C(=O)O"}) check_expected(opts, expected) @@ -120,7 +120,7 @@ def test_filter_bulkiness(tmp_path: Path) -> None: opts.core = PATH_TEST / "Cd68Se55.xyz" opts.anchor = "O(C=O)[H]" - expected = {"CC(=O)O", "CC(O)C(=O)O"} + expected = frozenset({"CC(=O)O", "CC(O)C(=O)O"}) check_expected(opts, expected) @@ -131,7 +131,7 @@ def test_filter_bulkiness_no_core(tmp_path: Path) -> None: opts = create_options(filters, smiles_file, tmp_path) opts.anchor = "O(C=O)[H]" - expected = set() # type: Set[str] + expected = frozenset() # type: FrozenSet[str] with pytest.raises(RuntimeError) as err: check_expected(opts, expected) @@ -146,7 +146,7 @@ def test_filter_scscore_lower(tmp_path: Path) -> None: filters = {"scscore": {"lower_than": 1.3}} opts = create_options(filters, smiles_file, tmp_path) - expected = {"CC(=O)O"} + expected = frozenset({"CC(=O)O"}) check_expected(opts, expected) @@ -156,7 +156,7 @@ def test_filter_scscore_greater(tmp_path: Path) -> None: filters = {"scscore": {"greater_than": 3.0}} opts = create_options(filters, smiles_file, tmp_path) - expected = {"O=C(O)C1CNC2C3CC4C2N4C13"} + expected = frozenset({"O=C(O)C1CNC2C3CC4C2N4C13"}) check_expected(opts, expected) @@ -167,7 +167,7 @@ def test_single_carboxylic(tmp_path: Path) -> None: opts = create_options(filters, smiles_file, tmp_path) opts.anchor = "O(C=O)[H]" - expected = {"CCCCCCCCC=CCCCCCCCC(=O)O", "CC(=O)O", "O=C(O)Cc1ccccc1", "CC(O)C(=O)O"} + expected = frozenset({"CCCCCCCCC=CCCCCCCCC(=O)O", "CC(=O)O", "O=C(O)Cc1ccccc1", "CC(O)C(=O)O"}) check_expected(opts, expected) @@ -180,7 +180,7 @@ def test_single_functional_group(tmp_path: Path) -> None: opts = create_options(filters, smiles_file, tmp_path) opts.anchor = "O(C=O)[H]" - expected = {"NCCc1ccncc1", "O=C(O)C1CCC1(F)F"} + expected = frozenset({"NCCc1ccncc1", "O=C(O)C1CCC1(F)F"}) check_expected(opts, expected) @@ -191,7 +191,8 @@ def test_multiple_anchor(tmp_path: Path) -> None: opts = create_options(filters, smiles_file, tmp_path) opts.anchor = "O(C=O)[H]" - expected = {"CCCCCCCCC=CCCCCCCCC(=O)O", "CC(=O)O", "O=C(O)Cc1ccccc1", "CC(O)C(=O)O", "O=C(O)c1cccc(C(=O)O)c1"} + expected = frozenset({ + "CCCCCCCCC=CCCCCCCCC(=O)O", "CC(=O)O", "O=C(O)Cc1ccccc1", "CC(O)C(=O)O", "O=C(O)c1cccc(C(=O)O)c1"}) check_expected(opts, expected)