WiP: CAT 0.5.3 #47

Merged · 22 commits · Jul 22, 2019
4 changes: 3 additions & 1 deletion .travis.yml
@@ -23,8 +23,10 @@ install:
- conda install -n CAT -c conda-forge h5py rdkit
- source activate CAT

# Install qmflows package
# Install tests, data-CAT and nano-CAT
- pip install .[test]
- pip install nano-cat@git+https://github.com/nlesc-nano/nano-CAT@devel
- pip install data-cat@git+https://github.com/nlesc-nano/data-CAT@devel

script:
# Run the unitary tests excluding the expensive computations
2 changes: 1 addition & 1 deletion CAT/__version__.py
@@ -1 +1 @@
__version__ = '0.5.2'
__version__ = '0.5.3'
23 changes: 23 additions & 0 deletions CAT/assertion_functions.py
@@ -9,6 +9,8 @@
.. currentmodule:: CAT.assertion_functions
.. autosummary::
Invert
assert_isfile
assert_isdir
assert_len
assert_eq
assert_id
@@ -26,6 +28,8 @@
API
---
.. autoclass:: Invert
.. autofunction:: assert_isfile
.. autofunction:: assert_isdir
.. autofunction:: assert_len
.. autofunction:: assert_eq
.. autofunction:: assert_id
@@ -43,6 +47,7 @@
"""

from functools import wraps
from os.path import (isfile, isdir)
from typing import (Any, Callable, Tuple, Sequence, Container, Sized)


@@ -114,6 +119,24 @@ def wrapper(*args, **kwargs):
return wrapper


def assert_isfile(value: str) -> Tuple[str, str, None]:
"""Assert :code:`os.path.isfile(value)`; returns arguments for :func:`._err_msg`."""
assertion = 'assert os.path.isfile(value)'
assert isfile(value), _err_msg(assertion, value, None)

_assertion = 'assert not os.path.isfile(value)'
return _assertion, value, None


def assert_isdir(value: str) -> Tuple[str, str, None]:
"""Assert :code:`os.path.isdir(value)`; returns arguments for :func:`._err_msg`."""
assertion = 'assert os.path.isdir(value)'
assert isdir(value), _err_msg(assertion, value, None)

_assertion = 'assert not os.path.isdir(value)'
return _assertion, value, None


def assert_len(value: Sized,
ref: int) -> Tuple[str, Any, Any]:
"""Assert :code:`len(value) == ref`; returns arguments for :func:`._err_msg`."""
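
For reference, the two new assertions can be exercised as below. This is a minimal sketch, not part of the diff: the temporary file is only an illustration, and on failure the functions raise an AssertionError built from the module's _err_msg helper.

import os
import tempfile

from CAT.assertion_functions import assert_isfile, assert_isdir

with tempfile.TemporaryDirectory() as tmp:
    file_name = os.path.join(tmp, 'example.txt')
    with open(file_name, 'w') as f:
        f.write('test')

    assert_isfile(file_name)  # passes: the path points to a file
    assert_isdir(tmp)         # passes: the path points to a directory
    try:
        assert_isfile(tmp)    # fails: a directory is not a file
    except AssertionError as ex:
        print(ex)
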
29 changes: 15 additions & 14 deletions CAT/attachment/ligand_anchoring.py
@@ -36,7 +36,8 @@

from rdkit import Chem

from ..utils import (get_time, get_template)
from ..logger import logger
from ..utils import get_template
from ..mol_utils import separate_mod
from ..settings_dataframe import SettingsDataFrame
from ..data_handling.validate_mol import santize_smiles
@@ -76,18 +77,19 @@ def init_ligand_anchoring(ligand_df: SettingsDataFrame) -> SettingsDataFrame:
mol_list = []
for lig in ligand_df[MOL]:
# Functional group search
if not lig.properties.dummies:
dummies = lig.properties.dummies
if not dummies:
mol_list += find_substructure(lig, functional_groups, split)
continue

# Manual specification of a functional group
if len(lig.properties.dummies) == 1: # optional.ligand.split = False
lig.properties.dummies = lig.properties.dummies[0] - 1
split_ = False
elif len(lig.properties.dummies) == 2: # optional.ligand.split = True
lig.properties.dummies = tuple(i - 1 for i in lig.properties.dummies)
split_ = True
mol_list += [substructure_split(lig, lig.properties.dummies, split=split_)]
if len(dummies) == 1: # optional.ligand.split = False
lig.properties.dummies = dummies[0] - 1
_split = False
elif len(dummies) == 2: # optional.ligand.split = True
lig.properties.dummies = tuple(i - 1 for i in dummies)
_split = True
mol_list += [substructure_split(lig, lig.properties.dummies, split=_split)]

# Convert the results into a dataframe
return _get_df(mol_list, ligand_df.settings)
@@ -167,9 +169,7 @@ def _smiles_to_rdmol(smiles: str) -> Chem.Mol:
mol = Chem.MolFromSmiles(smiles, sanitize=False)
Chem.rdmolops.SanitizeMol(mol, sanitizeOps=sanitize)
except Exception as ex:
err = f'Failed to parse the following SMILES string: {repr(smiles)}\n\n{ex}'
ex_class = ex.__class__
raise ex_class(err)
raise ex.__class__(f'Failed to parse the following SMILES string: {repr(smiles)}\n\n{ex}')
return mol


@@ -218,8 +218,9 @@ def find_substructure(ligand: Molecule,
if ligand_indices:
return [substructure_split(ligand, tup, split) for tup in ligand_indices]
else:
msg = 'No functional groups were found (optional.ligand.split = {}) for ligand: {}'
print(get_time() + msg.format(split, ligand.properties.smiles))
err = (f"No functional groups were found (optional.ligand.split = {split}) for "
f"ligand: '{ligand.properties.name}'")
logger.error(err)
return []


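
The manual-anchor branch above converts user-supplied 1-based dummy-atom indices to 0-based ones and derives the split behaviour from how many indices were given. A condensed, standalone sketch of that bookkeeping (parse_dummies and the example tuples are hypothetical, for illustration only):

def parse_dummies(dummies):
    """Convert 1-based user indices to 0-based ones; derive ``split`` from their count."""
    if len(dummies) == 1:      # corresponds to optional.ligand.split = False
        return dummies[0] - 1, False
    elif len(dummies) == 2:    # corresponds to optional.ligand.split = True
        return tuple(i - 1 for i in dummies), True
    raise ValueError(f'Expected 1 or 2 dummy indices, got {len(dummies)}')

print(parse_dummies((4,)))    # -> (3, False)
print(parse_dummies((4, 7)))  # -> ((3, 6), True)
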
17 changes: 11 additions & 6 deletions CAT/attachment/ligand_attach.py
@@ -50,11 +50,12 @@
from scm.plams.core.settings import Settings

from ..settings_dataframe import SettingsDataFrame
from ..utils import get_time
from ..logger import logger
from ..mol_utils import (merge_mol, get_index)
from ..data_handling.mol_to_file import mol_to_file

try:
from dataCAT import (Database, mol_to_file)
from dataCAT import Database
DATA_CAT = True
except ImportError:
DATA_CAT = False
@@ -87,12 +88,12 @@ def init_qd_construction(ligand_df: SettingsDataFrame,
"""
# Extract arguments
settings = ligand_df.settings.optional
overwrite = DATA_CAT and 'qd' in settings.database.overwrite
write = DATA_CAT and 'qd' in settings.database.write
read = DATA_CAT and 'qd' in settings.database.read
qd_path = settings.qd.dirname
db_path = settings.database.dirname
mol_format = settings.database.mol_format
optimize = settings.qd.optimize

# Attempt to pull structures from the database
qd_df = _get_df(core_df.index, ligand_df.index, ligand_df.settings)
@@ -113,7 +114,11 @@
if write:
data = Database(db_path, **settings.database.mongodb)
data.update_csv(qd_df, columns=[HDF5_INDEX], database='QD_no_opt')
mol_to_file(qd_df[MOL], qd_path, overwrite, mol_format)

# Export xyz/pdb files
if 'qd' in settings.database.write and mol_format and not optimize:
mol_to_file(qd_df[MOL], qd_path, mol_format=mol_format)

return qd_df


@@ -175,14 +180,15 @@ def get_name():
mol_series = mol_series_opt.append(mol_series_no_opt[~slice_])

# Update Molecule.properties
logger.info('Pulling quantum dots from database')
for i, mol in mol_series.iteritems():
mol.properties = Settings({
'indices': _get_indices(mol, i),
'path': path,
'job_path': [],
'name': get_name()
})
print(get_time() + '{}\t has been pulled from the database'.format(mol.properties.name))
logger.info(f'{mol.properties.name} has been pulled from the database')
return mol_series


@@ -322,7 +328,6 @@ def get_name():
})

# Print and return
print(get_time() + qd.properties.name + '\t has been constructed')
return qd


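
Both this file and ligand_opt.py guard the data-CAT import so that database I/O degrades gracefully when the package is missing. A minimal, standalone sketch of that pattern (database_flags is a hypothetical helper, not part of the PR; the real code inlines these booleans in init_qd_construction):

try:
    from dataCAT import Database  # noqa: F401  (only used when data-CAT is installed)
    DATA_CAT = True
except ImportError:
    DATA_CAT = False


def database_flags(settings):
    """Resolve the qd read/write/overwrite toggles; all evaluate to False without data-CAT."""
    return {
        'read': DATA_CAT and 'qd' in settings.database.read,
        'write': DATA_CAT and 'qd' in settings.database.write,
        'overwrite': DATA_CAT and 'qd' in settings.database.overwrite,
    }
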
92 changes: 52 additions & 40 deletions CAT/attachment/ligand_opt.py
@@ -56,13 +56,14 @@
from rdkit.Chem import AllChem

from .ligand_attach import (rot_mol_angle, sanitize_dim_2)
from ..utils import get_time
from ..logger import logger
from ..settings_dataframe import SettingsDataFrame
from ..mol_utils import (to_symbol, fix_carboxyl, get_index,
from_mol_other, from_rdmol, separate_mod)
from ..data_handling.mol_to_file import mol_to_file

try:
from dataCAT import (Database, mol_to_file)
from dataCAT import Database
DATA_CAT = True
except ImportError:
DATA_CAT = False
@@ -89,25 +90,30 @@ def init_ligand_opt(ligand_df: SettingsDataFrame) -> None:

"""
settings = ligand_df.settings.optional
database = Database(settings.database.dirname, **settings.database.mongodb)
overwrite = DATA_CAT and 'ligand' in settings.database.overwrite
read = DATA_CAT and 'ligand' in settings.database.read
write = DATA_CAT and 'ligand' in settings.database.write
optimize = settings.ligand.optimize
lig_path = settings.ligand.dirname
mol_format = settings.database.mol_format
if DATA_CAT:
database = Database(settings.database.dirname, **settings.database.mongodb)

# Searches for matches between the input ligand and the database; imports the structure
read_data(ligand_df, database, read)
if read:
read_data(ligand_df, database, read)
ligand_df[OPT] = ligand_df[OPT].astype(bool, copy=False)

if write:
_ligand_to_db(ligand_df, database, opt=False)

# Optimize all new ligands
if optimize:
# Identify the to be optimized ligands
idx, message = _parse_overwrite(ligand_df, overwrite)
idx = _parse_overwrite(ligand_df, overwrite)

# Optimize the ligands
lig_new = start_ligand_jobs(ligand_df, idx, message)
lig_new = start_ligand_jobs(ligand_df, idx)

# Update the ligand dataframe
if lig_new:
@@ -117,54 +123,65 @@ def init_ligand_opt(ligand_df: SettingsDataFrame) -> None:
else:
ligand_df.loc[idx, MOL] = lig_new

print()
remove_duplicates(ligand_df)

# Write newly optimized structures to the database
if write and optimize:
_ligand_to_db(ligand_df, database)

# Export ligands to .xyz, .pdb, .mol and/or .mol2 format
if 'ligand' in settings.database.write and optimize and mol_format:
mol_to_file(ligand_df[MOL], lig_path, mol_format=mol_format)


def _parse_overwrite(ligand_df: SettingsDataFrame,
overwrite: bool) -> Tuple[pd.Series, str]:
"""Return a series for dataframe slicing and a to-be printer message."""
if overwrite:
idx = pd.Series(True, index=ligand_df.index, name=MOL)
message = '{}\t has been (re-)optimized'
return pd.Series(True, index=ligand_df.index, name=MOL)
else:
idx = np.invert(ligand_df[OPT])
message = '{}\t has been optimized'
return idx, message
return np.invert(ligand_df[OPT])


def read_data(ligand_df: SettingsDataFrame,
database: 'Database',
read: bool) -> None:
"""Read ligands from the database if **read** = ``True``."""
if read:
database.from_csv(ligand_df, database='ligand')
for i, mol in zip(ligand_df[OPT], ligand_df[MOL]):
if i == -1:
continue
print(get_time() + '{}\t has been pulled from the database'.format(mol.properties.name))
logger.info('Pulling ligands from database')
database.from_csv(ligand_df, database='ligand')
for i, mol in zip(ligand_df[OPT], ligand_df[MOL]):
if i == -1:
continue
logger.info(f'{mol.properties.name} has been pulled from the database')
ligand_df[OPT] = ligand_df[OPT].astype(bool, copy=False)


def start_ligand_jobs(ligand_df: SettingsDataFrame,
idx: pd.Series,
message: str) -> List[Molecule]:
idx: pd.Series) -> List[Molecule]:
"""Loop over all molecules in ``ligand_df.loc[idx]`` and perform geometry optimizations."""
if not idx.any():
logger.info(f'No new to-be optimized ligands found\n')
return []
else:
logger.info(f'Starting ligand optimization')

lig_new = []
for ligand in ligand_df[MOL][idx]:
mol_list = split_mol(ligand)
for mol in mol_list:
mol.set_dihed(180.0)
ligand_tmp = recombine_mol(mol_list)
fix_carboxyl(ligand_tmp)
lig_new.append(ligand_tmp)

# Print messages
print(get_time() + message.format(ligand.properties.name))
logger.info(f'UFFGetMoleculeForceField: {ligand.properties.name} optimization has started')
try:
mol_list = split_mol(ligand)
for mol in mol_list:
mol.set_dihed(180.0)
ligand_tmp = recombine_mol(mol_list)
fix_carboxyl(ligand_tmp)
lig_new.append(ligand_tmp)
logger.info(f'UFFGetMoleculeForceField: {ligand.properties.name} optimization '
'is successful')
except Exception:
logger.error(f'UFFGetMoleculeForceField: {ligand.properties.name} optimization '
'has failed')

logger.info('Finishing ligand optimization\n')
return lig_new


@@ -175,8 +192,6 @@ def _ligand_to_db(ligand_df: SettingsDataFrame,
# Extract arguments
settings = ligand_df.settings.optional
overwrite = DATA_CAT and 'ligand' in settings.database.overwrite
lig_path = settings.ligand.dirname
mol_format = settings.database.mol_format

kwargs: Dict[str, Any] = {'overwrite': overwrite}
if opt:
@@ -186,7 +201,6 @@ def _ligand_to_db(ligand_df: SettingsDataFrame,
kwargs['columns'] = [FORMULA, HDF5_INDEX, SETTINGS1]
kwargs['database'] = 'ligand'
kwargs['opt'] = True
mol_to_file(ligand_df[MOL], lig_path, overwrite, mol_format)
else:
kwargs['columns'] = [FORMULA, HDF5_INDEX]
kwargs['database'] = 'ligand_no_opt'
@@ -358,13 +372,11 @@ def get_frag_size(self, bond: Bond,

"""
if bond not in self.bonds:
error = 'get_frag_size: The argument bond should be of type plams.Bond and be part'
error += ' of the Molecule'
raise MoleculeError(error)
raise MoleculeError('get_frag_size: The argument bond should be of type plams.Bond and '
'be part of the Molecule')
elif atom not in self.atoms:
error = 'get_frag_size: The argument atom should be of type plams.Atom and be part'
error += ' of the Molecule'
raise MoleculeError(error)
raise MoleculeError('get_frag_size: The argument atom should be of type plams.Atom and '
'be part of the Molecule')

for at in self:
at._visited = False
@@ -415,8 +427,8 @@ def recombine_mol(mol_list: Sequence[Molecule]) -> Molecule:
return mol_list[0]
tup_list = mol_list[0].properties.mark
if not tup_list:
error = 'No PLAMS atoms specified in mol_list[0].properties.mark, aborting recombine_mol()'
raise IndexError(error)
raise IndexError('No PLAMS atoms specified in mol_list[0].properties.mark, '
'aborting recombine_mol()')

for tup in tup_list:
# Allign mol1 & mol2
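
The reworked start_ligand_jobs wraps each individual optimization in a try/except so that one failing ligand no longer aborts the whole batch, and reports progress through the new logger module. A stripped-down sketch of that pattern (logging.getLogger and optimize_one are stand-ins for CAT.logger and the split/set_dihed/recombine/fix_carboxyl steps):

import logging

logger = logging.getLogger('CAT')


def optimize_all(ligands, optimize_one):
    """Optimize every ligand, logging failures instead of propagating them."""
    optimized = []
    for ligand in ligands:
        name = ligand.properties.name
        logger.info(f'{name} optimization has started')
        try:
            optimized.append(optimize_one(ligand))
            logger.info(f'{name} optimization is successful')
        except Exception:
            logger.error(f'{name} optimization has failed')
    return optimized
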