WiP: CAT 0.5.3 #47

Merged · 22 commits · Jul 22, 2019
4 changes: 3 additions & 1 deletion .travis.yml
@@ -23,8 +23,10 @@ install:
- conda install -n CAT -c conda-forge h5py rdkit
- source activate CAT

# Install qmflows package
# Install tests, data-CAT and nano-CAT
- pip install .[test]
- pip install nano-cat@git+https://github.com/nlesc-nano/nano-CAT@devel
- pip install data-cat@git+https://github.com/nlesc-nano/data-CAT@devel

script:
# Run the unitary tests excluding the expensive computations
2 changes: 1 addition & 1 deletion CAT/__version__.py
@@ -1 +1 @@
__version__ = '0.5.2'
__version__ = '0.5.3'
23 changes: 23 additions & 0 deletions CAT/assertion_functions.py
@@ -9,6 +9,8 @@
.. currentmodule:: CAT.assertion_functions
.. autosummary::
Invert
assert_isfile
assert_isdir
assert_len
assert_eq
assert_id
@@ -26,6 +28,8 @@
API
---
.. autoclass:: Invert
.. autofunction:: assert_isfile
.. autofunction:: assert_isdir
.. autofunction:: assert_len
.. autofunction:: assert_eq
.. autofunction:: assert_id
@@ -43,6 +47,7 @@
"""

from functools import wraps
from os.path import (isfile, isdir)
from typing import (Any, Callable, Tuple, Sequence, Container, Sized)


@@ -114,6 +119,24 @@ def wrapper(*args, **kwargs):
return wrapper


def assert_isfile(value: str) -> Tuple[str, str, None]:
"""Assert :code:`os.path.isfile(value)`; returns arguments for :func:`._err_msg`."""
assertion = 'assert os.path.isfile(value)'
assert isfile(value), _err_msg(assertion, value, None)

_assertion = 'assert not os.path.isfile(value)'
return _assertion, value, None


def assert_isdir(value: str) -> Tuple[str, str, None]:
"""Assert :code:`os.path.isdir(value)`; returns arguments for :func:`._err_msg`."""
assertion = 'assert os.path.isdir(value)'
assert isdir(value), _err_msg(assertion, value, None)

_assertion = 'assert not os.path.isdir(value)'
return _assertion, value, None


def assert_len(value: Sized,
ref: int) -> Tuple[str, Any, Any]:
"""Assert :code:`len(value) == ref`; returns arguments for :func:`._err_msg`."""
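
For reference, the two new assertions can be exercised as below. This is a minimal sketch, not part of the diff: the temporary file is only an illustration, and on failure the functions raise an AssertionError built from the module's _err_msg helper.

import os
import tempfile

from CAT.assertion_functions import assert_isfile, assert_isdir

with tempfile.TemporaryDirectory() as tmp:
    file_name = os.path.join(tmp, 'example.txt')
    with open(file_name, 'w') as f:
        f.write('test')

    assert_isfile(file_name)  # passes: the path points to a file
    assert_isdir(tmp)         # passes: the path points to a directory
    try:
        assert_isfile(tmp)    # fails: a directory is not a file
    except AssertionError as ex:
        print(ex)
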
29 changes: 15 additions & 14 deletions CAT/attachment/ligand_anchoring.py
@@ -36,7 +36,8 @@

from rdkit import Chem

from ..utils import (get_time, get_template)
from ..logger import logger
from ..utils import get_template
from ..mol_utils import separate_mod
from ..settings_dataframe import SettingsDataFrame
from ..data_handling.validate_mol import santize_smiles
@@ -76,18 +77,19 @@ def init_ligand_anchoring(ligand_df: SettingsDataFrame) -> SettingsDataFrame:
mol_list = []
for lig in ligand_df[MOL]:
# Functional group search
if not lig.properties.dummies:
dummies = lig.properties.dummies
if not dummies:
mol_list += find_substructure(lig, functional_groups, split)
continue

# Manual specification of a functional group
if len(lig.properties.dummies) == 1: # optional.ligand.split = False
lig.properties.dummies = lig.properties.dummies[0] - 1
split_ = False
elif len(lig.properties.dummies) == 2: # optional.ligand.split = True
lig.properties.dummies = tuple(i - 1 for i in lig.properties.dummies)
split_ = True
mol_list += [substructure_split(lig, lig.properties.dummies, split=split_)]
if len(dummies) == 1: # optional.ligand.split = False
lig.properties.dummies = dummies[0] - 1
_split = False
elif len(dummies) == 2: # optional.ligand.split = True
lig.properties.dummies = tuple(i - 1 for i in dummies)
_split = True
mol_list += [substructure_split(lig, lig.properties.dummies, split=_split)]

# Convert the results into a dataframe
return _get_df(mol_list, ligand_df.settings)
@@ -167,9 +169,7 @@ def _smiles_to_rdmol(smiles: str) -> Chem.Mol:
mol = Chem.MolFromSmiles(smiles, sanitize=False)
Chem.rdmolops.SanitizeMol(mol, sanitizeOps=sanitize)
except Exception as ex:
err = f'Failed to parse the following SMILES string: {repr(smiles)}\n\n{ex}'
ex_class = ex.__class__
raise ex_class(err)
raise ex.__class__(f'Failed to parse the following SMILES string: {repr(smiles)}\n\n{ex}')
return mol


@@ -218,8 +218,9 @@ def find_substructure(ligand: Molecule,
if ligand_indices:
return [substructure_split(ligand, tup, split) for tup in ligand_indices]
else:
msg = 'No functional groups were found (optional.ligand.split = {}) for ligand: {}'
print(get_time() + msg.format(split, ligand.properties.smiles))
err = (f"No functional groups were found (optional.ligand.split = {split}) for "
f"ligand: '{ligand.properties.name}'")
logger.error(err)
return []


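
The manual-anchor branch above converts user-supplied 1-based dummy-atom indices to 0-based ones and derives the split behaviour from how many indices were given. A condensed, standalone sketch of that bookkeeping (parse_dummies and the example tuples are hypothetical, for illustration only):

def parse_dummies(dummies):
    """Convert 1-based user indices to 0-based ones; derive ``split`` from their count."""
    if len(dummies) == 1:      # corresponds to optional.ligand.split = False
        return dummies[0] - 1, False
    elif len(dummies) == 2:    # corresponds to optional.ligand.split = True
        return tuple(i - 1 for i in dummies), True
    raise ValueError(f'Expected 1 or 2 dummy indices, got {len(dummies)}')

print(parse_dummies((4,)))    # -> (3, False)
print(parse_dummies((4, 7)))  # -> ((3, 6), True)
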
17 changes: 11 additions & 6 deletions CAT/attachment/ligand_attach.py
@@ -50,11 +50,12 @@
from scm.plams.core.settings import Settings

from ..settings_dataframe import SettingsDataFrame
from ..utils import get_time
from ..logger import logger
from ..mol_utils import (merge_mol, get_index)
from ..data_handling.mol_to_file import mol_to_file

try:
from dataCAT import (Database, mol_to_file)
from dataCAT import Database
DATA_CAT = True
except ImportError:
DATA_CAT = False
@@ -87,12 +88,12 @@ def init_qd_construction(ligand_df: SettingsDataFrame,
"""
# Extract arguments
settings = ligand_df.settings.optional
overwrite = DATA_CAT and 'qd' in settings.database.overwrite
write = DATA_CAT and 'qd' in settings.database.write
read = DATA_CAT and 'qd' in settings.database.read
qd_path = settings.qd.dirname
db_path = settings.database.dirname
mol_format = settings.database.mol_format
optimize = settings.qd.optimize

# Attempt to pull structures from the database
qd_df = _get_df(core_df.index, ligand_df.index, ligand_df.settings)
@@ -113,7 +114,11 @@
if write:
data = Database(db_path, **settings.database.mongodb)
data.update_csv(qd_df, columns=[HDF5_INDEX], database='QD_no_opt')
mol_to_file(qd_df[MOL], qd_path, overwrite, mol_format)

# Export xyz/pdb files
if 'qd' in settings.database.write and mol_format and not optimize:
mol_to_file(qd_df[MOL], qd_path, mol_format=mol_format)

return qd_df


@@ -175,14 +180,15 @@ def get_name():
mol_series = mol_series_opt.append(mol_series_no_opt[~slice_])

# Update Molecule.properties
logger.info('Pulling quantum dots from database')
for i, mol in mol_series.iteritems():
mol.properties = Settings({
'indices': _get_indices(mol, i),
'path': path,
'job_path': [],
'name': get_name()
})
print(get_time() + '{}\t has been pulled from the database'.format(mol.properties.name))
logger.info(f'{mol.properties.name} has been pulled from the database')
return mol_series


@@ -322,7 +328,6 @@ def get_name():
})

# Print and return
print(get_time() + qd.properties.name + '\t has been constructed')
return qd


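
Both this file and ligand_opt.py guard the data-CAT import so that database I/O degrades gracefully when the package is missing. A minimal, standalone sketch of that pattern (database_flags is a hypothetical helper, not part of the PR; the real code inlines these booleans in init_qd_construction):

try:
    from dataCAT import Database  # noqa: F401  (only used when data-CAT is installed)
    DATA_CAT = True
except ImportError:
    DATA_CAT = False


def database_flags(settings):
    """Resolve the qd read/write/overwrite toggles; all evaluate to False without data-CAT."""
    return {
        'read': DATA_CAT and 'qd' in settings.database.read,
        'write': DATA_CAT and 'qd' in settings.database.write,
        'overwrite': DATA_CAT and 'qd' in settings.database.overwrite,
    }
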
92 changes: 52 additions & 40 deletions CAT/attachment/ligand_opt.py
@@ -56,13 +56,14 @@
from rdkit.Chem import AllChem

from .ligand_attach import (rot_mol_angle, sanitize_dim_2)
from ..utils import get_time
from ..logger import logger
from ..settings_dataframe import SettingsDataFrame
from ..mol_utils import (to_symbol, fix_carboxyl, get_index,
from_mol_other, from_rdmol, separate_mod)
from ..data_handling.mol_to_file import mol_to_file

try:
from dataCAT import (Database, mol_to_file)
from dataCAT import Database
DATA_CAT = True
except ImportError:
DATA_CAT = False
@@ -89,25 +90,30 @@ def init_ligand_opt(ligand_df: SettingsDataFrame) -> None:

"""
settings = ligand_df.settings.optional
database = Database(settings.database.dirname, **settings.database.mongodb)
overwrite = DATA_CAT and 'ligand' in settings.database.overwrite
read = DATA_CAT and 'ligand' in settings.database.read
write = DATA_CAT and 'ligand' in settings.database.write
optimize = settings.ligand.optimize
lig_path = settings.ligand.dirname
mol_format = settings.database.mol_format
if DATA_CAT:
database = Database(settings.database.dirname, **settings.database.mongodb)

# Searches for matches between the input ligand and the database; imports the structure
read_data(ligand_df, database, read)
if read:
read_data(ligand_df, database, read)
ligand_df[OPT] = ligand_df[OPT].astype(bool, copy=False)

if write:
_ligand_to_db(ligand_df, database, opt=False)

# Optimize all new ligands
if optimize:
# Identify the to be optimized ligands
idx, message = _parse_overwrite(ligand_df, overwrite)
idx = _parse_overwrite(ligand_df, overwrite)

# Optimize the ligands
lig_new = start_ligand_jobs(ligand_df, idx, message)
lig_new = start_ligand_jobs(ligand_df, idx)

# Update the ligand dataframe
if lig_new:
@@ -117,54 +123,65 @@ def init_ligand_opt(ligand_df: SettingsDataFrame) -> None:
else:
ligand_df.loc[idx, MOL] = lig_new

print()
remove_duplicates(ligand_df)

# Write newly optimized structures to the database
if write and optimize:
_ligand_to_db(ligand_df, database)

# Export ligands to .xyz, .pdb, .mol and/or .mol2 format
if 'ligand' in settings.database.write and optimize and mol_format:
mol_to_file(ligand_df[MOL], lig_path, mol_format=mol_format)


def _parse_overwrite(ligand_df: SettingsDataFrame,
overwrite: bool) -> Tuple[pd.Series, str]:
"""Return a series for dataframe slicing and a to-be printer message."""
if overwrite:
idx = pd.Series(True, index=ligand_df.index, name=MOL)
message = '{}\t has been (re-)optimized'
return pd.Series(True, index=ligand_df.index, name=MOL)
else:
idx = np.invert(ligand_df[OPT])
message = '{}\t has been optimized'
return idx, message
return np.invert(ligand_df[OPT])


def read_data(ligand_df: SettingsDataFrame,
database: 'Database',
read: bool) -> None:
"""Read ligands from the database if **read** = ``True``."""
if read:
database.from_csv(ligand_df, database='ligand')
for i, mol in zip(ligand_df[OPT], ligand_df[MOL]):
if i == -1:
continue
print(get_time() + '{}\t has been pulled from the database'.format(mol.properties.name))
logger.info('Pulling ligands from database')
database.from_csv(ligand_df, database='ligand')
for i, mol in zip(ligand_df[OPT], ligand_df[MOL]):
if i == -1:
continue
logger.info(f'{mol.properties.name} has been pulled from the database')
ligand_df[OPT] = ligand_df[OPT].astype(bool, copy=False)


def start_ligand_jobs(ligand_df: SettingsDataFrame,
idx: pd.Series,
message: str) -> List[Molecule]:
idx: pd.Series) -> List[Molecule]:
"""Loop over all molecules in ``ligand_df.loc[idx]`` and perform geometry optimizations."""
if not idx.any():
logger.info(f'No new to-be optimized ligands found\n')
return []
else:
logger.info(f'Starting ligand optimization')

lig_new = []
for ligand in ligand_df[MOL][idx]:
mol_list = split_mol(ligand)
for mol in mol_list:
mol.set_dihed(180.0)
ligand_tmp = recombine_mol(mol_list)
fix_carboxyl(ligand_tmp)
lig_new.append(ligand_tmp)

# Print messages
print(get_time() + message.format(ligand.properties.name))
logger.info(f'UFFGetMoleculeForceField: {ligand.properties.name} optimization has started')
try:
mol_list = split_mol(ligand)
for mol in mol_list:
mol.set_dihed(180.0)
ligand_tmp = recombine_mol(mol_list)
fix_carboxyl(ligand_tmp)
lig_new.append(ligand_tmp)
logger.info(f'UFFGetMoleculeForceField: {ligand.properties.name} optimization '
'is successful')
except Exception:
logger.error(f'UFFGetMoleculeForceField: {ligand.properties.name} optimization '
'has failed')

logger.info('Finishing ligand optimization\n')
return lig_new


@@ -175,8 +192,6 @@ def _ligand_to_db(ligand_df: SettingsDataFrame,
# Extract arguments
settings = ligand_df.settings.optional
overwrite = DATA_CAT and 'ligand' in settings.database.overwrite
lig_path = settings.ligand.dirname
mol_format = settings.database.mol_format

kwargs: Dict[str, Any] = {'overwrite': overwrite}
if opt:
@@ -186,7 +201,6 @@ def _ligand_to_db(ligand_df: SettingsDataFrame,
kwargs['columns'] = [FORMULA, HDF5_INDEX, SETTINGS1]
kwargs['database'] = 'ligand'
kwargs['opt'] = True
mol_to_file(ligand_df[MOL], lig_path, overwrite, mol_format)
else:
kwargs['columns'] = [FORMULA, HDF5_INDEX]
kwargs['database'] = 'ligand_no_opt'
@@ -358,13 +372,11 @@ def get_frag_size(self, bond: Bond,

"""
if bond not in self.bonds:
error = 'get_frag_size: The argument bond should be of type plams.Bond and be part'
error += ' of the Molecule'
raise MoleculeError(error)
raise MoleculeError('get_frag_size: The argument bond should be of type plams.Bond and '
'be part of the Molecule')
elif atom not in self.atoms:
error = 'get_frag_size: The argument atom should be of type plams.Atom and be part'
error += ' of the Molecule'
raise MoleculeError(error)
raise MoleculeError('get_frag_size: The argument atom should be of type plams.Atom and '
'be part of the Molecule')

for at in self:
at._visited = False
@@ -415,8 +427,8 @@ def recombine_mol(mol_list: Sequence[Molecule]) -> Molecule:
return mol_list[0]
tup_list = mol_list[0].properties.mark
if not tup_list:
error = 'No PLAMS atoms specified in mol_list[0].properties.mark, aborting recombine_mol()'
raise IndexError(error)
raise IndexError('No PLAMS atoms specified in mol_list[0].properties.mark, '
'aborting recombine_mol()')

for tup in tup_list:
# Allign mol1 & mol2
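
The reworked start_ligand_jobs wraps each individual optimization in a try/except so that one failing ligand no longer aborts the whole batch, and reports progress through the new logger module. A stripped-down sketch of that pattern (logging.getLogger and optimize_one are stand-ins for CAT.logger and the split/set_dihed/recombine/fix_carboxyl steps):

import logging

logger = logging.getLogger('CAT')


def optimize_all(ligands, optimize_one):
    """Optimize every ligand, logging failures instead of propagating them."""
    optimized = []
    for ligand in ligands:
        name = ligand.properties.name
        logger.info(f'{name} optimization has started')
        try:
            optimized.append(optimize_one(ligand))
            logger.info(f'{name} optimization is successful')
        except Exception:
            logger.error(f'{name} optimization has failed')
    return optimized
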