From b852d4747b61a844342f3293270b3d90f02ea59f Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Tue, 18 Jun 2019 14:45:20 +0200 Subject: [PATCH 1/5] Cleaned up the code, * --- CAT/analysis/ligand_bde.py | 145 ++++++++++++++++++------------ CAT/analysis/ligand_solvation.py | 2 +- CAT/data_handling/CAT_database.py | 16 +++- examples/input_settings.yaml | 5 +- 4 files changed, 107 insertions(+), 61 deletions(-) diff --git a/CAT/analysis/ligand_bde.py b/CAT/analysis/ligand_bde.py index 5c41b533..a6f422fc 100644 --- a/CAT/analysis/ligand_bde.py +++ b/CAT/analysis/ligand_bde.py @@ -40,11 +40,10 @@ def init_bde(qd_df, arg): # Check if the calculation has been done already if not overwrite and 'qd' in arg.optional.database.read: with data.open_csv_qd(data.csv_qd, write=False) as db: - try: - for i in db[['BDE label', 'BDE dE', 'BDE dG', 'BDE ddG']]: - qd_df[i] = np.nan - except KeyError: - pass + key_ar = np.array(['BDE label', 'BDE dE', 'BDE dG', 'BDE ddG']) + bool_ar = np.isin(key_ar, db.columns.levels[0]) + for i in db[key_ar[bool_ar]]: + qd_df[i] = np.nan data.from_csv(qd_df, database='QD', get_mol=False) qd_df.dropna(axis='columns', how='all', inplace=True) @@ -67,11 +66,12 @@ def _bde_w_dg(qd_df, arg): """ data = Database(arg.optional.database.dirname) overwrite = 'qd' in arg.optional.database.overwrite - - # Prepare the job settings j1, j2 = arg.optional.qd.dissociate.job1, arg.optional.qd.dissociate.job2 s1, s2 = arg.optional.qd.dissociate.s1, arg.optional.qd.dissociate.s2 + ion = arg.optional.qd.dissociate.core_atom + lig_count = arg.optional.qd.dissociate.lig_count + # Identify previously calculated results try: has_na = qd_df[['BDE dE', 'BDE dG']].isna().all(axis='columns') if not has_na.any(): @@ -79,49 +79,52 @@ def _bde_w_dg(qd_df, arg): except KeyError: has_na = qd_df.index - for i, mol in qd_df['mol'][has_na].iteritems(): + for idx, mol in qd_df['mol'][has_na].iteritems(): # Create XYn and all XYn-dissociated quantum dots - xyn = get_xy2(mol) + xyn = get_xy2(mol, ion, lig_count) if not arg.optional.qd.dissociate.core_index: mol_wo_xyn = dissociate_ligand(mol, arg) else: mol_wo_xyn = dissociate_ligand2(mol, arg) # Construct new columns for **qd_df** - values = [i.properties.df_index for i in mol_wo_xyn] - n = len(values) - sub_idx = np.arange(n) - super_idx = ['BDE label', 'BDE dE', 'BDE ddG', 'BDE dG'] - idx = list(product(super_idx, sub_idx)) - for j in idx: - if j not in qd_df.index: - qd_df[j] = np.nan + labels = [m.properties.df_index for m in mol_wo_xyn] + sub_idx = np.arange(len(labels)).astype(str, copy=False) + try: + n = qd_df['BDE label'].shape[1] + except KeyError: + n = 0 + if len(labels) > n: + for i in sub_idx[n:]: + qd_df[('BDE label', i)] = qd_df[('BDE dE', i)] = qd_df[('BDE ddG', i)] = np.nan # Prepare slices - label_slice = i, idx[0:n] - dE_slice = i, idx[n:2*n] - ddG_slice = i, idx[2*n:3*n] + label_slice = idx, list(product(['BDE label'], sub_idx)) + dE_slice = idx, list(product(['BDE dE'], sub_idx)) + ddG_slice = idx, list(product(['BDE ddG'], sub_idx)) # Run the BDE calculations init(path=mol.properties.path, folder='BDE') config.default_jobmanager.settings.hashing = None - qd_df.loc[label_slice] = values + qd_df.loc[label_slice] = labels qd_df.loc[dE_slice] = get_bde_dE(mol, xyn, mol_wo_xyn, job=j1, s=s1) qd_df.loc[ddG_slice] = get_bde_ddG(mol, xyn, mol_wo_xyn, job=j2, s=s2) finish() + qd_df['BDE dG'] = qd_df['BDE dE'] + qd_df['BDE ddG'] # Update the database if 'qd' in arg.optional.database.write: - value1 = qmflows.singlepoint['specific'][type_to_string(j1)].copy() - value1.update(s1) - value2 = qmflows.freq['specific'][type_to_string(j2)].copy() - value2.update(s2) - recipe = Settings() - recipe['BDE 1'] = {'key': j1, 'value': value1} - recipe['BDE 2'] = {'key': j2, 'value': value2} - data.update_csv(qd_df, database='QD', job_recipe=recipe, overwrite=overwrite, - columns=[('settings', 'BDE 1'), ('settings', 'BDE 2')]+idx) + qd_df.sort_index(axis='columns', inplace=True) + column_tup = ('BDE label', 'BDE dE', 'BDE ddG', 'BDE dG') + kwarg = { + 'database': 'QD', + 'job_recipe': get_recipe(j1, s1, j2, s2), + 'overwrite': overwrite, + 'columns': [('settings', 'BDE 1'), ('settings', 'BDE 2')] + } + kwarg['columns'] += [(i, j) for i, j in qd_df.columns if i in column_tup] + data.update_csv(qd_df, **kwarg) def _bde_wo_dg(qd_df, arg): @@ -132,13 +135,15 @@ def _bde_wo_dg(qd_df, arg): :parameter arg: A settings object containing all (optional) arguments. :type arg: |plams.Settings|_ (superclass: |dict|_). """ + # Unpack arguments data = Database(arg.optional.database.dirname) overwrite = 'qd' in arg.optional.database.overwrite - - # Prepare the job settings j1 = arg.optional.qd.dissociate.job1 s1 = arg.optional.qd.dissociate.s1 + ion = arg.optional.qd.dissociate.core_atom + lig_count = arg.optional.qd.dissociate.lig_count + # Identify previously calculated results try: has_na = qd_df['BDE dE'].isna().all(axis='columns') if not has_na.any(): @@ -146,48 +151,67 @@ def _bde_wo_dg(qd_df, arg): except KeyError: has_na = qd_df.index - for i, mol in qd_df['mol'][has_na].iteritems(): + for idx, mol in qd_df['mol'][has_na].iteritems(): # Create XYn and all XYn-dissociated quantum dots - xyn = get_xy2(mol) + xyn = get_xy2(mol, ion, lig_count) if not arg.optional.qd.dissociate.core_index: mol_wo_xyn = dissociate_ligand(mol, arg) else: mol_wo_xyn = dissociate_ligand2(mol, arg) # Construct new columns for **qd_df** - values = [i.properties.df_index for i in mol_wo_xyn] - n = len(values) - sub_idx = np.arange(n) - super_idx = ['BDE label', 'BDE dE'] - idx = list(product(super_idx, sub_idx)) - for j in idx: - if j not in qd_df.index: - qd_df[j] = np.nan + labels = [m.properties.df_index for m in mol_wo_xyn] + sub_idx = np.arange(len(labels)).astype(str, copy=False) + try: + n = qd_df['BDE label'].shape[1] + except KeyError: + n = 0 + if len(labels) > n: + for i in sub_idx[n:]: + qd_df[('BDE label', i)] = qd_df[('BDE dE', i)] = np.nan # Prepare slices - label_slice = i, idx[0:n] - dE_slice = i, idx[n:2*n] + label_slice = idx, list(product(['BDE label'], sub_idx)) + dE_slice = idx, list(product(['BDE dE'], sub_idx)) # Run the BDE calculations init(path=mol.properties.path, folder='BDE') config.default_jobmanager.settings.hashing = None - qd_df.loc[label_slice] = values + qd_df.loc[label_slice] = labels qd_df.loc[dE_slice] = get_bde_dE(mol, xyn, mol_wo_xyn, job=j1, s=s1) finish() # Update the database if 'qd' in arg.optional.database.write: - recipe = Settings() - value = qmflows.singlepoint[type_to_string(j1)] - value.update(s1) - recipe['BDE 1'] = {'key': j1, 'value': value} - data.update_csv(qd_df, database='QD', job_recipe=recipe, overwrite=overwrite, - columns=[('settings', 'BDE 1')]+idx) + qd_df.sort_index(axis='columns', inplace=True) + column_tup = ('BDE label', 'BDE dE') + kwarg = { + 'database': 'QD', + 'job_recipe': get_recipe(j1, s1), + 'overwrite': overwrite, + 'columns': [('settings', 'BDE 1')] + } + kwarg['columns'] += [(i, j) for i, j in qd_df.columns if i in column_tup] + data.update_csv(qd_df, **kwarg) + + +def get_recipe(job1, s1, job2=None, s2=None): + """Return the a dictionary with job types and job settings.""" + ret = Settings() + value1 = qmflows.singlepoint['specific'][type_to_string(job1)].copy() + value1.update(s1) + ret['BDE 1'] = {'key': job1, 'value': value1} + + if job2 is not None and s2 is not None: + value2 = qmflows.freq['specific'][type_to_string(job2)].copy() + value2.update(s2) + ret['BDE 2'] = {'key': job2, 'value': value2} + + return ret def get_bde_dE(tot, lig, core, job=None, s=None): - """ Calculate the bond dissociation energy: dE = dE(mopac) + (dG(uff) - dE(uff)) - """ + """Calculate the bond dissociation energy: dE = dE(mopac) + (dG(uff) - dE(uff)).""" # Optimize XYn if job == AMSJob: s_cp = s.copy() @@ -283,7 +307,7 @@ def get_ligand(mol): else: ret = Molecule() ret.atoms = at_list - ret.bonds = set(chain.from_iterable(at.bonds for at in at_list)) + ret.bonds = list(set(chain.from_iterable(at.bonds for at in at_list))) return ret.copy() # Translate the ligands to their final position @@ -291,6 +315,14 @@ def get_ligand(mol): lig2 = lig1.copy() idx1, anchor1 = get_anchor(lig1) idx2, anchor2 = get_anchor(lig2) + + # Return a the ligand without the ion + if ion is None: + lig1.properties.name = 'XYn' + lig1.properties.path = mol.properties.path + lig1.properties.indices = [idx1] + return lig1 + radius = anchor1.radius + PeriodicTable.get_radius(ion) target = np.array([radius, 0.0, 0.0]) lig1.translate(anchor1.vector_to(target)) @@ -450,13 +482,14 @@ def remove_ligands(mol, combinations_dict, indices, idx_lig): for core in combinations_dict: for lig in combinations_dict[core]: mol_tmp = mol.copy() + mol_tmp.properties = Settings() mol_tmp.properties.core_topology = str(mol[core].properties.topology) + '_' + str(core) mol_tmp.properties.lig_residue = sorted([mol[i[0]].properties.pdb_info.ResidueNumber for i in lig]) mol_tmp.properties.df_index = mol_tmp.properties.core_topology - mol_tmp.properties.df_index += ''.join([' ' + str(i) - for i in mol_tmp.properties.lig_residue]) + mol_tmp.properties.df_index += ' '.join(str(i) for i in mol_tmp.properties.lig_residue) + delete_idx = sorted([core] + list(chain.from_iterable(lig)), reverse=True) for i in delete_idx: mol_tmp.delete_atom(mol_tmp[i]) diff --git a/CAT/analysis/ligand_solvation.py b/CAT/analysis/ligand_solvation.py index ab663898..d9a414f9 100644 --- a/CAT/analysis/ligand_solvation.py +++ b/CAT/analysis/ligand_solvation.py @@ -129,7 +129,7 @@ def get_solv(mol, solvent_list, coskf, job=None, s=None): E_solv.append(np.nan) Gamma.append(np.nan) - # Return the solvation energies and activity coefficients as dict + # Return the solvation energies and activity coefficients as two lists return E_solv, Gamma diff --git a/CAT/data_handling/CAT_database.py b/CAT/data_handling/CAT_database.py index c314e134..1c7e0624 100644 --- a/CAT/data_handling/CAT_database.py +++ b/CAT/data_handling/CAT_database.py @@ -386,11 +386,12 @@ def __exit__(self, type, value, traceback): self.df = None class open_csv_qd(): - """ Context manager for opening and closing the quantum dot database. + """Context manager for opening and closing the quantum dot database. :param str path: The path+filename to the database component. :param bool write: Whether or not the database file should be updated after closing **self**. + """ def __init__(self, path=None, write=True): @@ -417,7 +418,16 @@ def __exit__(self, type, value, traceback): self.df = None class DF(dict): - """A mutable container for holding dataframes.""" + """A mutable container for holding dataframes. + + A subclass of :class:`dict` containing a single key (``"df"``) and value + (a Pandas DataFrame). + Calling an item or attribute of :class:`.DF` will call said method on the + underlaying DataFrame (``self["df"]``). + An exception to this is the ``"df"`` key, which will get/set the DataFrame + instead. + + """ def __init__(self, df: pd.DataFrame) -> None: super().__init__() @@ -449,7 +459,7 @@ def __setitem__(self, key, value): def __getitem__(self, key): df = super().__getitem__('df') - if key == 'df': + if isinstance(key, str) and key == 'df': return df return df.__getitem__(key) diff --git a/examples/input_settings.yaml b/examples/input_settings.yaml index 1a5a34d8..a333fffb 100644 --- a/examples/input_settings.yaml +++ b/examples/input_settings.yaml @@ -1,4 +1,4 @@ -path: /Users/basvanbeek/Documents/CdSe/Week_5 +path: /Users/bvanbeek/Documents/CdSe/Week_5 input_cores: - Cd68Se55.xyz: @@ -6,6 +6,9 @@ input_cores: input_ligands: - CO + - CCO + - CCCO + - CCCCO # - FragranceDB2.txt optional: From e9ed707907331a0ffe8aa4932046a07c8a50e8b8 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Tue, 18 Jun 2019 14:58:31 +0200 Subject: [PATCH 2/5] Fixed issues related to optimize=True --- CAT/attachment/ligand_opt.py | 6 ++++-- CAT/attachment/qd_opt.py | 4 +++- examples/input_settings.yaml | 5 +---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/CAT/attachment/ligand_opt.py b/CAT/attachment/ligand_opt.py index c8dfb322..8bce724f 100644 --- a/CAT/attachment/ligand_opt.py +++ b/CAT/attachment/ligand_opt.py @@ -3,7 +3,9 @@ __all__ = ['init_ligand_opt'] import itertools + import numpy as np +import pandas as pd from scm.plams.mol.molecule import Molecule from scm.plams.mol.atom import Atom @@ -48,7 +50,7 @@ def init_ligand_opt(ligand_df, arg): if arg.optional.ligand.optimize: # Identify the to be optimized ligands if overwrite: - idx = ligand_df.index + idx = pd.Series(True, index=ligand_df.index, name='mol') message = '\t has been (re-)optimized' else: idx = ligand_df['hdf5 index'] < 0 @@ -56,7 +58,7 @@ def init_ligand_opt(ligand_df, arg): # Optimize the ligands lig_new = [] - for i, ligand in ligand_df['mol'][idx].iteritems(): + for ligand in ligand_df['mol'][idx]: mol_list = split_mol(ligand) for mol in mol_list: mol.set_dihed(180.0) diff --git a/CAT/attachment/qd_opt.py b/CAT/attachment/qd_opt.py index 221fc1f8..1d37c4e1 100644 --- a/CAT/attachment/qd_opt.py +++ b/CAT/attachment/qd_opt.py @@ -2,6 +2,8 @@ __all__ = ['init_qd_opt'] +import pandas as pd + from scm.plams.core.settings import Settings from scm.plams.core.functions import (init, finish) from scm.plams.interfaces.adfsuite.ams import AMSJob @@ -27,7 +29,7 @@ def init_qd_opt(qd_df, arg): job_recipe = arg.optional.qd.optimize overwrite = 'qd' in arg.optional.database.overwrite if overwrite: - idx = qd_df['hdf5 index'] + idx = pd.Series(True, index=qd_df.index, name='mol') message = '\t has been (re-)optimized' else: idx = qd_df['hdf5 index'] < 0 diff --git a/examples/input_settings.yaml b/examples/input_settings.yaml index a333fffb..88f043e8 100644 --- a/examples/input_settings.yaml +++ b/examples/input_settings.yaml @@ -6,9 +6,6 @@ input_cores: input_ligands: - CO - - CCO - - CCCO - - CCCCO # - FragranceDB2.txt optional: @@ -16,7 +13,7 @@ optional: dirname: database read: True write: True - overwrite: False + overwrite: True mol_format: [pdb] mongodb: False From 11fb6d1073032661c46cb5943bd58477f91ed056 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Tue, 25 Jun 2019 14:25:32 +0200 Subject: [PATCH 3/5] Update CAT_database.py --- CAT/data_handling/CAT_database.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CAT/data_handling/CAT_database.py b/CAT/data_handling/CAT_database.py index 1c7e0624..9b87ce9b 100644 --- a/CAT/data_handling/CAT_database.py +++ b/CAT/data_handling/CAT_database.py @@ -584,7 +584,12 @@ def update_hdf5(self, df, database='ligand', overwrite=False): # If **overwrite** is *True* if overwrite and old.any(): - f[database][old] = as_pdb_array(df['mol'][old.index], min_size=j) + ar = as_pdb_array(df['mol'][old.index], min_size=j) + + # Ensure that the hdf5 indices are sorted + idx = np.argsort(old) + old = old[idx] + f[database][old] = ar[idx] return ret From a0656e5a16f617cca0e70b746a12574e3d01ed32 Mon Sep 17 00:00:00 2001 From: Bas van Beek Date: Tue, 25 Jun 2019 14:27:24 +0200 Subject: [PATCH 4/5] CAT 0.4.3 --- CAT/__version__.py | 2 +- README.rst | 2 +- docs/conf.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CAT/__version__.py b/CAT/__version__.py index a9873473..908c0bb7 100644 --- a/CAT/__version__.py +++ b/CAT/__version__.py @@ -1 +1 @@ -__version__ = '0.4.2' +__version__ = '0.4.3' diff --git a/README.rst b/README.rst index 08179933..a61daae9 100644 --- a/README.rst +++ b/README.rst @@ -7,7 +7,7 @@ :target: https://www.python.org ####################################### -Compound Attachment/Analysis Tool 0.4.2 +Compound Attachment/Analysis Tool 0.4.3 ####################################### **CAT** is a collection of tools designed for the construction, diff --git a/docs/conf.py b/docs/conf.py index f0941353..cba69823 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -65,7 +65,7 @@ # The short X.Y version. version = '0.4' # The full version, including alpha/beta/rc tags. -release = '0.4.2' +release = '0.4.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 821f2fa494192874ce3848b7f0bc74de27809754 Mon Sep 17 00:00:00 2001 From: Bas van Beek <43369155+BvB93@users.noreply.github.com> Date: Tue, 25 Jun 2019 14:29:10 +0200 Subject: [PATCH 5/5] Update CHANGELOG.rst --- CHANGELOG.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c6beef6a..2494386f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,14 @@ Change Log All notable changes to this project will be documented in this file. This project adheres to `Semantic Versioning `_. +0.4.3 +***** + +* Improved interaction between the database and BDE module +* Cleaned up BDE module +* HDF5 indices are now always sorted when itneraction with the database + + 0.4.2 *****