Skip to content

Commit

Permalink
Merge pull request #35 from nlesc-nano/bde_update
Browse files Browse the repository at this point in the history
0.4.3
*****

* Improved interaction between the database and BDE module
* Cleaned up BDE module
* HDF5 indices are now always sorted when interacting with the database
  • Loading branch information
BvB93 authored Jun 25, 2019
2 parents 3e661d7 + 821f2fa commit d5c4029
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 68 deletions.
2 changes: 1 addition & 1 deletion CAT/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.4.2'
__version__ = '0.4.3'
145 changes: 89 additions & 56 deletions CAT/analysis/ligand_bde.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,10 @@ def init_bde(qd_df, arg):
# Check if the calculation has been done already
if not overwrite and 'qd' in arg.optional.database.read:
with data.open_csv_qd(data.csv_qd, write=False) as db:
try:
for i in db[['BDE label', 'BDE dE', 'BDE dG', 'BDE ddG']]:
qd_df[i] = np.nan
except KeyError:
pass
key_ar = np.array(['BDE label', 'BDE dE', 'BDE dG', 'BDE ddG'])
bool_ar = np.isin(key_ar, db.columns.levels[0])
for i in db[key_ar[bool_ar]]:
qd_df[i] = np.nan
data.from_csv(qd_df, database='QD', get_mol=False)
qd_df.dropna(axis='columns', how='all', inplace=True)

Expand All @@ -67,61 +66,65 @@ def _bde_w_dg(qd_df, arg):
"""
data = Database(arg.optional.database.dirname)
overwrite = 'qd' in arg.optional.database.overwrite

# Prepare the job settings
j1, j2 = arg.optional.qd.dissociate.job1, arg.optional.qd.dissociate.job2
s1, s2 = arg.optional.qd.dissociate.s1, arg.optional.qd.dissociate.s2
ion = arg.optional.qd.dissociate.core_atom
lig_count = arg.optional.qd.dissociate.lig_count

# Identify previously calculated results
try:
has_na = qd_df[['BDE dE', 'BDE dG']].isna().all(axis='columns')
if not has_na.any():
return
except KeyError:
has_na = qd_df.index

for i, mol in qd_df['mol'][has_na].iteritems():
for idx, mol in qd_df['mol'][has_na].iteritems():
# Create XYn and all XYn-dissociated quantum dots
xyn = get_xy2(mol)
xyn = get_xy2(mol, ion, lig_count)
if not arg.optional.qd.dissociate.core_index:
mol_wo_xyn = dissociate_ligand(mol, arg)
else:
mol_wo_xyn = dissociate_ligand2(mol, arg)

# Construct new columns for **qd_df**
values = [i.properties.df_index for i in mol_wo_xyn]
n = len(values)
sub_idx = np.arange(n)
super_idx = ['BDE label', 'BDE dE', 'BDE ddG', 'BDE dG']
idx = list(product(super_idx, sub_idx))
for j in idx:
if j not in qd_df.index:
qd_df[j] = np.nan
labels = [m.properties.df_index for m in mol_wo_xyn]
sub_idx = np.arange(len(labels)).astype(str, copy=False)
try:
n = qd_df['BDE label'].shape[1]
except KeyError:
n = 0
if len(labels) > n:
for i in sub_idx[n:]:
qd_df[('BDE label', i)] = qd_df[('BDE dE', i)] = qd_df[('BDE ddG', i)] = np.nan

# Prepare slices
label_slice = i, idx[0:n]
dE_slice = i, idx[n:2*n]
ddG_slice = i, idx[2*n:3*n]
label_slice = idx, list(product(['BDE label'], sub_idx))
dE_slice = idx, list(product(['BDE dE'], sub_idx))
ddG_slice = idx, list(product(['BDE ddG'], sub_idx))

# Run the BDE calculations
init(path=mol.properties.path, folder='BDE')
config.default_jobmanager.settings.hashing = None
qd_df.loc[label_slice] = values
qd_df.loc[label_slice] = labels
qd_df.loc[dE_slice] = get_bde_dE(mol, xyn, mol_wo_xyn, job=j1, s=s1)
qd_df.loc[ddG_slice] = get_bde_ddG(mol, xyn, mol_wo_xyn, job=j2, s=s2)
finish()

qd_df['BDE dG'] = qd_df['BDE dE'] + qd_df['BDE ddG']

# Update the database
if 'qd' in arg.optional.database.write:
value1 = qmflows.singlepoint['specific'][type_to_string(j1)].copy()
value1.update(s1)
value2 = qmflows.freq['specific'][type_to_string(j2)].copy()
value2.update(s2)
recipe = Settings()
recipe['BDE 1'] = {'key': j1, 'value': value1}
recipe['BDE 2'] = {'key': j2, 'value': value2}
data.update_csv(qd_df, database='QD', job_recipe=recipe, overwrite=overwrite,
columns=[('settings', 'BDE 1'), ('settings', 'BDE 2')]+idx)
qd_df.sort_index(axis='columns', inplace=True)
column_tup = ('BDE label', 'BDE dE', 'BDE ddG', 'BDE dG')
kwarg = {
'database': 'QD',
'job_recipe': get_recipe(j1, s1, j2, s2),
'overwrite': overwrite,
'columns': [('settings', 'BDE 1'), ('settings', 'BDE 2')]
}
kwarg['columns'] += [(i, j) for i, j in qd_df.columns if i in column_tup]
data.update_csv(qd_df, **kwarg)


def _bde_wo_dg(qd_df, arg):
Expand All @@ -132,62 +135,83 @@ def _bde_wo_dg(qd_df, arg):
:parameter arg: A settings object containing all (optional) arguments.
:type arg: |plams.Settings|_ (superclass: |dict|_).
"""
# Unpack arguments
data = Database(arg.optional.database.dirname)
overwrite = 'qd' in arg.optional.database.overwrite

# Prepare the job settings
j1 = arg.optional.qd.dissociate.job1
s1 = arg.optional.qd.dissociate.s1
ion = arg.optional.qd.dissociate.core_atom
lig_count = arg.optional.qd.dissociate.lig_count

# Identify previously calculated results
try:
has_na = qd_df['BDE dE'].isna().all(axis='columns')
if not has_na.any():
return
except KeyError:
has_na = qd_df.index

for i, mol in qd_df['mol'][has_na].iteritems():
for idx, mol in qd_df['mol'][has_na].iteritems():
# Create XYn and all XYn-dissociated quantum dots
xyn = get_xy2(mol)
xyn = get_xy2(mol, ion, lig_count)
if not arg.optional.qd.dissociate.core_index:
mol_wo_xyn = dissociate_ligand(mol, arg)
else:
mol_wo_xyn = dissociate_ligand2(mol, arg)

# Construct new columns for **qd_df**
values = [i.properties.df_index for i in mol_wo_xyn]
n = len(values)
sub_idx = np.arange(n)
super_idx = ['BDE label', 'BDE dE']
idx = list(product(super_idx, sub_idx))
for j in idx:
if j not in qd_df.index:
qd_df[j] = np.nan
labels = [m.properties.df_index for m in mol_wo_xyn]
sub_idx = np.arange(len(labels)).astype(str, copy=False)
try:
n = qd_df['BDE label'].shape[1]
except KeyError:
n = 0
if len(labels) > n:
for i in sub_idx[n:]:
qd_df[('BDE label', i)] = qd_df[('BDE dE', i)] = np.nan

# Prepare slices
label_slice = i, idx[0:n]
dE_slice = i, idx[n:2*n]
label_slice = idx, list(product(['BDE label'], sub_idx))
dE_slice = idx, list(product(['BDE dE'], sub_idx))

# Run the BDE calculations
init(path=mol.properties.path, folder='BDE')
config.default_jobmanager.settings.hashing = None
qd_df.loc[label_slice] = values
qd_df.loc[label_slice] = labels
qd_df.loc[dE_slice] = get_bde_dE(mol, xyn, mol_wo_xyn, job=j1, s=s1)
finish()

# Update the database
if 'qd' in arg.optional.database.write:
recipe = Settings()
value = qmflows.singlepoint[type_to_string(j1)]
value.update(s1)
recipe['BDE 1'] = {'key': j1, 'value': value}
data.update_csv(qd_df, database='QD', job_recipe=recipe, overwrite=overwrite,
columns=[('settings', 'BDE 1')]+idx)
qd_df.sort_index(axis='columns', inplace=True)
column_tup = ('BDE label', 'BDE dE')
kwarg = {
'database': 'QD',
'job_recipe': get_recipe(j1, s1),
'overwrite': overwrite,
'columns': [('settings', 'BDE 1')]
}
kwarg['columns'] += [(i, j) for i, j in qd_df.columns if i in column_tup]
data.update_csv(qd_df, **kwarg)


def get_recipe(job1, s1, job2=None, s2=None):
    """Return a dictionary with job types and job settings for the BDE workflow.

    :parameter job1: The job type of the first (singlepoint) BDE calculation.
    :parameter s1: The job settings of the first (singlepoint) BDE calculation.
    :type s1: |plams.Settings|_ (superclass: |dict|_).
    :parameter job2: The optional job type of the second (frequency) BDE calculation.
    :parameter s2: The optional job settings of the second (frequency) BDE calculation.
    :type s2: |plams.Settings|_ (superclass: |dict|_).
    :return: A Settings instance with the ``'BDE 1'`` recipe and, if both *job2*
        and *s2* are supplied, the ``'BDE 2'`` recipe.
    :rtype: |plams.Settings|_ (superclass: |dict|_).
    """
    ret = Settings()

    # Merge the user-supplied settings into a copy of the default singlepoint template;
    # copy first so the shared qmflows template is not mutated
    value1 = qmflows.singlepoint['specific'][type_to_string(job1)].copy()
    value1.update(s1)
    ret['BDE 1'] = {'key': job1, 'value': value1}

    # The second (frequency) job is optional: only add it when both its
    # job type and settings are provided
    if job2 is not None and s2 is not None:
        value2 = qmflows.freq['specific'][type_to_string(job2)].copy()
        value2.update(s2)
        ret['BDE 2'] = {'key': job2, 'value': value2}

    return ret


def get_bde_dE(tot, lig, core, job=None, s=None):
""" Calculate the bond dissociation energy: dE = dE(mopac) + (dG(uff) - dE(uff))
"""
"""Calculate the bond dissociation energy: dE = dE(mopac) + (dG(uff) - dE(uff))."""
# Optimize XYn
if job == AMSJob:
s_cp = s.copy()
Expand Down Expand Up @@ -283,14 +307,22 @@ def get_ligand(mol):
else:
ret = Molecule()
ret.atoms = at_list
ret.bonds = set(chain.from_iterable(at.bonds for at in at_list))
ret.bonds = list(set(chain.from_iterable(at.bonds for at in at_list)))
return ret.copy()

# Translate the ligands to their final position
lig1 = get_ligand(mol)
lig2 = lig1.copy()
idx1, anchor1 = get_anchor(lig1)
idx2, anchor2 = get_anchor(lig2)

    # Return the ligand without the ion
if ion is None:
lig1.properties.name = 'XYn'
lig1.properties.path = mol.properties.path
lig1.properties.indices = [idx1]
return lig1

radius = anchor1.radius + PeriodicTable.get_radius(ion)
target = np.array([radius, 0.0, 0.0])
lig1.translate(anchor1.vector_to(target))
Expand Down Expand Up @@ -450,13 +482,14 @@ def remove_ligands(mol, combinations_dict, indices, idx_lig):
for core in combinations_dict:
for lig in combinations_dict[core]:
mol_tmp = mol.copy()

mol_tmp.properties = Settings()
mol_tmp.properties.core_topology = str(mol[core].properties.topology) + '_' + str(core)
mol_tmp.properties.lig_residue = sorted([mol[i[0]].properties.pdb_info.ResidueNumber
for i in lig])
mol_tmp.properties.df_index = mol_tmp.properties.core_topology
mol_tmp.properties.df_index += ''.join([' ' + str(i)
for i in mol_tmp.properties.lig_residue])
mol_tmp.properties.df_index += ' '.join(str(i) for i in mol_tmp.properties.lig_residue)

delete_idx = sorted([core] + list(chain.from_iterable(lig)), reverse=True)
for i in delete_idx:
mol_tmp.delete_atom(mol_tmp[i])
Expand Down
6 changes: 4 additions & 2 deletions CAT/attachment/ligand_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
__all__ = ['init_ligand_opt']

import itertools

import numpy as np
import pandas as pd

from scm.plams.mol.molecule import Molecule
from scm.plams.mol.atom import Atom
Expand Down Expand Up @@ -48,15 +50,15 @@ def init_ligand_opt(ligand_df, arg):
if arg.optional.ligand.optimize:
# Identify the to be optimized ligands
if overwrite:
idx = ligand_df.index
idx = pd.Series(True, index=ligand_df.index, name='mol')
message = '\t has been (re-)optimized'
else:
idx = ligand_df['hdf5 index'] < 0
message = '\t has been optimized'

# Optimize the ligands
lig_new = []
for i, ligand in ligand_df['mol'][idx].iteritems():
for ligand in ligand_df['mol'][idx]:
mol_list = split_mol(ligand)
for mol in mol_list:
mol.set_dihed(180.0)
Expand Down
4 changes: 3 additions & 1 deletion CAT/attachment/qd_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

__all__ = ['init_qd_opt']

import pandas as pd

from scm.plams.core.settings import Settings
from scm.plams.core.functions import (init, finish)
from scm.plams.interfaces.adfsuite.ams import AMSJob
Expand All @@ -27,7 +29,7 @@ def init_qd_opt(qd_df, arg):
job_recipe = arg.optional.qd.optimize
overwrite = 'qd' in arg.optional.database.overwrite
if overwrite:
idx = qd_df['hdf5 index']
idx = pd.Series(True, index=qd_df.index, name='mol')
message = '\t has been (re-)optimized'
else:
idx = qd_df['hdf5 index'] < 0
Expand Down
23 changes: 19 additions & 4 deletions CAT/data_handling/CAT_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,11 +386,12 @@ def __exit__(self, type, value, traceback):
self.df = None

class open_csv_qd():
""" Context manager for opening and closing the quantum dot database.
"""Context manager for opening and closing the quantum dot database.
:param str path: The path+filename to the database component.
:param bool write: Whether or not the database file should be updated after
closing **self**.
"""

def __init__(self, path=None, write=True):
Expand All @@ -417,7 +418,16 @@ def __exit__(self, type, value, traceback):
self.df = None

class DF(dict):
"""A mutable container for holding dataframes."""
"""A mutable container for holding dataframes.
A subclass of :class:`dict` containing a single key (``"df"``) and value
(a Pandas DataFrame).
Calling an item or attribute of :class:`.DF` will call said method on the
underlaying DataFrame (``self["df"]``).
An exception to this is the ``"df"`` key, which will get/set the DataFrame
instead.
"""

def __init__(self, df: pd.DataFrame) -> None:
super().__init__()
Expand Down Expand Up @@ -449,7 +459,7 @@ def __setitem__(self, key, value):

def __getitem__(self, key):
df = super().__getitem__('df')
if key == 'df':
if isinstance(key, str) and key == 'df':
return df
return df.__getitem__(key)

Expand Down Expand Up @@ -574,7 +584,12 @@ def update_hdf5(self, df, database='ligand', overwrite=False):

# If **overwrite** is *True*
if overwrite and old.any():
f[database][old] = as_pdb_array(df['mol'][old.index], min_size=j)
ar = as_pdb_array(df['mol'][old.index], min_size=j)

# Ensure that the hdf5 indices are sorted
idx = np.argsort(old)
old = old[idx]
f[database][old] = ar[idx]

return ret

Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ Change Log
All notable changes to this project will be documented in this file.
This project adheres to `Semantic Versioning <http://semver.org/>`_.

0.4.3
*****

* Improved interaction between the database and BDE module
* Cleaned up BDE module
* HDF5 indices are now always sorted when interacting with the database


0.4.2
*****

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
:target: https://www.python.org

#######################################
Compound Attachment/Analysis Tool 0.4.2
Compound Attachment/Analysis Tool 0.4.3
#######################################

**CAT** is a collection of tools designed for the construction,
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
# The short X.Y version.
version = '0.4'
# The full version, including alpha/beta/rc tags.
release = '0.4.2'
release = '0.4.3'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
Loading

0 comments on commit d5c4029

Please sign in to comment.