Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bde update #35

Merged
merged 6 commits into from
Jun 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CAT/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.4.2'
__version__ = '0.4.3'
145 changes: 89 additions & 56 deletions CAT/analysis/ligand_bde.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,10 @@ def init_bde(qd_df, arg):
# Check if the calculation has been done already
if not overwrite and 'qd' in arg.optional.database.read:
with data.open_csv_qd(data.csv_qd, write=False) as db:
try:
for i in db[['BDE label', 'BDE dE', 'BDE dG', 'BDE ddG']]:
qd_df[i] = np.nan
except KeyError:
pass
key_ar = np.array(['BDE label', 'BDE dE', 'BDE dG', 'BDE ddG'])
bool_ar = np.isin(key_ar, db.columns.levels[0])
for i in db[key_ar[bool_ar]]:
qd_df[i] = np.nan
data.from_csv(qd_df, database='QD', get_mol=False)
qd_df.dropna(axis='columns', how='all', inplace=True)

Expand All @@ -67,61 +66,65 @@ def _bde_w_dg(qd_df, arg):
"""
data = Database(arg.optional.database.dirname)
overwrite = 'qd' in arg.optional.database.overwrite

# Prepare the job settings
j1, j2 = arg.optional.qd.dissociate.job1, arg.optional.qd.dissociate.job2
s1, s2 = arg.optional.qd.dissociate.s1, arg.optional.qd.dissociate.s2
ion = arg.optional.qd.dissociate.core_atom
lig_count = arg.optional.qd.dissociate.lig_count

# Identify previously calculated results
try:
has_na = qd_df[['BDE dE', 'BDE dG']].isna().all(axis='columns')
if not has_na.any():
return
except KeyError:
has_na = qd_df.index

for i, mol in qd_df['mol'][has_na].iteritems():
for idx, mol in qd_df['mol'][has_na].iteritems():
# Create XYn and all XYn-dissociated quantum dots
xyn = get_xy2(mol)
xyn = get_xy2(mol, ion, lig_count)
if not arg.optional.qd.dissociate.core_index:
mol_wo_xyn = dissociate_ligand(mol, arg)
else:
mol_wo_xyn = dissociate_ligand2(mol, arg)

# Construct new columns for **qd_df**
values = [i.properties.df_index for i in mol_wo_xyn]
n = len(values)
sub_idx = np.arange(n)
super_idx = ['BDE label', 'BDE dE', 'BDE ddG', 'BDE dG']
idx = list(product(super_idx, sub_idx))
for j in idx:
if j not in qd_df.index:
qd_df[j] = np.nan
labels = [m.properties.df_index for m in mol_wo_xyn]
sub_idx = np.arange(len(labels)).astype(str, copy=False)
try:
n = qd_df['BDE label'].shape[1]
except KeyError:
n = 0
if len(labels) > n:
for i in sub_idx[n:]:
qd_df[('BDE label', i)] = qd_df[('BDE dE', i)] = qd_df[('BDE ddG', i)] = np.nan

# Prepare slices
label_slice = i, idx[0:n]
dE_slice = i, idx[n:2*n]
ddG_slice = i, idx[2*n:3*n]
label_slice = idx, list(product(['BDE label'], sub_idx))
dE_slice = idx, list(product(['BDE dE'], sub_idx))
ddG_slice = idx, list(product(['BDE ddG'], sub_idx))

# Run the BDE calculations
init(path=mol.properties.path, folder='BDE')
config.default_jobmanager.settings.hashing = None
qd_df.loc[label_slice] = values
qd_df.loc[label_slice] = labels
qd_df.loc[dE_slice] = get_bde_dE(mol, xyn, mol_wo_xyn, job=j1, s=s1)
qd_df.loc[ddG_slice] = get_bde_ddG(mol, xyn, mol_wo_xyn, job=j2, s=s2)
finish()

qd_df['BDE dG'] = qd_df['BDE dE'] + qd_df['BDE ddG']

# Update the database
if 'qd' in arg.optional.database.write:
value1 = qmflows.singlepoint['specific'][type_to_string(j1)].copy()
value1.update(s1)
value2 = qmflows.freq['specific'][type_to_string(j2)].copy()
value2.update(s2)
recipe = Settings()
recipe['BDE 1'] = {'key': j1, 'value': value1}
recipe['BDE 2'] = {'key': j2, 'value': value2}
data.update_csv(qd_df, database='QD', job_recipe=recipe, overwrite=overwrite,
columns=[('settings', 'BDE 1'), ('settings', 'BDE 2')]+idx)
qd_df.sort_index(axis='columns', inplace=True)
column_tup = ('BDE label', 'BDE dE', 'BDE ddG', 'BDE dG')
kwarg = {
'database': 'QD',
'job_recipe': get_recipe(j1, s1, j2, s2),
'overwrite': overwrite,
'columns': [('settings', 'BDE 1'), ('settings', 'BDE 2')]
}
kwarg['columns'] += [(i, j) for i, j in qd_df.columns if i in column_tup]
data.update_csv(qd_df, **kwarg)


def _bde_wo_dg(qd_df, arg):
Expand All @@ -132,62 +135,83 @@ def _bde_wo_dg(qd_df, arg):
:parameter arg: A settings object containing all (optional) arguments.
:type arg: |plams.Settings|_ (superclass: |dict|_).
"""
# Unpack arguments
data = Database(arg.optional.database.dirname)
overwrite = 'qd' in arg.optional.database.overwrite

# Prepare the job settings
j1 = arg.optional.qd.dissociate.job1
s1 = arg.optional.qd.dissociate.s1
ion = arg.optional.qd.dissociate.core_atom
lig_count = arg.optional.qd.dissociate.lig_count

# Identify previously calculated results
try:
has_na = qd_df['BDE dE'].isna().all(axis='columns')
if not has_na.any():
return
except KeyError:
has_na = qd_df.index

for i, mol in qd_df['mol'][has_na].iteritems():
for idx, mol in qd_df['mol'][has_na].iteritems():
# Create XYn and all XYn-dissociated quantum dots
xyn = get_xy2(mol)
xyn = get_xy2(mol, ion, lig_count)
if not arg.optional.qd.dissociate.core_index:
mol_wo_xyn = dissociate_ligand(mol, arg)
else:
mol_wo_xyn = dissociate_ligand2(mol, arg)

# Construct new columns for **qd_df**
values = [i.properties.df_index for i in mol_wo_xyn]
n = len(values)
sub_idx = np.arange(n)
super_idx = ['BDE label', 'BDE dE']
idx = list(product(super_idx, sub_idx))
for j in idx:
if j not in qd_df.index:
qd_df[j] = np.nan
labels = [m.properties.df_index for m in mol_wo_xyn]
sub_idx = np.arange(len(labels)).astype(str, copy=False)
try:
n = qd_df['BDE label'].shape[1]
except KeyError:
n = 0
if len(labels) > n:
for i in sub_idx[n:]:
qd_df[('BDE label', i)] = qd_df[('BDE dE', i)] = np.nan

# Prepare slices
label_slice = i, idx[0:n]
dE_slice = i, idx[n:2*n]
label_slice = idx, list(product(['BDE label'], sub_idx))
dE_slice = idx, list(product(['BDE dE'], sub_idx))

# Run the BDE calculations
init(path=mol.properties.path, folder='BDE')
config.default_jobmanager.settings.hashing = None
qd_df.loc[label_slice] = values
qd_df.loc[label_slice] = labels
qd_df.loc[dE_slice] = get_bde_dE(mol, xyn, mol_wo_xyn, job=j1, s=s1)
finish()

# Update the database
if 'qd' in arg.optional.database.write:
recipe = Settings()
value = qmflows.singlepoint[type_to_string(j1)]
value.update(s1)
recipe['BDE 1'] = {'key': j1, 'value': value}
data.update_csv(qd_df, database='QD', job_recipe=recipe, overwrite=overwrite,
columns=[('settings', 'BDE 1')]+idx)
qd_df.sort_index(axis='columns', inplace=True)
column_tup = ('BDE label', 'BDE dE')
kwarg = {
'database': 'QD',
'job_recipe': get_recipe(j1, s1),
'overwrite': overwrite,
'columns': [('settings', 'BDE 1')]
}
kwarg['columns'] += [(i, j) for i, j in qd_df.columns if i in column_tup]
data.update_csv(qd_df, **kwarg)


def get_recipe(job1, s1, job2=None, s2=None):
    """Return a dictionary with job types and job settings.

    Builds a |plams.Settings|_ instance describing one or two BDE job
    recipes: ``'BDE 1'`` is always created from *job1*/*s1*; ``'BDE 2'``
    is added only when both *job2* and *s2* are provided.

    :parameter job1: The job type of the first (singlepoint) calculation.
    :parameter s1: The job settings of the first calculation.
    :parameter job2: The job type of the second (frequency) calculation; optional.
    :parameter s2: The job settings of the second calculation; optional.
    :return: A Settings instance with the ``'BDE 1'`` (and optionally
        ``'BDE 2'``) recipes, each a dict with ``'key'`` (job type) and
        ``'value'`` (merged job settings).
    """
    ret = Settings()

    # Copy the template before merging so the qmflows defaults are not mutated.
    value1 = qmflows.singlepoint['specific'][type_to_string(job1)].copy()
    value1.update(s1)
    ret['BDE 1'] = {'key': job1, 'value': value1}

    # The second recipe (frequency job) is optional; require both pieces.
    if job2 is not None and s2 is not None:
        value2 = qmflows.freq['specific'][type_to_string(job2)].copy()
        value2.update(s2)
        ret['BDE 2'] = {'key': job2, 'value': value2}

    return ret


def get_bde_dE(tot, lig, core, job=None, s=None):
""" Calculate the bond dissociation energy: dE = dE(mopac) + (dG(uff) - dE(uff))
"""
"""Calculate the bond dissociation energy: dE = dE(mopac) + (dG(uff) - dE(uff))."""
# Optimize XYn
if job == AMSJob:
s_cp = s.copy()
Expand Down Expand Up @@ -283,14 +307,22 @@ def get_ligand(mol):
else:
ret = Molecule()
ret.atoms = at_list
ret.bonds = set(chain.from_iterable(at.bonds for at in at_list))
ret.bonds = list(set(chain.from_iterable(at.bonds for at in at_list)))
return ret.copy()

# Translate the ligands to their final position
lig1 = get_ligand(mol)
lig2 = lig1.copy()
idx1, anchor1 = get_anchor(lig1)
idx2, anchor2 = get_anchor(lig2)

# Return a the ligand without the ion
if ion is None:
lig1.properties.name = 'XYn'
lig1.properties.path = mol.properties.path
lig1.properties.indices = [idx1]
return lig1

radius = anchor1.radius + PeriodicTable.get_radius(ion)
target = np.array([radius, 0.0, 0.0])
lig1.translate(anchor1.vector_to(target))
Expand Down Expand Up @@ -450,13 +482,14 @@ def remove_ligands(mol, combinations_dict, indices, idx_lig):
for core in combinations_dict:
for lig in combinations_dict[core]:
mol_tmp = mol.copy()

mol_tmp.properties = Settings()
mol_tmp.properties.core_topology = str(mol[core].properties.topology) + '_' + str(core)
mol_tmp.properties.lig_residue = sorted([mol[i[0]].properties.pdb_info.ResidueNumber
for i in lig])
mol_tmp.properties.df_index = mol_tmp.properties.core_topology
mol_tmp.properties.df_index += ''.join([' ' + str(i)
for i in mol_tmp.properties.lig_residue])
mol_tmp.properties.df_index += ' '.join(str(i) for i in mol_tmp.properties.lig_residue)

delete_idx = sorted([core] + list(chain.from_iterable(lig)), reverse=True)
for i in delete_idx:
mol_tmp.delete_atom(mol_tmp[i])
Expand Down
6 changes: 4 additions & 2 deletions CAT/attachment/ligand_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
__all__ = ['init_ligand_opt']

import itertools

import numpy as np
import pandas as pd

from scm.plams.mol.molecule import Molecule
from scm.plams.mol.atom import Atom
Expand Down Expand Up @@ -48,15 +50,15 @@ def init_ligand_opt(ligand_df, arg):
if arg.optional.ligand.optimize:
# Identify the to be optimized ligands
if overwrite:
idx = ligand_df.index
idx = pd.Series(True, index=ligand_df.index, name='mol')
message = '\t has been (re-)optimized'
else:
idx = ligand_df['hdf5 index'] < 0
message = '\t has been optimized'

# Optimize the ligands
lig_new = []
for i, ligand in ligand_df['mol'][idx].iteritems():
for ligand in ligand_df['mol'][idx]:
mol_list = split_mol(ligand)
for mol in mol_list:
mol.set_dihed(180.0)
Expand Down
4 changes: 3 additions & 1 deletion CAT/attachment/qd_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

__all__ = ['init_qd_opt']

import pandas as pd

from scm.plams.core.settings import Settings
from scm.plams.core.functions import (init, finish)
from scm.plams.interfaces.adfsuite.ams import AMSJob
Expand All @@ -27,7 +29,7 @@ def init_qd_opt(qd_df, arg):
job_recipe = arg.optional.qd.optimize
overwrite = 'qd' in arg.optional.database.overwrite
if overwrite:
idx = qd_df['hdf5 index']
idx = pd.Series(True, index=qd_df.index, name='mol')
message = '\t has been (re-)optimized'
else:
idx = qd_df['hdf5 index'] < 0
Expand Down
23 changes: 19 additions & 4 deletions CAT/data_handling/CAT_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,11 +386,12 @@ def __exit__(self, type, value, traceback):
self.df = None

class open_csv_qd():
""" Context manager for opening and closing the quantum dot database.
"""Context manager for opening and closing the quantum dot database.

:param str path: The path+filename to the database component.
:param bool write: Whether or not the database file should be updated after
closing **self**.

"""

def __init__(self, path=None, write=True):
Expand All @@ -417,7 +418,16 @@ def __exit__(self, type, value, traceback):
self.df = None

class DF(dict):
"""A mutable container for holding dataframes."""
"""A mutable container for holding dataframes.

A subclass of :class:`dict` containing a single key (``"df"``) and value
(a Pandas DataFrame).
Calling an item or attribute of :class:`.DF` will call said method on the
underlaying DataFrame (``self["df"]``).
An exception to this is the ``"df"`` key, which will get/set the DataFrame
instead.

"""

def __init__(self, df: pd.DataFrame) -> None:
super().__init__()
Expand Down Expand Up @@ -449,7 +459,7 @@ def __setitem__(self, key, value):

def __getitem__(self, key):
df = super().__getitem__('df')
if key == 'df':
if isinstance(key, str) and key == 'df':
return df
return df.__getitem__(key)

Expand Down Expand Up @@ -574,7 +584,12 @@ def update_hdf5(self, df, database='ligand', overwrite=False):

# If **overwrite** is *True*
if overwrite and old.any():
f[database][old] = as_pdb_array(df['mol'][old.index], min_size=j)
ar = as_pdb_array(df['mol'][old.index], min_size=j)

# Ensure that the hdf5 indices are sorted
idx = np.argsort(old)
old = old[idx]
f[database][old] = ar[idx]

return ret

Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ Change Log
All notable changes to this project will be documented in this file.
This project adheres to `Semantic Versioning <http://semver.org/>`_.

0.4.3
*****

* Improved interaction between the database and BDE module
* Cleaned up BDE module
* HDF5 indices are now always sorted when interacting with the database


0.4.2
*****

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
:target: https://www.python.org

#######################################
Compound Attachment/Analysis Tool 0.4.2
Compound Attachment/Analysis Tool 0.4.3
#######################################

**CAT** is a collection of tools designed for the construction,
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
# The short X.Y version.
version = '0.4'
# The full version, including alpha/beta/rc tags.
release = '0.4.2'
release = '0.4.3'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
Loading