Skip to content

Commit

Permalink
Merge pull request #12 from nlesc-nano/devel
Browse files Browse the repository at this point in the history
Data-CAT 0.1.1
  • Loading branch information
BvB93 committed Jul 22, 2019
2 parents fa666f7 + 21bf987 commit 9454243
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 68 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ All notable changes to this project will be documented in this file.
This project adheres to `Semantic Versioning <http://semver.org/>`_.


0.1.1
*****

* Introduced a proper logger (see https://github.com/nlesc-nano/CAT/issues/46 and
https://github.com/nlesc-nano/CAT/pull/47).


[Unreleased]
************

Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


##############
Data-CAT 0.1.0
Data-CAT 0.1.1
##############

Data-CAT is a databasing framework for the Compound Attachment Tools package (CAT_).
Expand Down
4 changes: 2 additions & 2 deletions dataCAT/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from .__version__ import __version__

from .database import Database
from .database_functions import (mol_to_file, df_to_mongo_dict)
from .database_functions import (df_to_mongo_dict)


__version__ = __version__
Expand All @@ -18,5 +18,5 @@

__all__ = [
'Database',
'mol_to_file', 'df_to_mongo_dict'
'df_to_mongo_dict'
]
2 changes: 1 addition & 1 deletion dataCAT/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.0'
__version__ = '0.1.1'
9 changes: 4 additions & 5 deletions dataCAT/create_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import pandas as pd
from pymongo import MongoClient, ASCENDING

from CAT.utils import get_time
from CAT.logger import logger

__all__: List[str] = []

Expand All @@ -62,16 +62,15 @@ def _create_csv(path: str,

# Check if the database exists and has the proper keys; create it if it does not
if not isfile(path):
msg = get_time() + '{}_database.csv not found in {}, creating {} database'
print(msg.format(database, path, database))
msg = f'{database}_database.csv not found in {path}, creating {database} database'
logger.info(msg)

if database == 'ligand':
_create_csv_lig(path)
elif database == 'QD':
_create_csv_qd(path)
else:
err = "'{}' is not an accepated value for the 'database' argument"
raise ValueError(err.format(database))
raise ValueError(f"'{database}' is not an accepated value for the 'database' argument")
return path


Expand Down
21 changes: 12 additions & 9 deletions dataCAT/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from rdkit.Chem import Mol
from scm.plams import (Settings, Molecule)

from CAT.logger import logger
from CAT.mol_utils import from_rdmol
from .database_functions import (
df_to_mongo_dict, even_index, from_pdb_array, sanitize_yaml_settings, as_pdb_array
Expand Down Expand Up @@ -355,12 +356,14 @@ def __setitem__(self, key: Immutable,
if not isinstance(value, pd.DataFrame):
raise KeyError
super().__setitem__('df', value)

except KeyError:
err = ("Instance of 'pandas.DataFrame' or 'CAT.Database.DF' expected;"
" observed type: '{}'")
raise TypeError(err.format(value.__class__.__name__))
raise TypeError("Instance of 'pandas.DataFrame' or 'CAT.Database.DF' expected;"
" observed type: '{value.__class__.__name__}'")

elif key == 'df':
super().__setitem__('df', value)

else:
self['df'].__setitem__(key, value)

Expand All @@ -382,8 +385,7 @@ def _parse_database(self, database: str) -> Tuple[str, Callable]:
path = self.csv_qd
open_csv = self.OpenCsvQd
else:
err = "database={}; accepted values for database are 'ligand' and 'QD'"
raise ValueError(err.format(database))
raise ValueError(f"database={database}; accepted values for are 'ligand' and 'QD'")
return path, open_csv

def update_mongodb(self, database: Union[str, Dict[str, pd.DataFrame]] = 'ligand',
Expand Down Expand Up @@ -421,7 +423,7 @@ def update_mongodb(self, database: Union[str, Dict[str, pd.DataFrame]] = 'ligand
"""
if self.mongodb is None:
raise ValueError
raise ValueError('Database.Mongodb is None')

# Open the MongoDB database
client = MongoClient(**self.mongodb)
Expand Down Expand Up @@ -864,16 +866,17 @@ def hdf5_availability(self, timeout: float = 5.0,
Raised if **max_attempts** is exceeded.
"""
warning = "OSWarning: '{}' is currently unavailable; repeating attempt in {:.0f} seconds"
err = f"'{self.hdf5}' is currently unavailable; repeating attempt in {timeout:.0f} seconds"
i = max_attempts or np.inf

while i:
try:
with h5py.File(self.hdf5, 'r+') as _:
return None # the .hdf5 file can safely be opened
except OSError as ex: # the .hdf5 file cannot be safely opened yet
print((warning).format(self.hdf5, timeout))
logger.warn(err)
error = ex
sleep(timeout)
i -= 1
raise error

raise error.__class__(error)
53 changes: 3 additions & 50 deletions dataCAT/database_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,58 +44,12 @@
from rdkit.Chem import Mol

from CAT.utils import get_template
from CAT.logger import logger
from CAT.mol_utils import from_rdmol

__all__ = ['mol_to_file', 'df_to_mongo_dict']


def mol_to_file(mol_list: Iterable[Molecule],
                path: Optional[str] = None,
                overwrite: bool = False,
                mol_format: Collection[str] = ('xyz', 'pdb')) -> None:
    """Export all molecules in **mol_list** to .pdb and/or .xyz files.

    Every file is named after ``mol.properties.name`` and written into **path**.
    The .pdb files are written with ``molkit.writepdb`` and the .xyz files with
    the ``write`` method of the molecule itself.

    Parameters
    ----------
    mol_list: |list|_ [|plams.Molecule|_]
        An iterable consisting of PLAMS molecules.

    path : str
        Optional: The path to the directory where the molecules will be stored.
        Defaults to the current working directory if ``None``.

    overwrite : bool
        If previously generated files can be overwritten or not.

    mol_format : |list|_ [|str|_]
        A list of strings with the to-be exported file types.
        Accepted values are ``"xyz"`` and/or ``"pdb"``.

    Raises
    ------
    NotADirectoryError
        Raised if **path** is not an existing directory.

    """
    # Set the export path
    path = path or getcwd()

    # Validate explicitly rather than with ``assert``: assertions are stripped
    # when Python runs with -O, which would silently disable this check
    if not isdir(path):
        raise NotADirectoryError(f"'{path}' is not an existing directory")

    # No file formats requested: nothing to export
    if not mol_format:
        return None

    for mol in mol_list:
        mol_path = join(path, mol.properties.name)
        pdb_file = mol_path + '.pdb'
        xyz_file = mol_path + '.xyz'

        # A pre-existing file is only replaced when overwriting is allowed
        if 'pdb' in mol_format and (overwrite or not isfile(pdb_file)):
            molkit.writepdb(mol, pdb_file)
        if 'xyz' in mol_format and (overwrite or not isfile(xyz_file)):
            mol.write(xyz_file)
    return None


Immutable = Union[str, int, float, frozenset, tuple] # Immutable objects


Expand Down Expand Up @@ -154,9 +108,8 @@ def _get_dict(idx: Sequence[Immutable],
return ret

if not (isinstance(df.index, pd.MultiIndex) and isinstance(df.columns, pd.MultiIndex)):
raise TypeError(
"DataFrame.index and DataFrame.columns should be instances of pandas.MultiIndex"
)
raise TypeError("DataFrame.index and DataFrame.columns should be "
"instances of pandas.MultiIndex")

idx_names = df.index.names
if as_gen:
Expand Down

0 comments on commit 9454243

Please sign in to comment.