Skip to content

Commit

Permalink
Merge pull request #13 from nlesc-nano/devel
Browse files Browse the repository at this point in the history
Data-CAT 0.1.2
  • Loading branch information
BvB93 authored Jul 29, 2019
2 parents 9454243 + 89bcf0f commit 88d7b14
Show file tree
Hide file tree
Showing 34 changed files with 1,500 additions and 1,108 deletions.
37 changes: 37 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
sudo: false
language:
python: 3.7

env:
global:
- COMMIT_AUTHOR_EMAIL: b.f.van.beek@vu.nl

install:
# Install python
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
- conda config --set always_yes yes --set changeps1 no --set auto_update_conda False
- conda update -q conda

# Useful for debugging any issues with conda
- conda info -a

# Install virtual environment
- conda create -n CAT python=3.7
- conda install -n CAT -c conda-forge h5py rdkit
- source activate CAT

# Install data-CAT (with its test dependencies) and CAT
- pip install .[test]
- pip install cat@git+https://github.com/nlesc-nano/CAT@devel --upgrade

script:
# Run the unit tests, excluding the expensive computations
- pytest -m "not (slow or long)" --cov=CAT tests
- coverage xml && coverage report -m

branches:
only:
- master
9 changes: 9 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ Change Log
All notable changes to this project will be documented in this file.
This project adheres to `Semantic Versioning <http://semver.org/>`_.

0.1.2
*****

* Updated many ``__str__`` and ``__repr__`` methods.
* Added the ``Database.__eq__`` method.
* Moved context managers to ``dataCAT.context_managers``.
* Moved (and renamed) the ``DF()`` class to ``dataCAT.df_collection.DFCollection()``.
* Added more tests.


0.1.1
*****
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


##############
Data-CAT 0.1.1
Data-CAT 0.1.2
##############

Data-CAT is a databasing framework for the Compound Attachment Tools package (CAT_).
Expand Down
2 changes: 1 addition & 1 deletion dataCAT/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.1'
__version__ = '0.1.2'
249 changes: 249 additions & 0 deletions dataCAT/context_managers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
"""
dataCAT.context_managers
========================
A module which holds context managers for the :class:`.Database` class.
Index
-----
.. currentmodule:: dataCAT.context_managers
.. autosummary::
MetaManager
OpenYaml
OpenLig
OpenQD
API
---
.. autoclass:: MetaManager
:members:
.. autoclass:: OpenYaml
.. autoclass:: OpenLig
.. autoclass:: OpenQD
"""

from os import getcwd, sep
from os.path import basename
from typing import (Callable, Optional, Any)
from contextlib import AbstractContextManager
from dataclasses import dataclass

import yaml
import pandas as pd

from scm.plams import Settings

from .df_collection import get_df_collection

__all__ = ['MetaManager', 'OpenYaml', 'OpenLig', 'OpenQD']


@dataclass(frozen=True)
class MetaManager:
    """Pair a database filename with the context manager that opens it.

    The only method of real interest is :meth:`MetaManager.open`, which calls
    :attr:`MetaManager.manager` and returns the resulting context manager.

    Note
    ----
    :attr:`MetaManager.filename` is always handed to :attr:`MetaManager.manager`
    as its first positional argument.

    Parameters
    ----------
    filename : str
        The path+filename of a database component.
        See :attr:`MetaManager.filename`.

    manager : |type|_ [|AbstractContextManager|_]
        A type object of a context manager.
        The first positional argument of the context manager should be the filename.
        See :attr:`MetaManager.manager`.

    Attributes
    ----------
    filename : str
        The path+filename of a database component.

    manager : |type|_ [|AbstractContextManager|_]
        A type object of a context manager.
        The first positional argument of the context manager should be the filename.

    """

    filename: str
    manager: Callable[..., AbstractContextManager]

    def __repr__(self) -> str:
        """Return a compact representation which shows only the basename of the file."""
        cls_name = type(self).__name__
        # The directory part of the path is abbreviated to '...'
        short_path = '...' + sep + basename(self.filename)
        return f'{cls_name}(filename={short_path!r}, manager={self.manager!r})'

    def __str__(self) -> str:
        """Return a multi-line representation which shows the full filename."""
        template = '{}(\n filename = {},\n manager = {}\n)'
        return template.format(
            type(self).__name__,
            repr(self.filename),
            repr(self.manager),
        )

    def open(self, *args: Any, **kwargs: Any) -> AbstractContextManager:
        """Call and return :attr:`MetaManager.manager`.

        :attr:`MetaManager.filename` is passed first, followed by
        **\*args** and **\*\*kwargs**.
        """
        factory = self.manager
        return factory(self.filename, *args, **kwargs)


class OpenYaml(AbstractContextManager):
    """Context manager for opening and closing job settings (:attr:`.Database.yaml`).

    On entering, the .yaml file is read and returned as a :class:`.Settings` instance.
    On exiting, the (possibly modified) settings are written back to the same file
    if :attr:`OpenYaml.write` is ``True``.

    Parameters
    ----------
    filename : str
        The path+filename to the database component.
        Falls back to the current working directory if not specified.

    write : bool
        Whether or not the database file should be updated after closing this instance.

    Attributes
    ----------
    filename : str
        The path+filename to the database component.

    write : bool
        Whether or not the database file should be updated after closing this instance.

    settings : |None|_ or |plams.Settings|_
        An attribute for (temporary) storing the opened .yaml file
        (:attr:`OpenYaml.filename`) as :class:`.Settings` instance.
        Reset to ``None`` once the context manager has been closed.

    """

    def __init__(self, filename: Optional[str] = None,
                 write: bool = True) -> None:
        """Initialize the :class:`.OpenYaml` context manager."""
        self.filename: str = filename or getcwd()
        self.write: bool = write
        self.settings: Optional[Settings] = None

    def __enter__(self) -> Settings:
        """Open the :class:`.OpenYaml` context manager, importing :attr:`.settings`."""
        # NOTE(review): FullLoader is kept for backwards compatibility;
        # consider yaml.safe_load() if the file can originate from an untrusted source.
        with open(self.filename, 'r') as f:
            self.settings = Settings(yaml.load(f, Loader=yaml.FullLoader))
        return self.settings

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the :class:`.OpenYaml` context manager, exporting :attr:`.settings`.

        Exceptions raised inside the ``with`` block are not suppressed.
        """
        if self.write:
            yml_dict = self.settings.as_dict()

            # A fix for Settings.as_dict() not converting Settings stored inside lists.
            # Only lists are inspected: scalar values (e.g. int or None) are not iterable
            # and would otherwise raise a TypeError.
            for value in yml_dict.values():
                if not isinstance(value, list):
                    continue
                for i, item in enumerate(value):
                    if isinstance(item, Settings):
                        value[i] = item.as_dict()

            # Serialize *before* opening the file: opening in 'w' mode truncates it,
            # so a failure in yaml.dump() would otherwise wipe the existing database.
            yml_str = yaml.dump(yml_dict, default_flow_style=False, indent=4)

            # Write to the .yaml file
            with open(self.filename, 'w') as f:
                f.write(yml_str)
        self.settings = None


class OpenLig(AbstractContextManager):
    """Context manager for opening and closing the ligand database (:attr:`.Database.csv_lig`).

    On entering, the .csv file is read with a two-level index and a two-level header,
    and wrapped with :func:`get_df_collection`.
    On exiting, the collection is written back to the same file
    if :attr:`OpenLig.write` is ``True``.

    Parameters
    ----------
    filename : str
        The path+filename to the database component.
        Falls back to the current working directory if not specified.

    write : bool
        Whether or not the database file should be updated after closing this instance.

    Attributes
    ----------
    filename : str
        The path+filename to the database component.

    write : bool
        Whether or not the database file should be updated after closing this instance.

    df : |None|_ or DFCollection
        An attribute for (temporary) storing the opened .csv file
        (see :attr:`OpenLig.filename`), as returned by :func:`get_df_collection`.
        Reset to ``None`` once the context manager has been closed.

    """

    def __init__(self, filename: Optional[str] = None,
                 write: bool = True) -> None:
        """Initialize the :class:`.OpenLig` context manager."""
        self.filename: str = filename if filename else getcwd()
        self.write: bool = write
        self.df: Optional['DFCollection'] = None

    def __enter__(self) -> 'DFCollection':
        """Open the :class:`.OpenLig` context manager, importing :attr:`.df`."""
        # Read the .csv file: 2 index columns and 2 header rows
        column_types = {'hdf5 index': int, 'formula': str, 'settings': str, 'opt': bool}
        raw = pd.read_csv(self.filename, index_col=[0, 1], header=[0, 1], dtype=column_types)
        collection = get_df_collection(raw)
        self.df = collection

        # Blank second-level header cells are labelled 'Unnamed: ...' by pandas;
        # replace those placeholders with empty strings
        cleaned = []
        for top, sub in collection.columns:
            cleaned.append((top, '' if 'Unnamed' in sub else sub))
        level_names = collection.columns.names
        collection.columns = pd.MultiIndex.from_tuples(cleaned, names=level_names)
        return collection

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the :class:`.OpenLig` context manager, exporting :attr:`.df`."""
        should_save = self.write
        if should_save:
            self.df.to_csv(self.filename)
        self.df = None


class OpenQD(AbstractContextManager):
    """Context manager for opening and closing the QD database (:attr:`.Database.csv_qd`).

    On entering, the .csv file is read with a four-level index and a two-level header,
    and wrapped with :func:`get_df_collection`.
    On exiting, the collection is written back to the same file
    if :attr:`OpenQD.write` is ``True``.

    Parameters
    ----------
    filename : str
        The path+filename to the database component.
        Falls back to the current working directory if not specified.

    write : bool
        Whether or not the database file should be updated after closing this instance.

    Attributes
    ----------
    filename : str
        The path+filename to the database component.

    write : bool
        Whether or not the database file should be updated after closing this instance.

    df : |None|_ or DFCollection
        An attribute for (temporary) storing the opened .csv file
        (:attr:`OpenQD.filename`), as returned by :func:`get_df_collection`.
        Reset to ``None`` once the context manager has been closed.

    """

    def __init__(self, filename: Optional[str] = None,
                 write: bool = True) -> None:
        """Initialize the :class:`.OpenQD` context manager."""
        self.filename: str = filename if filename else getcwd()
        self.write: bool = write
        self.df: Optional['DFCollection'] = None

    def __enter__(self) -> 'DFCollection':
        """Open the :class:`.OpenQD` context manager, importing :attr:`.df`."""
        # Read the .csv file: 4 index columns and 2 header rows
        column_types = {'hdf5 index': int, 'settings': str, 'opt': bool}
        raw = pd.read_csv(
            self.filename, index_col=[0, 1, 2, 3], header=[0, 1], dtype=column_types
        )
        collection = get_df_collection(raw)
        self.df = collection

        # Blank second-level header cells are labelled 'Unnamed: ...' by pandas;
        # replace those placeholders with empty strings
        cleaned = []
        for top, sub in collection.columns:
            cleaned.append((top, '' if 'Unnamed' in sub else sub))
        level_names = collection.columns.names
        collection.columns = pd.MultiIndex.from_tuples(cleaned, names=level_names)
        return collection

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the :class:`.OpenQD` context manager, exporting :attr:`.df`."""
        should_save = self.write
        if should_save:
            self.df.to_csv(self.filename)
        self.df = None
22 changes: 10 additions & 12 deletions dataCAT/create_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,20 +58,18 @@ def _create_csv(path: str,
The absolute path to the ligand or QD database.
"""
path = join(path, database + '_database.csv')
filename = join(path, database + '_database.csv')

# Check if the database exists and has the proper keys; create it if it does not
if not isfile(path):
msg = f'{database}_database.csv not found in {path}, creating {database} database'
logger.info(msg)

if not isfile(filename):
if database == 'ligand':
_create_csv_lig(path)
_create_csv_lig(filename)
elif database == 'QD':
_create_csv_qd(path)
_create_csv_qd(filename)
else:
raise ValueError(f"'{database}' is not an accepated value for the 'database' argument")
return path
logger.info(f'{database}_database.csv not found in {path}, creating {database} database')
return filename


def _create_csv_lig(filename: str) -> None:
Expand Down Expand Up @@ -186,13 +184,13 @@ def _create_yaml(path: str,
"""
# Define arguments
path = join(path, name)
filename = join(path, name)

# Create a new .yaml file if it does not yet exist
if not isfile(path):
with open(path, 'w') as f:
if not isfile(filename):
with open(filename, 'w') as f:
f.write(yaml.dump({None: [None]}, default_flow_style=False, indent=4))
return path
return filename


def _create_mongodb(host: str = 'localhost',
Expand Down
Loading

0 comments on commit 88d7b14

Please sign in to comment.