diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d37755f..05ecc4a 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,11 @@ All notable changes to this project will be documented in this file.
This project adheres to `Semantic Versioning `_.
+0.2.2
+*****
+* Updated the documentation (see https://github.com/nlesc-nano/CAT/pull/123).
+
+
0.2.1
*****
* Store the ``__version__`` of CAT, Nano-CAT and Data-CAT in the hdf5 file.
diff --git a/README.rst b/README.rst
index 1fec02e..61f8d39 100644
--- a/README.rst
+++ b/README.rst
@@ -15,7 +15,7 @@
##############
-Data-CAT 0.2.1
+Data-CAT 0.2.2
##############
Data-CAT is a databasing framework for the Compound Attachment Tools package (CAT_).
diff --git a/dataCAT/__version__.py b/dataCAT/__version__.py
index e40844b..722de2a 100644
--- a/dataCAT/__version__.py
+++ b/dataCAT/__version__.py
@@ -1,3 +1,3 @@
"""The **Data-CAT** version."""
-__version__ = '0.2.1'
+__version__ = '0.2.2'
diff --git a/dataCAT/context_managers.py b/dataCAT/context_managers.py
index 59d328e..dd74fa5 100644
--- a/dataCAT/context_managers.py
+++ b/dataCAT/context_managers.py
@@ -58,7 +58,7 @@ def filename(self) -> AnyStr:
@property
def write(self) -> bool:
- """:class:`bool`: Get whether or not :attr:`~FileManagerABC.filename` should be written to when closing the context manager.""" # noqa: E501
+ """:class:`bool`: Get whether or not :attr:`.filename` should be written to when closing the context manager.""" # noqa: E501
return self._write
@final
@@ -127,17 +127,7 @@ def __exit__(self, exc_type: Optional[Type[BaseException]],
class OpenYaml(FileManagerABC[AnyStr, Settings]):
- """Context manager for opening and closing job settings (:attr:`.Database.yaml`).
-
- Attributes
- ----------
- filename : str
- The path+filename to the database component.
-
- write : bool
- Whether or not the database file should be updated after closing this instance.
-
- """
+ """Context manager for opening and closing job settings (:attr:`.Database.yaml`)."""
def __enter__(self) -> Settings:
"""Open the :class:`.OpenYaml` context manager, importing :attr:`.settings`."""
@@ -157,7 +147,7 @@ def __exit__(self, exc_type: Optional[Type[BaseException]],
class OpenLig(FileManagerABC[AnyStr, DFProxy]):
- """Context manager for opening and closing the ligand database (:attr:`.Database.csv_lig`)."""
+ """Context manager for opening and closing the ligand database (:attr:`Database.csv_lig`)."""
def __enter__(self) -> DFProxy:
"""Open the :class:`.OpenLig` context manager, importing :attr:`.df`."""
diff --git a/dataCAT/database.py b/dataCAT/database.py
index 1642ea3..f4f8404 100644
--- a/dataCAT/database.py
+++ b/dataCAT/database.py
@@ -62,31 +62,7 @@ class JobRecipe(TypedDict):
class Database:
- """The Database class.
-
- Attributes
- ----------
- dirname : :class:`str`
- The path+filename of the directory containing all database components.
- csv_lig : :data:`Callable[..., ContextManager]`
- A function for accesing the context manager for opening
- the .csv file containing all ligand related results.
- csv_qd : :data:`Callable[..., ContextManager]`
- A function for accesing the context manager for opening
- the .csv file containing all quantum dot related results.
- yaml : :data:`Callable[..., ContextManager]`
- A function for accesing the context manager for opening
- the .yaml file containing all job settings.
- hdf5 : :data:`Callable[..., ContextManager]`
- A function for accesing the context manager for opening
- the .hdf5 file containing all structures (as partiallize de-serialized .pdb files).
- mongodb : :class:`Mapping[str, Any]`, optional
- Optional: A dictionary with keyword arguments for :class:`pymongo.MongoClient`.
- Defaults to :data:`None` if a :exc:`~pymongo.errors.ServerSelectionTimeoutError` is raised
- when failing to contact the host.
- See the **host**, **port** and **kwargs** parameter.
-
- """ # noqa: E501
+ """The Database class."""
__slots__ = ('__weakref__', '_dirname', '_csv_lig', '_csv_qd', '_yaml',
'_hdf5', '_mongodb', '_hash')
@@ -98,27 +74,27 @@ def dirname(self) -> str:
@property
def csv_lig(self) -> 'partial[OpenLig]':
- """Get the :attr:`Database.csv_lig` context manager."""
+ """:data:`Callable[..., dataCAT.OpenLig]`: Get a function for constructing an :class:`dataCAT.OpenLig` context manager.""" # noqa: E501
return self._csv_lig
@property
def csv_qd(self) -> 'partial[OpenQD]':
- """Get the :attr:`Database.csv_qd` context manager."""
+ """:data:`Callable[..., dataCAT.OpenQD]`: Get a function for constructing an :class:`dataCAT.OpenQD` context manager.""" # noqa: E501
return self._csv_qd
@property
def yaml(self) -> 'partial[OpenYaml]':
- """Get the :attr:`Database.yaml` context manager."""
+ """:data:`Callable[..., dataCAT.OpenYaml]`: Get a function for constructing an :class:`dataCAT.OpenYaml` context manager.""" # noqa: E501
return self._yaml
@property
def hdf5(self) -> 'partial[h5py.File]':
- """Get the :attr:`Database.hdf5` context manager."""
+ """:data:`Callable[..., h5py.File]`: Get a function for constructing a :class:`h5py.File` context manager.""" # noqa: E501
return self._hdf5
@property
def mongodb(self) -> Optional[Mapping[str, Any]]:
- """Get the :attr:`Database.mongodb` context manager."""
+ """:class:`Mapping[str, Any]`, optional: Get a mapping with keyword arguments for :class:`pymongo.MongoClient`.""" # noqa: E501
return self._mongodb
def __init__(self, path: Union[str, 'PathLike[str]', None] = None,
@@ -143,7 +119,7 @@ def __init__(self, path: Union[str, 'PathLike[str]', None] = None,
port number on which to connect.
See :attr:`Database.mongodb`.
**kwargs
- Optional keyword argument for `pymongo.MongoClient `_.
+ Optional keyword argument for :class:`pymongo.MongoClient`.
See :attr:`Database.mongodb`.
""" # noqa: E501
@@ -247,7 +223,7 @@ def _parse_database(self, database): # noqa: E301
return self.csv_qd
raise ValueError(f"database={database!r}; accepted values for are 'ligand' and 'qd'")
- def update_mongodb(self, database: Union[str, Dict[str, pd.DataFrame]] = 'ligand',
+ def update_mongodb(self, database: Union[str, Mapping[str, pd.DataFrame]] = 'ligand',
overwrite: bool = False) -> None:
"""Export ligand or qd results to the MongoDB database.
@@ -270,16 +246,18 @@ def update_mongodb(self, database: Union[str, Dict[str, pd.DataFrame]] = 'ligand
Parameters
----------
- database : |str|_ or |dict|_ [|str|_, |pd.DataFrame|_]
+ database : :class:`str` or :class:`Mapping[str, pandas.DataFrame]`
The type of database.
Accepted values are ``"ligand"`` and ``"qd"``,
opening :attr:`Database.csv_lig` and :attr:`Database.csv_qd`, respectivelly.
Alternativelly, a dictionary with the database name and a matching DataFrame
can be passed directly.
-
- overwrite : bool
+ overwrite : :class:`bool`
Whether or not previous entries can be overwritten or not.
+
+ :rtype: :data:`None`
+
"""
if self.mongodb is None:
raise ValueError('Database.Mongodb is None')
@@ -288,8 +266,8 @@ def update_mongodb(self, database: Union[str, Dict[str, pd.DataFrame]] = 'ligand
client = MongoClient(**self.mongodb)
mongo_db = client.cat_database
- if isinstance(database, dict):
- database, db = next(iter(database.items()))
+ if callable(getattr(database, 'items', None)):
+ database, db = next(iter(database.items())) # type: ignore
dict_gen = df_to_mongo_dict(db)
idx_keys = db.index.names
collection = mongo_db.ligand_database if database == 'ligand' else mongo_db.qd_database
@@ -336,28 +314,26 @@ def update_csv(self, df: pd.DataFrame,
Parameters
----------
- df : |pd.DataFrame|_
+ df : :class:`pandas.DataFrame`
A dataframe of new (potential) database entries.
-
- database : str
+ database : :class:`str`
The type of database; accepted values are ``"ligand"`` (:attr:`Database.csv_lig`)
and ``"qd"`` (:attr:`Database.csv_qd`).
-
- columns : |Sequence|_
- Optional: A list of column keys in **df** which
+ columns : :class:`~collections.abc.Sequence`, optional
+ Optional: A sequence of column keys in **df** which
(potentially) are to be added to this instance.
- If ``None``: Add all columns.
-
- overwrite : |bool|_
+            If :data:`None`: Add all columns.
+ overwrite : :class:`bool`
Whether or not previous entries can be overwritten or not.
-
- job_recipe : |plams.Settings|_
- Optional: A :class:`.Settings` instance with settings specific to a job.
-
+ job_recipe : :class:`plams.Settings`
+ Optional: A Settings instance with settings specific to a job.
status : :class:`str`, optional
A descriptor of the status of the moleculair structures.
Set to ``"optimized"`` to treat them as optimized geometries.
+
+ :rtype: :data:`None`
+
"""
# Operate on either the ligand or quantum dot database
manager = self._parse_database(database)
@@ -408,14 +384,29 @@ def update_csv(self, df: pd.DataFrame,
def update_yaml(self, job_recipe: Mapping[KT, JobRecipe]) -> Dict[KT, str]:
"""Update :attr:`Database.yaml` with (potentially) new user provided settings.
+ Examples
+ --------
+ .. code:: python
+
+ >>> from dataCAT import Database
+
+ >>> db = Database(...) # doctest: +SKIP
+ >>> job_recipe = {
+ ... 'job1': {'key': 'ADFJob', 'value': ...},
+ ... 'job2': {'key': 'AMSJob', 'value': ...}
+ ... }
+
+ >>> db.update_yaml(job_recipe) # doctest: +SKIP
+
+
Parameters
----------
- job_recipe : |plams.Settings|_
- A settings object with one or more settings specific to a job.
+ job_recipe : :class:`~collections.abc.Mapping`
+ A mapping with the settings of one or more jobs.
Returns
-------
- |dict|_
+ :class:`Dict[str, str]`
A dictionary with the column names as keys and the key for :attr:`Database.yaml`
as matching values.
@@ -448,29 +439,26 @@ def update_yaml(self, job_recipe: Mapping[KT, JobRecipe]) -> Dict[KT, str]:
def update_hdf5(self, df: pd.DataFrame,
database: Union[Ligand, QD] = 'ligand',
overwrite: bool = False,
- status: Optional[str] = None):
+ status: Optional[str] = None) -> pd.Series:
"""Export molecules (see the ``"mol"`` column in **df**) to the structure database.
Returns a series with the :attr:`Database.hdf5` indices of all new entries.
Parameters
----------
- df : |pd.DataFrame|_
+ df : :class:`pandas.DataFrame`
A dataframe of new (potential) database entries.
-
- database : str
+ database : :class:`str`
The type of database; accepted values are ``"ligand"`` and ``"qd"``.
-
- overwrite : bool
+ overwrite : :class:`bool`
Whether or not previous entries can be overwritten or not.
-
status : :class:`str`, optional
A descriptor of the status of the moleculair structures.
Set to ``"optimized"`` to treat them as optimized geometries.
Returns
-------
- |pd.Series|_
+ :class:`pandas.Series`
A series with the indices of all new molecules in :attr:`Database.hdf5`.
"""
@@ -573,29 +561,27 @@ def from_csv(self, df: pd.DataFrame, database: Union[Ligand, QD] = 'ligand',
get_mol: bool = True, inplace: bool = True) -> Optional[pd.Series]:
"""Pull results from :attr:`Database.csv_lig` or :attr:`Database.csv_qd`.
- Performs in inplace update of **df** if **inplace** = ``True``, thus returing ``None``.
+        Performs an inplace update of **df** if **inplace** = :data:`True`,
+        thus returning :data:`None`.
Parameters
----------
- df : |pd.DataFrame|_
+ df : :class:`pandas.DataFrame`
A dataframe of new (potential) database entries.
-
- database : str
+ database : :class:`str`
The type of database; accepted values are ``"ligand"`` and ``"qd"``.
-
- get_mol : bool
+ get_mol : :class:`bool`
Attempt to pull preexisting molecules from the database.
See the **inplace** argument for more details.
-
- inplace : bool
- If ``True`` perform an inplace update of the ``"mol"`` column in **df**.
+ inplace : :class:`bool`
+ If :data:`True` perform an inplace update of the ``"mol"`` column in **df**.
Otherwise return a new series of PLAMS molecules.
Returns
-------
- |pd.Series|_ [|plams.Molecule|_]
- Optional: A Series of PLAMS molecules if **get_mol** = ``True``
- and **inplace** = ``False``.
+ :class:`pandas.Series`, optional
+ Optional: A Series of PLAMS molecules if **get_mol** = :data:`True`
+ and **inplace** = :data:`False`.
"""
# Operate on either the ligand or quantum dot database
@@ -616,24 +602,22 @@ def _get_csv_mol(self, df: pd.DataFrame,
inplace: bool = True) -> Optional[pd.Series]:
"""A method which handles the retrieval and subsequent formatting of molecules.
- Called internally by :meth:`.Database.from_csv`.
+ Called internally by :meth:`Database.from_csv`.
Parameters
----------
- df : |pd.DataFrame|_
+ df : :class:`pandas.DataFrame`
A dataframe of new (potential) database entries.
-
- database : str
+ database : :class:`str`
The type of database; accepted values are ``"ligand"`` and ``"qd"``.
-
- inplace : bool
- If ``True`` perform an inplace update of the ``("mol", "")`` column in **df**.
+ inplace : :class:`bool`
+ If :data:`True` perform an inplace update of the ``("mol", "")`` column in **df**.
Otherwise return a new series of PLAMS molecules.
Returns
-------
- |pd.Series|_ [|plams.Molecule|_]
- Optional: A Series of PLAMS molecules if **inplace** is ``False``.
+ :class:`pandas.Series`, optional
+ Optional: A Series of PLAMS molecules if **inplace** is :data:`False`.
"""
# Sort and find all valid HDF5 indices
@@ -671,21 +655,16 @@ def from_hdf5(self, index: Union[slice, Sequence[int]],
Parameters
----------
- index : |list|_ [|int|_]
+ index : :class:`Sequence[int]` or :class:`slice`
The indices of the to be retrieved structures.
-
- database : str
+ database : :class:`str`
The type of database; accepted values are ``"ligand"`` and ``"qd"``.
-
- rdmol : bool
- If ``True``, return an RDKit molecule instead of a PLAMS molecule.
-
- close : bool
- If the database component (:attr:`Database.hdf5`) should be closed afterwards.
+ rdmol : :class:`bool`
+ If :data:`True`, return an RDKit molecule instead of a PLAMS molecule.
Returns
-------
- |list|_ [|plams.Molecule|_ or |rdkit.Chem.Mol|_]
+ :class:`List[plams.Molecule]` or :class:`List[rdkit.Mol]`
A list of PLAMS or RDKit molecules.
"""
@@ -706,27 +685,24 @@ def hdf5_availability(self, timeout: float = 5.0,
"""Check if a .hdf5 file is opened by another process; return once it is not.
If two processes attempt to simultaneously open a single hdf5 file then
- h5py will raise an :class:`OSError`.
+ h5py will raise an :exc:`OSError`.
The purpose of this method is ensure that a .hdf5 file is actually closed,
thus allowing the :meth:`Database.from_hdf5` method to safely access **filename** without
- the risk of raising an :class:`OSError`.
+ the risk of raising an :exc:`OSError`.
Parameters
----------
- filename : str
- The path+filename of the hdf5 file.
-
- timeout : float
+ timeout : :class:`float`
Time timeout, in seconds, between subsequent attempts of opening **filename**.
-
- max_attempts : int
+ max_attempts : :class:`int`, optional
Optional: The maximum number attempts for opening **filename**.
- If the maximum number of attempts is exceeded, raise an ``OSError``.
+ If the maximum number of attempts is exceeded, raise an :exc:`OSError`.
+ Setting this value to :data:`None` will set the number of attempts to unlimited.
Raises
------
- OSError
+ :exc:`OSError`
Raised if **max_attempts** is exceded.
"""
diff --git a/dataCAT/df_proxy.py b/dataCAT/df_proxy.py
index 2a13d95..e5d5774 100644
--- a/dataCAT/df_proxy.py
+++ b/dataCAT/df_proxy.py
@@ -95,7 +95,14 @@ def __new__(mcls: Type[TT], name: str, bases: Tuple[type, ...], # noqa: N804
@final
class DFProxy(metaclass=_DFMeta):
- """A mutable wrapper for holding dataframes."""
+ """A mutable wrapper providing a view of the underlying dataframes.
+
+ Attributes
+ ----------
+ ndframe : :class:`pandas.DataFrame`
+ The embedded DataFrame.
+
+ """
__slots__ = ('__weakref__', 'ndframe')
@@ -110,7 +117,7 @@ def __init__(self, ndframe: pd.DataFrame) -> None:
Parameters
----------
- ndframe : |pd.DataFrame|_
+ ndframe : :class:`pandas.DataFrame`
A Pandas DataFrame (see :attr:`DFProxy.df`).