From 89084cd55538b25fbb520154a08bb389e1db4b7d Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Thu, 5 Sep 2024 18:33:06 -0400 Subject: [PATCH 1/4] qcsk: copy models/v1 to top-level for Levi's PR to act upon --- qcelemental/models/align.py | 165 +++ qcelemental/models/basemodels.py | 205 ++++ qcelemental/models/basis.py | 231 +++++ qcelemental/models/common_models.py | 145 +++ qcelemental/models/molecule.py | 1451 +++++++++++++++++++++++++++ qcelemental/models/procedures.py | 276 +++++ qcelemental/models/results.py | 827 +++++++++++++++ qcelemental/models/types.py | 40 + 8 files changed, 3340 insertions(+) create mode 100644 qcelemental/models/align.py create mode 100644 qcelemental/models/basemodels.py create mode 100644 qcelemental/models/basis.py create mode 100644 qcelemental/models/common_models.py create mode 100644 qcelemental/models/molecule.py create mode 100644 qcelemental/models/procedures.py create mode 100644 qcelemental/models/results.py create mode 100644 qcelemental/models/types.py diff --git a/qcelemental/models/align.py b/qcelemental/models/align.py new file mode 100644 index 00000000..ca09504f --- /dev/null +++ b/qcelemental/models/align.py @@ -0,0 +1,165 @@ +from typing import Optional + +import numpy as np + +try: + from pydantic.v1 import Field, validator +except ImportError: # Will also trap ModuleNotFoundError + from pydantic import Field, validator + +from ...util import blockwise_contract, blockwise_expand +from .basemodels import ProtoModel +from .types import Array + +__all__ = ["AlignmentMill"] + + +class AlignmentMill(ProtoModel): + """Facilitates the application of the simple transformation operations + defined by ``shift``, ``rotation``, ``atommap`` arrays and ``mirror`` + boolean as recipe to the data structures + describing Cartesian molecular coordinates. Attaches functions to + transform the geometry, element list, gradient, etc. to the + AlignmentRecipe. 
When `mirror` attribute (defaults to False) active, + then molecular system can be substantively changed by procedure. + + """ + + shift: Optional[Array[float]] = Field(None, description="Translation array (3,) for coordinates.") # type: ignore + rotation: Optional[Array[float]] = Field(None, description="Rotation array (3, 3) for coordinates.") # type: ignore + atommap: Optional[Array[int]] = Field(None, description="Atom exchange map (nat,) for coordinates.") # type: ignore + mirror: bool = Field(False, description="Do mirror invert coordinates?") + + class Config: + force_skip_defaults = True + + @validator("shift") + def _must_be_3(cls, v, values, **kwargs): + try: + v = v.reshape(3) + except (ValueError, AttributeError): + raise ValueError("Shift must be castable to shape (3,)!") + return v + + @validator("rotation") + def _must_be_33(cls, v, values, **kwargs): + try: + v = v.reshape(3, 3) + except (ValueError, AttributeError): + raise ValueError("Rotation must be castable to shape (3, 3)!") + return v + + ### Non-Pydantic API functions + + def pretty_print(self, label: str = "") -> str: + width = 40 + text = [] + text.append("-" * width) + text.append("{:^{width}}".format("AlignmentMill", width=width)) + if label: + text.append("{:^{width}}".format(label, width=width)) + text.append("-" * width) + text.append("Mirror: {}".format(self.mirror)) + text.append("Atom Map: {}".format(self.atommap)) + text.append("Shift: {}".format(self.shift)) + text.append("Rotation:") + text.append("{}".format(self.rotation)) + text.append("-" * width) + return "\n".join(x.rstrip() for x in text) + + def align_coordinates(self, geom, *, reverse=False) -> Array: + """suitable for geometry or displaced geometry""" + + algeom = np.copy(geom) + if reverse: + algeom = algeom.dot(self.rotation) + algeom = algeom + self.shift + if self.mirror: + algeom[:, 1] *= -1.0 + else: + if self.mirror: + algeom[:, 1] *= -1.0 + algeom = algeom - self.shift + algeom = algeom.dot(self.rotation) + 
algeom = algeom[self.atommap, :] + # mirror-wise, rsm/msr == rms/msr + + return algeom + + def align_atoms(self, ats): + """suitable for masses, symbols, Zs, etc.""" + + return ats[self.atommap] + + def align_vector(self, vec): + """suitable for vector attached to molecule""" + + # sensible? TODO + # alvec = np.copy(vec) + # if self.mirror: + # alvec[:, 1] *= -1 + return vec.dot(self.rotation) + + def align_gradient(self, grad) -> Array: + """suitable for vector system attached to atoms""" + + algrad = np.copy(grad) + if self.mirror: + algrad[:, 1] *= -1.0 + algrad = algrad.dot(self.rotation) + algrad = algrad[self.atommap] + + return algrad + + def align_hessian(self, hess) -> Array: + blocked_hess = blockwise_expand(hess, (3, 3), False) + alhess = np.zeros_like(blocked_hess) + + nat = blocked_hess.shape[0] + for iat in range(nat): + for jat in range(nat): + alhess[iat, jat] = (self.rotation.T).dot(blocked_hess[iat, jat].dot(self.rotation)) + + alhess = alhess[np.ix_(self.atommap, self.atommap)] + + alhess = blockwise_contract(alhess) + return alhess + + def align_vector_gradient(self, mu_derivatives): + """Align the nuclear gradients of vector components (e.g. dipole derivatives).""" + # Input data is assumed to be organized into outermost x, y, z vector components. + # Organize derivatives for each atom into 3x3 and transform it. 
+ mu_x, mu_y, mu_z = mu_derivatives + nat = mu_x.shape[0] // 3 + al_mu = np.zeros((3, 3 * nat)) + + Datom = np.zeros((3, 3)) # atom whose nuclear derivatives are taken + for at in range(nat): + Datom.fill(0) + Datom[0, :] = mu_x[3 * self.atommap[at] : 3 * self.atommap[at] + 3] + Datom[1, :] = mu_y[3 * self.atommap[at] : 3 * self.atommap[at] + 3] + Datom[2, :] = mu_z[3 * self.atommap[at] : 3 * self.atommap[at] + 3] + Datom[:] = np.dot(self.rotation.T, np.dot(Datom, self.rotation)) + al_mu[0, 3 * at : 3 * at + 3] = Datom[0, :] + al_mu[1, 3 * at : 3 * at + 3] = Datom[1, :] + al_mu[2, 3 * at : 3 * at + 3] = Datom[2, :] + return al_mu + + def align_system(self, geom, mass, elem, elez, uniq, *, reverse: bool = False): + """For AlignmentRecipe `ar`, apply its translation, rotation, and atom map.""" + + nugeom = self.align_coordinates(geom, reverse=reverse) + numass = self.align_atoms(mass) + nuelem = self.align_atoms(elem) + nuelez = self.align_atoms(elez) + nuuniq = self.align_atoms(uniq) + + return nugeom, numass, nuelem, nuelez, nuuniq + + def align_mini_system(self, geom, uniq, *, reverse: bool = False): + """For AlignmentRecipe `ar`, apply its translation, rotation, and atom map.""" + + nugeom = self.align_coordinates(geom, reverse=reverse) + nuuniq = self.align_atoms(uniq) + + return nugeom, nuuniq diff --git a/qcelemental/models/basemodels.py b/qcelemental/models/basemodels.py new file mode 100644 index 00000000..2fecef26 --- /dev/null +++ b/qcelemental/models/basemodels.py @@ -0,0 +1,205 @@ +import json +from pathlib import Path +from typing import Any, Dict, Optional, Set, Union + +import numpy as np + +try: + from pydantic.v1 import BaseSettings # remove when QCFractal merges `next` + from pydantic.v1 import BaseModel +except ImportError: # Will also trap ModuleNotFoundError + from pydantic import BaseSettings # remove when QCFractal merges `next` + from pydantic import BaseModel + +from qcelemental.util import deserialize, serialize +from 
qcelemental.util.autodocs import AutoPydanticDocGenerator # remove when QCFractal merges `next` + + +def _repr(self) -> str: + return f'{self.__repr_name__()}({self.__repr_str__(", ")})' + + +class ProtoModel(BaseModel): + class Config: + allow_mutation: bool = False + extra: str = "forbid" + json_encoders: Dict[str, Any] = {np.ndarray: lambda v: v.flatten().tolist()} + serialize_default_excludes: Set = set() + serialize_skip_defaults: bool = False + force_skip_defaults: bool = False + + def __init_subclass__(cls, **kwargs) -> None: + super().__init_subclass__(**kwargs) + cls.__base_doc__ = "" # remove when QCFractal merges `next` + + if "pydantic" in cls.__repr__.__module__: + cls.__repr__ = _repr + + if "pydantic" in cls.__str__.__module__: + cls.__str__ = _repr + + @classmethod + def parse_raw(cls, data: Union[bytes, str], *, encoding: Optional[str] = None) -> "ProtoModel": # type: ignore + r""" + Parses raw string or bytes into a Model object. + + Parameters + ---------- + data + A serialized data blob to be deserialized into a Model. + encoding + The type of the serialized array, available types are: {'json', 'json-ext', 'msgpack-ext', 'pickle'} + + Returns + ------- + Model + The requested model from a serialized format. + """ + + if encoding is None: + if isinstance(data, str): + encoding = "json" + elif isinstance(data, bytes): + encoding = "msgpack-ext" + else: + raise TypeError("Input is neither str nor bytes, please specify an encoding.") + + if encoding.endswith(("json", "javascript", "pickle")): + return super().parse_raw(data, content_type=encoding) + elif encoding in ["msgpack-ext", "json-ext", "msgpack"]: + obj = deserialize(data, encoding) + else: + raise TypeError(f"Content type '{encoding}' not understood.") + + return cls.parse_obj(obj) + + @classmethod + def parse_file(cls, path: Union[str, Path], *, encoding: Optional[str] = None) -> "ProtoModel": # type: ignore + r"""Parses a file into a Model object. 
+ + Parameters + ---------- + path + The path to the file. + encoding + The type of the file, available types are: {'json', 'msgpack', 'pickle'}. Attempts to + automatically infer the file type from the file extension if None. + + Returns + ------- + Model + The requested model from a serialized format. + + """ + path = Path(path) + if encoding is None: + if path.suffix in [".json", ".js"]: + encoding = "json" + elif path.suffix in [".msgpack"]: + encoding = "msgpack-ext" + elif path.suffix in [".pickle"]: + encoding = "pickle" + else: + raise TypeError("Could not infer `encoding`, please provide a `encoding` for this file.") + + return cls.parse_raw(path.read_bytes(), encoding=encoding) + + def dict(self, **kwargs) -> Dict[str, Any]: + encoding = kwargs.pop("encoding", None) + + kwargs["exclude"] = ( + kwargs.get("exclude", None) or set() + ) | self.__config__.serialize_default_excludes # type: ignore + kwargs.setdefault("exclude_unset", self.__config__.serialize_skip_defaults) # type: ignore + if self.__config__.force_skip_defaults: # type: ignore + kwargs["exclude_unset"] = True + + data = super().dict(**kwargs) + + if encoding is None: + return data + elif encoding == "json": + return json.loads(serialize(data, encoding="json")) + else: + raise KeyError(f"Unknown encoding type '{encoding}', valid encoding types: 'json'.") + + def serialize( + self, + encoding: str, + *, + include: Optional[Set[str]] = None, + exclude: Optional[Set[str]] = None, + exclude_unset: Optional[bool] = None, + exclude_defaults: Optional[bool] = None, + exclude_none: Optional[bool] = None, + ) -> Union[bytes, str]: + r"""Generates a serialized representation of the model + + Parameters + ---------- + encoding + The serialization type, available types are: {'json', 'json-ext', 'msgpack-ext'} + include + Fields to be included in the serialization. + exclude + Fields to be excluded in the serialization. + exclude_unset + If True, skips fields that were not explicitly set when the model was created. 
+ exclude_defaults + If True, skips fields that have set or defaulted values equal to the default. + exclude_none + If True, skips fields that have value ``None``. + + Returns + ------- + ~typing.Union[bytes, str] + The serialized model. + """ + + kwargs = {} + if include: + kwargs["include"] = include + if exclude: + kwargs["exclude"] = exclude + if exclude_unset: + kwargs["exclude_unset"] = exclude_unset + if exclude_defaults: + kwargs["exclude_defaults"] = exclude_defaults + if exclude_none: + kwargs["exclude_none"] = exclude_none + + data = self.dict(**kwargs) + + return serialize(data, encoding=encoding) + + def json(self, **kwargs): + # Alias JSON here from BaseModel to reflect dict changes + return self.serialize("json", **kwargs) + + def compare(self, other: Union["ProtoModel", BaseModel], **kwargs) -> bool: + r"""Compares the current object to the provided object recursively. + + Parameters + ---------- + other + The model to compare to. + **kwargs + Additional kwargs to pass to :func:`~qcelemental.compare_recursive`. + + Returns + ------- + bool + True if the objects match. 
+ """ + from ..testing import compare_recursive + + return compare_recursive(self, other, **kwargs) + + +# remove when QCFractal merges `next` +class AutodocBaseSettings(BaseSettings): + def __init_subclass__(cls) -> None: + cls.__doc__ = AutoPydanticDocGenerator(cls, always_apply=True) + + +qcschema_draft = "http://json-schema.org/draft-04/schema#" diff --git a/qcelemental/models/basis.py b/qcelemental/models/basis.py new file mode 100644 index 00000000..2a4b2c88 --- /dev/null +++ b/qcelemental/models/basis.py @@ -0,0 +1,231 @@ +from enum import Enum +from typing import Dict, List, Optional + +try: + from pydantic.v1 import ConstrainedInt, Field, constr, validator +except ImportError: # Will also trap ModuleNotFoundError + from pydantic import ConstrainedInt, Field, constr, validator + +from ...exceptions import ValidationError +from .basemodels import ProtoModel, qcschema_draft + + +class NonnegativeInt(ConstrainedInt): + ge = 0 + + +class HarmonicType(str, Enum): + """The angular momentum representation of a shell.""" + + spherical = "spherical" + cartesian = "cartesian" + + +class ElectronShell(ProtoModel): + """Information for a single electronic shell.""" + + angular_momentum: List[NonnegativeInt] = Field( + ..., description="Angular momentum for the shell as an array of integers.", min_items=1 + ) + harmonic_type: HarmonicType = Field(..., description=str(HarmonicType.__doc__)) + exponents: List[float] = Field(..., description="Exponents for the contracted shell.", min_items=1) + coefficients: List[List[float]] = Field( + ..., + description="General contraction coefficients for the shell; individual list components will be the individual segment contraction coefficients.", + min_items=1, + ) + + class Config(ProtoModel.Config): + def schema_extra(schema, model): + # edit to allow string storage of basis sets as BSE uses. 
alternately, could `Union[float, str]` above but that loses some validation + schema["properties"]["exponents"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} + schema["properties"]["coefficients"]["items"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} + schema["properties"]["angular_momentum"].update({"uniqueItems": True}) + + @validator("coefficients") + def _check_coefficient_length(cls, v, values): + len_exp = len(values["exponents"]) + for row in v: + if len(row) != len_exp: + raise ValueError("The length of coefficients does not match the length of exponents.") + + return v + + @validator("coefficients") + def _check_general_contraction_or_fused(cls, v, values): + if len(values["angular_momentum"]) > 1: + if len(values["angular_momentum"]) != len(v): + raise ValueError("The length for a fused shell must equal the length of coefficients.") + + return v + + def nfunctions(self) -> int: + r""" + Computes the number of basis functions on this shell. + + Returns + ------- + int + The number of basis functions on this shell. + """ + + if self.harmonic_type == "spherical": + return sum((2 * L + 1) for L in self.angular_momentum) + else: + return sum(((L + 1) * (L + 2) // 2) for L in self.angular_momentum) + + def is_contracted(self) -> bool: + r""" + Checks if the shell represents a contracted Gaussian or not. + + Returns + ------- + bool + True if the shell is contracted. 
+ """ + + return (len(self.coefficients) != 1) and (len(self.angular_momentum) == 1) + + +class ECPType(str, Enum): + """The type of the ECP potential.""" + + scalar = "scalar" + spinorbit = "spinorbit" + + +class ECPPotential(ProtoModel): + """Information for a single ECP potential.""" + + ecp_type: ECPType = Field(..., description=str(ECPType.__doc__)) + angular_momentum: List[NonnegativeInt] = Field( + ..., description="Angular momentum for the potential as an array of integers.", min_items=1 + ) + r_exponents: List[int] = Field(..., description="Exponents of the 'r' term.", min_items=1) + gaussian_exponents: List[float] = Field(..., description="Exponents of the 'gaussian' term.", min_items=1) + coefficients: List[List[float]] = Field( + ..., + description="General contraction coefficients for the potential; individual list components will be the individual segment contraction coefficients.", + min_items=1, + ) + + class Config(ProtoModel.Config): + def schema_extra(schema, model): + # edit to allow string storage of basis sets as BSE uses. 
alternately, could `Union[float, str]` above but that loses some validation + schema["properties"]["gaussian_exponents"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} + schema["properties"]["coefficients"]["items"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} + schema["properties"]["angular_momentum"].update({"uniqueItems": True}) + + @validator("gaussian_exponents") + def _check_gaussian_exponents_length(cls, v, values): + len_exp = len(values["r_exponents"]) + if len(v) != len_exp: + raise ValueError("The length of gaussian_exponents does not match the length of `r` exponents.") + + return v + + @validator("coefficients") + def _check_coefficient_length(cls, v, values): + len_exp = len(values["r_exponents"]) + for row in v: + if len(row) != len_exp: + raise ValueError("The length of coefficients does not match the length of `r` exponents.") + + return v + + +class BasisCenter(ProtoModel): + """Data for a single atom/center in a basis set.""" + + electron_shells: List[ElectronShell] = Field(..., description="Electronic shells for this center.", min_items=1) + ecp_electrons: int = Field(0, description="Number of electrons replaced by ECP, MCP, or other field potentials.") + ecp_potentials: Optional[List[ECPPotential]] = Field( + None, description="ECPs, MCPs, or other field potentials for this center.", min_items=1 + ) + + class Config(ProtoModel.Config): + def schema_extra(schema, model): + schema["properties"]["electron_shells"].update({"uniqueItems": True}) + schema["properties"]["ecp_potentials"].update({"uniqueItems": True}) + + +class BasisSet(ProtoModel): + """ + A quantum chemistry basis description. + """ + + schema_name: constr(strip_whitespace=True, regex="^(qcschema_basis)$") = Field( # type: ignore + "qcschema_basis", + description=(f"The QCSchema specification to which this model conforms. 
Explicitly fixed as qcschema_basis."), + ) + schema_version: int = Field( # type: ignore + 1, + description="The version number of :attr:`~qcelemental.models.BasisSet.schema_name` to which this model conforms.", + ) + + name: str = Field(..., description="The standard basis name if available (e.g., 'cc-pVDZ').") + description: Optional[str] = Field(None, description="Brief description of the basis set.") + center_data: Dict[str, BasisCenter] = Field( + ..., description="Shared basis data for all atoms/centers in the parent molecule" + ) + atom_map: List[str] = Field( + ..., description="Mapping of all atoms/centers in the parent molecule to centers in ``center_data``." + ) + + nbf: Optional[int] = Field(None, description="The number of basis functions. Use for convenience or as checksum") + + class Config(ProtoModel.Config): + def schema_extra(schema, model): + schema["$schema"] = qcschema_draft + + @validator("atom_map") + def _check_atom_map(cls, v, values): + sv = set(v) + + # Center_data validation error, skipping + try: + missing = sv - values["center_data"].keys() + except KeyError: + return v + + if missing: + raise ValueError(f"'atom_map' contains unknown keys to 'center_data': {missing}.") + + return v + + @validator("nbf", always=True) + def _check_nbf(cls, v, values): + # Bad construction, pass on errors + try: + nbf = cls._calculate_nbf(values["atom_map"], values["center_data"]) + except KeyError: + return v + + if v is None: + v = nbf + else: + if v != nbf: + raise ValidationError("Calculated nbf does not match supplied nbf.") + + return v + + @classmethod + def _calculate_nbf(cls, atom_map, center_data) -> int: + r""" + Number of basis functions in the basis set. + + Returns + ------- + int + The number of basis functions. 
+ """ + + center_count = {} + for k, center in center_data.items(): + center_count[k] = sum(x.nfunctions() for x in center.electron_shells) + + ret = 0 + for center in atom_map: + ret += center_count[center] + + return ret diff --git a/qcelemental/models/common_models.py b/qcelemental/models/common_models.py new file mode 100644 index 00000000..f848449d --- /dev/null +++ b/qcelemental/models/common_models.py @@ -0,0 +1,145 @@ +from enum import Enum +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +import numpy as np + +try: + from pydantic.v1 import Field +except ImportError: # Will also trap ModuleNotFoundError + from pydantic import Field + +from .basemodels import ProtoModel, qcschema_draft +from .basis import BasisSet + +if TYPE_CHECKING: + try: + from pydantic.v1.typing import ReprArgs + except ImportError: # Will also trap ModuleNotFoundError + from pydantic.typing import ReprArgs + + +# Encoders, to be deprecated +ndarray_encoder = {np.ndarray: lambda v: v.flatten().tolist()} + + +class Provenance(ProtoModel): + """Provenance information.""" + + creator: str = Field(..., description="The name of the program, library, or person who created the object.") + version: str = Field( + "", + description="The version of the creator, blank otherwise. This should be sortable by the very broad `PEP 440 `_.", + ) + routine: str = Field("", description="The name of the routine or function within the creator, blank otherwise.") + + class Config(ProtoModel.Config): + canonical_repr = True + extra: str = "allow" + + def schema_extra(schema, model): + schema["$schema"] = qcschema_draft + + +class Model(ProtoModel): + """The computational molecular sciences model to run.""" + + method: str = Field( # type: ignore + ..., + description="The quantum chemistry method to evaluate (e.g., B3LYP, PBE, ...). 
" + "For MM, name of the force field.", + ) + basis: Optional[Union[str, BasisSet]] = Field( # type: ignore + None, + description="The quantum chemistry basis set to evaluate (e.g., 6-31g, cc-pVDZ, ...). Can be ``None`` for " + "methods without basis sets. For molecular mechanics, name of the atom-typer.", + ) + + # basis_spec: BasisSpec = None # This should be exclusive with basis, but for now will be omitted + + class Config(ProtoModel.Config): + canonical_repr = True + extra: str = "allow" + + +class DriverEnum(str, Enum): + """Allowed computation driver values.""" + + energy = "energy" + gradient = "gradient" + hessian = "hessian" + properties = "properties" + + def derivative_int(self): + egh = ["energy", "gradient", "hessian", "third", "fourth", "fifth"] + if self == "properties": + return 0 + else: + return egh.index(self) + + +class ComputeError(ProtoModel): + """Complete description of the error from an unsuccessful program execution.""" + + error_type: str = Field( # type: ignore + ..., # Error enumeration not yet strict + description="The type of error which was thrown. Restrict this field to short classifiers e.g. 'input_error'. Suggested classifiers: https://github.com/MolSSI/QCEngine/blob/master/qcengine/exceptions.py", + ) + error_message: str = Field( # type: ignore + ..., + description="Text associated with the thrown error. This is often the backtrace, but it can contain additional " + "information as well.", + ) + extras: Optional[Dict[str, Any]] = Field( # type: ignore + None, + description="Additional information to bundle with the error.", + ) + + class Config: + repr_style = ["error_type", "error_message"] + + def __repr_args__(self) -> "ReprArgs": + return [("error_type", self.error_type), ("error_message", self.error_message)] + + +class FailedOperation(ProtoModel): + """Record indicating that a given operation (program, procedure, etc.) 
has failed and containing the reason and input data which generated the failure.""" + + id: str = Field( # type: ignore + None, + description="A unique identifier which links this FailedOperation, often of the same Id of the operation " + "should it have been successful. This will often be set programmatically by a database such as " + "Fractal.", + ) + input_data: Any = Field( # type: ignore + None, + description="The input data which was passed in that generated this failure. This should be the complete " + "input which when attempted to be run, caused the operation to fail.", + ) + success: bool = Field( # type: ignore + False, + description="A boolean indicator that the operation failed consistent with the model of successful operations. " + "Should always be False. Allows programmatic assessment of all operations regardless of if they failed or " + "succeeded", + ) + error: ComputeError = Field( # type: ignore + ..., + description="A container which has details of the error that failed this operation. See the " + ":class:`ComputeError` for more details.", + ) + extras: Optional[Dict[str, Any]] = Field( # type: ignore + None, + description="Additional information to bundle with the failed operation. Details which pertain specifically " + "to a thrown error should be contained in the `error` field. 
See :class:`ComputeError` for details.", + ) + + def __repr_args__(self) -> "ReprArgs": + return [("error", self.error)] + + +qcschema_input_default = "qcschema_input" +qcschema_output_default = "qcschema_output" +qcschema_optimization_input_default = "qcschema_optimization_input" +qcschema_optimization_output_default = "qcschema_optimization_output" +qcschema_torsion_drive_input_default = "qcschema_torsion_drive_input" +qcschema_torsion_drive_output_default = "qcschema_torsion_drive_output" +qcschema_molecule_default = "qcschema_molecule" diff --git a/qcelemental/models/molecule.py b/qcelemental/models/molecule.py new file mode 100644 index 00000000..d2261f63 --- /dev/null +++ b/qcelemental/models/molecule.py @@ -0,0 +1,1451 @@ +""" +Molecule Object Model +""" + +import hashlib +import json +import warnings +from functools import partial +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast + +import numpy as np + +try: + from pydantic.v1 import ConstrainedFloat, ConstrainedInt, Field, constr, validator +except ImportError: # Will also trap ModuleNotFoundError + from pydantic import ConstrainedFloat, ConstrainedInt, Field, constr, validator + +# molparse imports separated b/c https://github.com/python/mypy/issues/7203 +from ...molparse.from_arrays import from_arrays +from ...molparse.from_schema import from_schema +from ...molparse.from_string import from_string +from ...molparse.to_schema import to_schema +from ...molparse.to_string import to_string +from ...periodic_table import periodictable +from ...physical_constants import constants +from ...testing import compare, compare_values +from ...util import deserialize, measure_coordinates, msgpackext_loads, provenance_stamp, which_import +from .basemodels import ProtoModel, qcschema_draft +from .common_models import Provenance, qcschema_molecule_default +from .types import Array + +if TYPE_CHECKING: + try: + from pydantic.v1.typing import ReprArgs + 
except ImportError: # Will also trap ModuleNotFoundError + from pydantic.typing import ReprArgs + +# Rounding quantities for hashing +GEOMETRY_NOISE = 8 +MASS_NOISE = 6 +CHARGE_NOISE = 4 + +_extension_map = { + ".npy": "numpy", + ".json": "json", + ".xyz": "xyz", + ".psimol": "psi4", + ".psi4": "psi4", + ".msgpack": "msgpack-ext", +} + + +def float_prep(array, around): + r""" + Rounds floats to a common value and build positive zeros to prevent hash conflicts. + """ + if isinstance(array, (list, np.ndarray)): + # Round array + array = np.around(array, around) + # Flip zeros + array[np.abs(array) < 5 ** (-(around + 1))] = 0 + + elif isinstance(array, (float, int)): + array = round(array, around) + if array == -0.0: + array = 0.0 + else: + raise TypeError("Type '{}' not recognized".format(type(array).__name__)) + + return array + + +class NonnegativeInt(ConstrainedInt): + ge = 0 + + +class BondOrderFloat(ConstrainedFloat): + ge = 0 + le = 5 + + +class Identifiers(ProtoModel): + r"""Canonical chemical identifiers""" + + molecule_hash: Optional[str] = None + molecular_formula: Optional[str] = None + smiles: Optional[str] = None + inchi: Optional[str] = None + inchikey: Optional[str] = None + canonical_explicit_hydrogen_smiles: Optional[str] = None + canonical_isomeric_explicit_hydrogen_mapped_smiles: Optional[str] = None + canonical_isomeric_explicit_hydrogen_smiles: Optional[str] = None + canonical_isomeric_smiles: Optional[str] = None + canonical_smiles: Optional[str] = None + pubchem_cid: Optional[str] = Field(None, description="PubChem Compound ID") + pubchem_sid: Optional[str] = Field(None, description="PubChem Substance ID") + pubchem_conformerid: Optional[str] = Field(None, description="PubChem Conformer ID") + + class Config(ProtoModel.Config): + serialize_skip_defaults = True + + +class Molecule(ProtoModel): + r""" + The physical Cartesian representation of the molecular system. + + A QCSchema representation of a Molecule. 
This model contains + data for symbols, geometry, connectivity, charges, fragmentation, etc while also supporting a wide array of I/O and manipulation capabilities. + + Molecule objects' geometry, masses, and charges are truncated to 8, 6, and 4 decimal places respectively to assist with duplicate detection. + + Notes + ----- + All arrays are stored flat but must be reshapable into the dimensions in attribute ``shape``, with abbreviations as follows: + + * nat: number of atoms = calcinfo_natom + * nfr: number of fragments + * <varies>: irregular dimension not systematically reshapable + + """ + + schema_name: constr(strip_whitespace=True, regex="^(qcschema_molecule)$") = Field( # type: ignore + qcschema_molecule_default, + description=( + f"The QCSchema specification to which this model conforms. Explicitly fixed as {qcschema_molecule_default}." + ), + ) + schema_version: int = Field( # type: ignore + 2, + description="The version number of :attr:`~qcelemental.models.Molecule.schema_name` to which this model conforms.", + ) + validated: bool = Field( # type: ignore + False, + description="A boolean indicator (for speed purposes) that the input Molecule data has been previously checked " + "for schema (data layout and type) and physics (e.g., non-overlapping atoms, feasible " + "multiplicity) compliance. This should be False in most cases. A ``True`` setting " + "should only ever be set by the constructor for this class itself or other trusted sources such as " + "a Fractal Server or previously serialized Molecules.", + ) + + # Required data + symbols: Array[str] = Field( # type: ignore + ..., + description="The ordered array of atomic elemental symbols in title case. This field's index " + "sets atomic order for all other per-atom fields like :attr:`~qcelemental.models.Molecule.real` and the first dimension of " + ":attr:`~qcelemental.models.Molecule.geometry`. 
Ghost/virtual atoms must have an entry here in :attr:`~qcelemental.models.Molecule.symbols`; ghostedness is " + "indicated through the :attr:`~qcelemental.models.Molecule.real` field.", + shape=["nat"], + ) + geometry: Array[float] = Field( # type: ignore + ..., + description="The ordered array for Cartesian XYZ atomic coordinates [a0]. " + "Atom ordering is fixed; that is, a consumer who shuffles atoms must not reattach the input " + "(pre-shuffling) molecule schema instance to any output (post-shuffling) per-atom results " + "(e.g., gradient). Index of the first dimension matches the 0-indexed indices of all other " + "per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`." + "\n" + "Serialized storage is always flat, (3*nat,), but QCSchema implementations will want to reshape it. " + "QCElemental can also accept array-likes which can be mapped to (nat,3) such as a 1-D list of length 3*nat, " + "or the serialized version of the array in (3*nat,) shape; all forms will be reshaped to " + "(nat,3) for this attribute.", + shape=["nat", 3], + units="a0", + ) + + # Molecule data + name: Optional[str] = Field( # type: ignore + None, + description="Common or human-readable name to assign to this molecule. This field can be arbitrary; see :attr:`~qcelemental.models.Molecule.identifiers` for well-defined labels.", + ) + identifiers: Optional[Identifiers] = Field( # type: ignore + None, + description="An optional dictionary of additional identifiers by which this molecule can be referenced, " + "such as INCHI, canonical SMILES, etc. See the :class:`~qcelemental.models.results.Identifiers` model for more details.", + ) + comment: Optional[str] = Field( # type: ignore + None, + description="Additional comments for this molecule. 
Intended for pure human/user consumption and clarity.", + ) + molecular_charge: float = Field(0.0, description="The net electrostatic charge of the molecule.") # type: ignore + molecular_multiplicity: int = Field(1, description="The total multiplicity of the molecule.") # type: ignore + + # Atom data + masses_: Optional[Array[float]] = Field( # type: ignore + None, + description="The ordered array of atomic masses. Index order " + "matches the 0-indexed indices of all other per-atom fields like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. If " + "this is not provided, the mass of each atom is inferred from its most common isotope. If this " + "is provided, it must be the same length as :attr:`~qcelemental.models.Molecule.symbols` but can accept ``None`` entries for " + "standard masses to infer from the same index in the :attr:`~qcelemental.models.Molecule.symbols` field.", + shape=["nat"], + units="u", + ) + real_: Optional[Array[bool]] = Field( # type: ignore + None, + description="The ordered array indicating if each atom is real (``True``) or " + "ghost/virtual (``False``). Index " + "matches the 0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and the first " + "dimension of :attr:`~qcelemental.models.Molecule.geometry`. If this is not provided, all atoms are assumed to be real (``True``)." + "If this is provided, the reality or ghostedness of every atom must be specified.", + shape=["nat"], + ) + atom_labels_: Optional[Array[str]] = Field( # type: ignore + None, + description="Additional per-atom labels as an array of strings. Typical use is in " + "model conversions, such as Elemental <-> Molpro and not typically something which should be user " + "assigned. 
See the :attr:`~qcelemental.models.Molecule.comment` field for general human-consumable text to affix to the molecule.", + shape=["nat"], + ) + atomic_numbers_: Optional[Array[np.int16]] = Field( # type: ignore + None, + description="An optional ordered 1-D array-like object of atomic numbers of shape (nat,). Index " + "matches the 0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. " + "Values are inferred from the :attr:`~qcelemental.models.Molecule.symbols` list if not explicitly set. " + "Ghostedness should be indicated through :attr:`~qcelemental.models.Molecule.real` field, not zeros here.", + shape=["nat"], + ) + mass_numbers_: Optional[Array[np.int16]] = Field( # type: ignore + None, + description="An optional ordered 1-D array-like object of atomic *mass* numbers of shape (nat). Index " + "matches the 0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. " + "Values are inferred from the most common isotopes of the :attr:`~qcelemental.models.Molecule.symbols` list if not explicitly set. " + "If single isotope not (yet) known for an atom, -1 is placeholder.", + shape=["nat"], + ) + + # Fragment and connection data + connectivity_: Optional[List[Tuple[NonnegativeInt, NonnegativeInt, BondOrderFloat]]] = Field( # type: ignore + None, + description="A list of bonds within the molecule. Each entry is a tuple " + "of ``(atom_index_A, atom_index_B, bond_order)`` where the ``atom_index`` " + "matches the 0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. 
" + "Bonds may be freely reordered and inverted.", + min_items=1, + ) + fragments_: Optional[List[Array[np.int32]]] = Field( # type: ignore + None, + description="List of indices grouping atoms (0-indexed) into molecular fragments within the molecule. " + "Each entry in the outer list is a new fragment; index matches the ordering in :attr:`~qcelemental.models.Molecule.fragment_charges` and " + ":attr:`~qcelemental.models.Molecule.fragment_multiplicities`. Inner lists are 0-indexed atoms which compose the fragment; every atom must " + "be in exactly one inner list. Noncontiguous fragments are allowed, though no QM program is known to support them. " + "Fragment ordering is fixed; that is, a consumer who shuffles fragments must not reattach the input " + "(pre-shuffling) molecule schema instance to any output (post-shuffling) per-fragment results (e.g., n-body energy arrays).", + shape=["nfr", ""], + ) + fragment_charges_: Optional[List[float]] = Field( # type: ignore + None, + description="The total charge of each fragment in the :attr:`~qcelemental.models.Molecule.fragments` list. The index of this " + "list matches the 0-index indices of :attr:`~qcelemental.models.Molecule.fragments` list. Will be filled in based on a set of rules " + "if not provided (and :attr:`~qcelemental.models.Molecule.fragments` are specified).", + shape=["nfr"], + ) + fragment_multiplicities_: Optional[List[int]] = Field( # type: ignore + None, + description="The multiplicity of each fragment in the :attr:`~qcelemental.models.Molecule.fragments` list. The index of this " + "list matches the 0-index indices of :attr:`~qcelemental.models.Molecule.fragments` list. Will be filled in based on a set of " + "rules if not provided (and :attr:`~qcelemental.models.Molecule.fragments` are specified).", + shape=["nfr"], + ) + + # Orientation + fix_com: bool = Field( # type: ignore + False, + description="Whether translation of geometry is allowed (fix F) or disallowed (fix T)." 
+ "When False, QCElemental will pre-process the Molecule object to translate the center of mass " + "to (0,0,0) in Euclidean coordinate space, resulting in a different :attr:`~qcelemental.models.Molecule.geometry` than the " + "one provided. 'Fix' is used in the sense of 'specify': that is, `fix_com=True` signals that " + "the origin in `geometry` is a deliberate part of the Molecule spec, whereas `fix_com=False` " + "(default) allows that the origin is happenstance and may be adjusted. " + "guidance: A consumer who translates the geometry must not reattach the input (pre-translation) molecule schema instance to any output (post-translation) origin-sensitive results (e.g., an ordinary energy when EFP present).", + ) + fix_orientation: bool = Field( # type: ignore + False, + description="Whether rotation of geometry is allowed (fix F) or disallowed (fix T). " + "When False, QCElemental will pre-process the Molecule object to orient via the intertial tensor, " + "resulting in a different :attr:`~qcelemental.models.Molecule.geometry` than the one provided. " + "'Fix' is used in the sense of 'specify': that is, `fix_orientation=True` signals that " + "the frame orientation in `geometry` is a deliberate part of the Molecule spec, whereas " + "`fix_orientation=False` (default) allows that the frame is happenstance and may be adjusted. " + "guidance: A consumer who rotates the geometry must not reattach the input (pre-rotation) molecule schema instance to any output (post-rotation) frame-sensitive results (e.g., molecular vibrations).", + ) + fix_symmetry: Optional[str] = Field( # type: ignore + None, + description="Maximal point group symmetry which :attr:`~qcelemental.models.Molecule.geometry` should be treated. 
def __init__(self, orient: bool = False, validate: Optional[bool] = None, **kwargs: Any) -> None:
    r"""Initializes the molecule object from dictionary-like values.

    Besides the model fields, three constructor-only keys are consumed from
    ``kwargs`` before the model is built: ``_geometry_prep``, ``geometry_noise``
    and (only when validating) ``nonphysical``.

    Parameters
    ----------
    orient
        If True, orientates the Molecule to a common reference frame.
    validate
        If ``None`` validation is always applied unless the ``validated`` flag is set. Otherwise uses the boolean to decide to validate the Molecule or not.
    **kwargs
        The values of the Molecule object attributes.
    """
    # validate=None means "validate unless the payload already says it was validated".
    if validate is None:
        validate = not kwargs.get("validated", False)

    # Constructor-only switches: removed here so they are not passed on to super().__init__.
    geometry_prep = kwargs.pop("_geometry_prep", False)
    geometry_noise = kwargs.pop("geometry_noise", GEOMETRY_NOISE)

    if validate:
        # Fill in the schema identification when the caller did not supply it.
        kwargs["schema_name"] = kwargs.pop("schema_name", "qcschema_molecule")
        kwargs["schema_version"] = kwargs.pop("schema_version", 2)
        # original_keys = set(kwargs.keys())  # revive when ready to revisit sparsity

        # Round-trip the input through from_schema -> to_schema, then drop entries
        # that _filter_defaults removes.
        nonphysical = kwargs.pop("nonphysical", False)
        schema = to_schema(
            from_schema(kwargs, nonphysical=nonphysical), dtype=kwargs["schema_version"], copy=False, np_out=True
        )
        schema = _filter_defaults(schema)

        kwargs["validated"] = True
        # On key collisions the round-tripped schema value wins over the raw kwarg.
        kwargs = {**kwargs, **schema}  # Allow any extra fields
        validate = True  # no-op: validate is already truthy inside this branch

    # extras always ends up as a dict, even though the field default is None.
    if "extras" not in kwargs:
        kwargs["extras"] = {}
    super().__init__(**kwargs)

    # We are pulling out the values *explicitly* so that the pydantic skip_defaults works as expected
    # All attributes set below are equivalent to the default set.
    values = self.__dict__

    if validate:
        # Title case for consistency
        # The title() helper is reached through a different numpy path depending on the numpy version.
        if np.lib.NumpyVersion(np.__version__) >= "2.0.0b1":
            values["symbols"] = np.char.chararray.title(self.symbols)
        else:
            values["symbols"] = np.core.defchararray.title(self.symbols)

    # Geometry goes through float_prep with geometry_noise either after reorientation,
    # or as-is when validating / when the caller asked for _geometry_prep.
    if orient:
        values["geometry"] = float_prep(self._orient_molecule_internal(), geometry_noise)
    elif validate or geometry_prep:
        values["geometry"] = float_prep(values["geometry"], geometry_noise)
@property
def connectivity(self) -> List[Tuple[int, int, float]]:
    """Return the stored bond list exactly as held under ``connectivity_``.

    No default is synthesized: when no bonds were supplied the result is
    ``None`` rather than an empty list.
    """
    return self.__dict__.get("connectivity_")
def show(self, ngl_kwargs: Optional[Dict[str, Any]] = None) -> "nglview.NGLWidget":  # type: ignore
    r"""Creates a 3D representation of a molecule that can be manipulated in Jupyter Notebooks and exported as
    images (`.png`).

    Parameters
    ----------
    ngl_kwargs
        Additional nglview NGLWidget kwargs

    Returns
    -------
    nglview.NGLWidget
        A nglview view of the molecule

    Raises
    ------
    ModuleNotFoundError
        If the optional ``nglview`` package cannot be imported.

    """
    if not which_import("nglview", return_bool=True):
        # Plain string (no placeholders) and the package name spelled as it must be installed.
        raise ModuleNotFoundError(
            "Python module nglview not found. Solve by installing it: `conda install -c conda-forge nglview`"
        )  # pragma: no cover

    # Imported lazily so that nglview stays an optional dependency.
    import nglview as nv  # type: ignore

    if ngl_kwargs is None:
        ngl_kwargs = {}

    # The widget is built from the molecule rendered in the "nglview-sdf" string flavour.
    structure = nv.TextStructure(self.to_string("nglview-sdf"), ext="sdf")
    widget = nv.NGLWidget(structure, **ngl_kwargs)
    return widget
def compare(self, other):
    r"""Deprecated spelling of ``self == other``.

    Parameters
    ----------
    other
        Object to compare against; handed unchanged to ``==``.

    Returns
    -------
    bool
        Whatever ``self == other`` evaluates to.

    """
    # stacklevel=2 attributes the warning to the caller's line, which is the code that needs changing.
    warnings.warn(
        "Molecule.compare is deprecated and will be removed in v0.13.0. Use == instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return self == other
def get_fragment(
    self,
    real: Union[int, List],
    ghost: Optional[Union[int, List]] = None,
    orient: bool = False,
    group_fragments: bool = True,
) -> "Molecule":
    r"""Get new Molecule with fragments preserved, dropped, or ghosted.

    Parameters
    ----------
    real
        Fragment index or list of indices (0-indexed) to be real atoms in new Molecule.
    ghost
        Fragment index or list of indices (0-indexed) to be ghost atoms (basis fns only) in new Molecule.
    orient
        Whether or not to align (inertial frame) and phase geometry upon new Molecule instantiation
        (according to _orient_molecule_internal)?
    group_fragments
        Whether or not to group real fragments at the start of the atom list and ghost fragments toward the back.
        Previous to ``v0.5``, this was always effectively True. True is handy for finding duplicate
        (atom-order-independent) molecules by hash. False preserves fragment order (though collapsing gaps for
        absent fragments) like Psi4's ``extract_subsets``. False is handy for gradients where atom order of
        returned values matters.

    Returns
    -------
    Molecule
        New qcelemental.models.Molecule with ``self``\'s fragments present, ghosted, or absent.

    Raises
    ------
    TypeError
        If a fragment index is requested as both real and ghost.
    ValueError
        If, with ``group_fragments=False``, a selected fragment lists an atom that was not
        mapped into the new atom order.

    """
    if isinstance(real, int):
        real = [real]

    if isinstance(ghost, int):
        ghost = [ghost]
    elif ghost is None:
        ghost = []

    real_set = set(real)
    ghost_set = set(ghost)
    if real_set & ghost_set:
        raise TypeError(
            "Molecule:get_fragment: real and ghost sets are overlapping! ({0}, {1}).".format(str(real), str(ghost))
        )

    constructor_dict: Dict = {}
    constructor_dict["name"] = (self.name if self.name is not None else "") + " (" + str(real) + "," + str(ghost) + ")"

    # Read each source attribute once: several of these are properties that build a
    # fresh default array on every access.
    src_symbols = self.symbols
    src_geometry = self.geometry
    src_masses = self.masses
    src_fragments = self.fragments
    src_charges = self.fragment_charges
    src_multiplicities = self.fragment_multiplicities

    geom_blocks = []
    symbols = []
    masses = []
    real_atoms = []
    fragments = []
    fragment_charges = []
    fragment_multiplicities = []

    if group_fragments:
        # Real fragments first, ghost fragments after, each in the order requested.
        frag_start = 0
        for selection, is_real in ((real, True), (ghost, False)):
            for frag in selection:
                members = src_fragments[frag]
                frag_size = len(members)
                geom_blocks.append(src_geometry[members])

                for idx in members:
                    symbols.append(src_symbols[idx])
                    real_atoms.append(is_real)
                    masses.append(src_masses[idx])

                fragments.append(list(range(frag_start, frag_start + frag_size)))
                frag_start += frag_size

                if is_real:
                    fragment_charges.append(float(src_charges[frag]))
                    fragment_multiplicities.append(src_multiplicities[frag])
                else:
                    # Ghost fragments carry no charge and are singlets.
                    fragment_charges.append(0)
                    fragment_multiplicities.append(1)

            if is_real:
                # Totals come from the real fragments only, so set them before ghosts are appended.
                constructor_dict["molecular_charge"] = sum(fragment_charges)
                constructor_dict["molecular_multiplicity"] = sum(x - 1 for x in fragment_multiplicities) + 1

    else:
        # Keep the source atom order; molecular charge/multiplicity are left unset in this branch.
        selected = real_set | ghost_set
        natom = len(src_symbols)

        # Atom index -> owning fragment index.
        at2fr: List[Union[int, None]] = [None] * natom
        for ifr, fr in enumerate(src_fragments):
            for iat in fr:
                at2fr[iat] = ifr

        # Old atom index -> new atom index (None when the atom is dropped).
        at2at: List[Union[int, None]] = [None] * natom
        atom_size = 0
        for iat in range(natom):
            ifr = at2fr[iat]
            if ifr not in selected:
                continue

            geom_blocks.append(src_geometry[iat])
            symbols.append(src_symbols[iat])
            real_atoms.append(ifr in real_set)
            masses.append(src_masses[iat])

            at2at[iat] = atom_size
            atom_size += 1

        for ifr, fr in enumerate(src_fragments):
            if ifr not in selected:
                continue

            fragments.append([at2at[iat] for iat in fr])
            if ifr in real_set:
                fragment_charges.append(src_charges[ifr])
                fragment_multiplicities.append(src_multiplicities[ifr])
            else:
                fragment_charges.append(0)
                fragment_multiplicities.append(1)

        # The check must look inside each fragment: ``None in fragments`` on a list of lists never fires.
        if any(new_idx is None for frag in fragments for new_idx in frag):
            raise ValueError("Molecule:get_fragment: a selected fragment lists an atom that was not mapped.")

    constructor_dict["fragments"] = fragments
    constructor_dict["fragment_charges"] = fragment_charges
    constructor_dict["fragment_multiplicities"] = fragment_multiplicities
    constructor_dict["symbols"] = symbols
    constructor_dict["geometry"] = np.vstack(geom_blocks)
    constructor_dict["real"] = real_atoms
    constructor_dict["masses"] = masses

    return Molecule(orient=orient, **constructor_dict)
@classmethod
def from_data(
    cls,
    data: Union[str, Dict[str, Any], np.ndarray, bytes],
    dtype: Optional[str] = None,
    *,
    orient: bool = False,
    validate: bool = None,
    **kwargs: Dict[str, Any],
) -> "Molecule":
    r"""
    Constructs a molecule object from a data structure.

    Parameters
    ----------
    data
        Data to construct Molecule from
    dtype
        How to interpret the data, if not passed attempts to discover this based on input type
        (``str`` -> "string", ``numpy.ndarray`` -> "numpy", ``dict`` -> "dict", ``bytes`` -> "msgpack").
    orient
        Orientates the molecule to a standard frame or not.
    validate
        Validates the molecule or not.
    **kwargs
        Additional kwargs to pass to the constructors. kwargs take precedence over data.
        For ``dtype="numpy"`` the keys ``units`` and ``frags`` are consumed here.

    Returns
    -------
    Molecule
        A constructed molecule class.

    Raises
    ------
    TypeError
        If ``dtype`` is omitted and cannot be inferred from the type of ``data``.
    KeyError
        If ``dtype`` is not one of the understood formats.

    """
    if dtype is None:
        if isinstance(data, str):
            dtype = "string"
        elif isinstance(data, np.ndarray):
            dtype = "numpy"
        elif isinstance(data, dict):
            dtype = "dict"
        elif isinstance(data, bytes):
            # Inspect the payload: dtype is known to be None in this branch.
            dtype = "msgpack"
        else:
            raise TypeError("Input type not understood, please supply the 'dtype' kwarg.")

    if dtype in ["string", "psi4", "xyz", "xyz+"]:
        mol_dict = from_string(data, dtype if dtype != "string" else None)
        assert isinstance(mol_dict, dict)
        input_dict = to_schema(mol_dict["qm"], dtype=2, np_out=True)
        input_dict = _filter_defaults(input_dict)
        input_dict["validated"] = True
        input_dict["_geometry_prep"] = True
    elif dtype == "numpy":
        data = np.asarray(data)
        # Column 0 is passed as `elez`, the remaining columns as `geom`.
        data = {
            "geom": data[:, 1:],
            "elez": data[:, 0],
            "units": kwargs.pop("units", "Angstrom"),
            "fragment_separators": kwargs.pop("frags", []),
        }
        input_dict = to_schema(from_arrays(**data), dtype=2, np_out=True)
        input_dict = _filter_defaults(input_dict)
        input_dict["validated"] = True
        input_dict["_geometry_prep"] = True
    elif dtype == "msgpack":
        assert isinstance(data, bytes)
        input_dict = msgpackext_loads(data)
    elif dtype == "json":
        assert isinstance(data, str)
        input_dict = json.loads(data)
    elif dtype == "dict":
        assert isinstance(data, dict)
        # Shallow copy: the update/pop calls below must not leak into the caller's dict.
        input_dict = dict(data)
    else:
        raise KeyError("Dtype not understood '{}'.".format(dtype))

    input_dict.update(kwargs)

    # if charge/spin options are given, invalidate charge and spin options that are missing
    charge_spin_opts = {"molecular_charge", "fragment_charges", "molecular_multiplicity", "fragment_multiplicities"}
    kwarg_keys = set(kwargs.keys())
    if charge_spin_opts & kwarg_keys:
        for key in charge_spin_opts - kwarg_keys:
            input_dict.pop(key, None)
        input_dict.pop("validated", None)

    return cls(orient=orient, validate=validate, **input_dict)
+ + """ + ext = Path(filename).suffix + + if dtype is None: + if ext in _extension_map: + dtype = _extension_map[ext] + else: + raise KeyError(f"Could not infer dtype from filename: `{filename}`") + + if dtype in ["xyz", "xyz+", "psi4"]: + stringified = self.to_string(dtype) + elif dtype in ["json", "json-ext", "msgpack", "msgpack-ext"]: + stringified = self.serialize(dtype) + elif dtype in ["numpy"]: + elements = np.array(self.atomic_numbers).reshape(-1, 1) + npmol = np.hstack((elements, self.geometry * constants.conversion_factor("bohr", "angstroms"))) + np.save(filename, npmol) + return + else: + raise KeyError(f"Dtype `{dtype}` is not valid") + + flags = "wb" if dtype.startswith("msgpack") else "w" + + with open(filename, flags) as handle: + handle.write(stringified) + + ### Non-Pydantic internal functions + + def _orient_molecule_internal(self): + r""" + Centers the molecule and orients via inertia tensor before returning a new set of the + molecule geometry + """ + + new_geometry = self.geometry.copy() # Ensure we get a copy + # Get the mass as an array + # Masses are needed for orientation + np_mass = np.array(self.masses) + + # Center on Mass + new_geometry -= np.average(new_geometry, axis=0, weights=np_mass) + + # Rotate into inertial frame + tensor = self._inertial_tensor(new_geometry, weight=np_mass) + _, evecs = np.linalg.eigh(tensor) + + new_geometry = np.dot(new_geometry, evecs) + + # Phases? 
@staticmethod
def _inertial_tensor(geom, *, weight):
    r"""
    Build the symmetric 3x3 inertia tensor of ``geom`` (indexed as ``geom[:, k]``
    for k = 0, 1, 2) using ``weight`` as the per-row weights.
    """
    xs, ys, zs = geom[:, 0], geom[:, 1], geom[:, 2]
    inertia = np.zeros((3, 3))

    # Diagonal: weighted sum of the squares of the two other coordinates.
    inertia[0, 0] = np.sum(weight * (ys**2.0 + zs**2.0))
    inertia[1, 1] = np.sum(weight * (xs**2.0 + zs**2.0))
    inertia[2, 2] = np.sum(weight * (xs**2.0 + ys**2.0))

    # Off-diagonal: negative weighted coordinate products, mirrored across the diagonal.
    pairs = (((0, 1), (xs, ys)), ((0, 2), (xs, zs)), ((1, 2), (ys, zs)))
    for (row, col), (first, second) in pairs:
        inertia[row, col] = inertia[col, row] = -1.0 * np.sum(weight * first * second)
    return inertia
def nelectrons(self, ifr: int = None) -> int:
    r"""Number of electrons.

    Ghost atoms (``real`` is False) contribute no electrons.

    Parameters
    ----------
    ifr
        If not `None`, only compute for the `ifr`-th (0-indexed) fragment.

    Returns
    -------
    nelec : int
        Number of electrons in entire molecule or in fragment.

    """
    Zeff = [z * int(real) for z, real in zip(cast(Iterable[int], self.atomic_numbers), self.real)]

    if ifr is None:
        nel = sum(Zeff) - self.molecular_charge

    else:
        # Build the membership set once instead of scanning the fragment for every atom.
        members = {int(iat) for iat in self.fragments[ifr]}
        nel = sum(zf for iat, zf in enumerate(Zeff) if iat in members) - self.fragment_charges[ifr]

    return int(nel)
def align(
    self,
    ref_mol: "Molecule",
    *,
    do_plot: bool = False,
    verbose: int = 0,
    atoms_map: bool = False,
    run_resorting: bool = False,
    mols_align: Union[bool, float] = False,
    run_to_completion: bool = False,
    uno_cutoff: float = 1.0e-3,
    run_mirror: bool = False,
    generic_ghosts: bool = False,
) -> Tuple["Molecule", Dict[str, Any]]:
    r"""Finds shift, rotation, and atom reordering of `concern_mol` (self)
    that best aligns with `ref_mol`.

    Wraps :py:func:`qcelemental.molutil.B787` for :py:class:`qcelemental.models.Molecule`.
    Employs the Kabsch, Hungarian, and Uno algorithms to exhaustively locate
    the best alignment for non-oriented, non-ordered structures.

    Parameters
    ----------
    ref_mol : qcelemental.models.Molecule
        Molecule to match.
    atoms_map
        Whether atom1 of `ref_mol` corresponds to atom1 of `concern_mol`, etc.
        If true, specifying `True` can save much time.
    mols_align
        Whether ref_mol and concern_mol have identical geometries
        (barring orientation or atom mapping) and expected final RMSD = 0.
        If `True`, procedure is truncated when RMSD condition met, saving time.
        If float, RMSD tolerance at which search for alignment stops. If provided,
        the alignment routine will throw an error if it fails to align
        the molecule within the specified RMSD tolerance.
    do_plot
        Pops up a mpl plot showing before, after, and ref geometries.
    run_to_completion
        Run reorderings to completion (past RMSD = 0) even if unnecessary because
        `mols_align=True`. Used to test worst-case timings.
    run_resorting
        Run the resorting machinery even if unnecessary because `atoms_map=True`.
    uno_cutoff
        Forwarded unchanged to ``B787``. TODO: document its meaning.
    run_mirror
        Run alternate geometries potentially allowing best match to `ref_mol`
        from mirror image of `concern_mol`. Only run if system confirmed to
        be nonsuperimposable upon mirror reflection.
    generic_ghosts
        When one or both molecules doesn't have meaningful element info for ghosts (can happen
        when harvesting from a printout with a generic ghost symbol), set this to True to
        place all real=False atoms into the same space for alignment.
        NOTE(review): this text previously said "only allowed when ``atoms_map=True``",
        but the guard below actually requires ``mols_align`` to be truthy -- confirm
        which condition is intended.
    verbose
        Print level.

    Returns
    -------
    mol : Molecule
    data : Dict[key, Any]
        Molecule is internal geometry of `self` optimally aligned and atom-ordered
        to `ref_mol`. Presently all fragment information is discarded.
        `data['rmsd']` is RMSD [A] between `ref_mol` and the optimally aligned
        geometry computed.
        `data['mill']` is a AlignmentMill with fields
        (shift, rotation, atommap, mirror) that prescribe the transformation
        from `concern_mol` and the optimally aligned geometry.

    Raises
    ------
    ValueError
        If `generic_ghosts` is set while `mols_align` is falsy.
    AssertionError
        If one of the sanity checks at the end returns a falsy value.

    """
    from ...molutil.align import B787

    # One opaque label per atom, hashed from symbol + mass, so atoms are only
    # treated as interchangeable when both agree.
    rgeom = np.array(ref_mol.geometry)
    runiq = np.asarray(
        [
            hashlib.sha1((sym + str(mas)).encode("utf-8")).hexdigest()
            for sym, mas in zip(cast(Iterable[str], ref_mol.symbols), ref_mol.masses)
        ]
    )
    concern_mol = self
    cgeom = np.array(concern_mol.geometry)
    cuniq = np.asarray(
        [
            hashlib.sha1((sym + str(mas)).encode("utf-8")).hexdigest()
            for sym, mas in zip(cast(Iterable[str], concern_mol.symbols), concern_mol.masses)
        ]
    )

    if generic_ghosts:
        if not mols_align:
            raise ValueError("Too risky to lump ghosts together when mols not superimposable.")

        # Give every real=False atom the same label in both molecules.
        bq_hash = hashlib.sha1(("bq").encode("utf-8")).hexdigest()
        runiq = np.asarray([(rl_hash if rl else bq_hash) for rl, rl_hash in zip(ref_mol.real, runiq)])
        cuniq = np.asarray([(rl_hash if rl else bq_hash) for rl, rl_hash in zip(concern_mol.real, cuniq)])

    rmsd, solution = B787(
        cgeom=cgeom,
        rgeom=rgeom,
        cuniq=cuniq,
        runiq=runiq,
        do_plot=do_plot,
        verbose=verbose,
        atoms_map=atoms_map,
        run_resorting=run_resorting,
        mols_align=mols_align,
        run_to_completion=run_to_completion,
        run_mirror=run_mirror,
        uno_cutoff=uno_cutoff,
    )

    # Apply the solution to every per-atom field, then overlay the results on
    # a full dump of the original molecule.
    aupdate = {
        "symbols": solution.align_atoms(concern_mol.symbols),
        "geometry": solution.align_coordinates(concern_mol.geometry, reverse=False),
        "masses": solution.align_atoms(concern_mol.masses),
        "real": solution.align_atoms(concern_mol.real),
        "atom_labels": solution.align_atoms(concern_mol.atom_labels),
        "atomic_numbers": solution.align_atoms(concern_mol.atomic_numbers),
        "mass_numbers": solution.align_atoms(concern_mol.mass_numbers),
    }
    adict = {**concern_mol.dict(), **aupdate}

    # preserve intrinsic symmetry with lighter truncation
    amol = Molecule(validate=True, **adict, geometry_noise=13)

    # TODO -- can probably do more with fragments in amol now that
    #    Mol is something with non-contig frags. frags now discarded.

    # Sanity check: the transformation must leave the NRE unchanged.
    assert compare_values(
        concern_mol.nuclear_repulsion_energy(),
        amol.nuclear_repulsion_energy(),
        "Q: concern_mol-->returned_mol NRE uncorrupted",
        atol=1.0e-4,
        quiet=(verbose > 1),
    )
    if mols_align:
        assert compare_values(
            ref_mol.nuclear_repulsion_energy(),
            amol.nuclear_repulsion_energy(),
            "Q: concern_mol-->returned_mol NRE matches ref_mol",
            atol=1.0e-4,
            quiet=(verbose > 1),
        )
        # NOTE(review): atol=4 is an absolute tolerance of 4 in geometry units,
        # so this check can hardly ever fail; 1.0e-4 may have been intended -- confirm.
        assert compare(
            True,
            np.allclose(ref_mol.geometry, amol.geometry, atol=4),
            "Q: concern_mol-->returned_mol geometry matches ref_mol",
            quiet=(verbose > 1),
        )

    return amol, {"rmsd": rmsd, "mill": solution}
def scramble(
    self,
    *,
    do_shift: Union[bool, Array[float], List] = True,
    do_rotate: Union[bool, Array[float], List[List]] = True,
    do_resort: Union[bool, List] = True,
    deflection: float = 1.0,
    do_mirror: bool = False,
    do_plot: bool = False,
    do_test: bool = False,
    run_to_completion: bool = False,
    run_resorting: bool = False,
    verbose: int = 0,
) -> Tuple["Molecule", Dict[str, Any]]:
    r"""Generate a Molecule with random or directed translation, rotation, and atom shuffling.
    Optionally, check that the aligner returns the opposite transformation.

    The molecule being perturbed is `self`; it is called `ref_mol` below and
    in the body. It is not a parameter.

    Parameters
    ----------
    do_shift
        Whether to generate a random atom shift on interval [-3, 3) in each
        dimension (`True`) or leave at current origin. To shift by a specified
        vector, supply a 3-element list.
    do_rotate
        Whether to generate a random 3D rotation according to algorithm of Arvo.
        To rotate by a specified matrix, supply a 9-element list of lists.
    do_resort
        Whether to shuffle atoms (`True`) or leave 1st atom 1st, etc. (`False`).
        To specify shuffle, supply a nat-element list of indices.
    deflection
        If `do_rotate`, how random a rotation: 0.0 is no change, 0.1 is small
        perturbation, 1.0 is completely random.
    do_mirror
        Whether to construct the mirror image structure by inverting y-axis.
    do_plot
        Pops up a mpl plot showing before, after, and ref geometries.
    do_test
        Additionally, run the aligner on the returned Molecule and check that
        opposite transformations obtained.
    run_to_completion
        By construction, scrambled systems are fully alignable (final RMSD=0).
        Even so, `True` turns off the mechanism to stop when RMSD reaches zero
        and instead proceed to worst possible time.
    run_resorting
        Even if atoms not shuffled, test the resorting machinery.
    verbose
        Print level.

    Returns
    -------
    mol : Molecule
    data : Dict[key, Any]
        Molecule is scrambled copy of `ref_mol` (self).
        `data['rmsd']` is RMSD [A] between `ref_mol` and the scrambled geometry.
        `data['mill']` is a AlignmentMill with fields
        (shift, rotation, atommap, mirror) that prescribe the transformation
        from `ref_mol` to the returned geometry.

    Raises
    ------
    AssertionError
        If `do_test=True` and aligner sanity check fails for any of the reverse
        transformations.

    """
    from ...molutil.align import compute_scramble

    ref_mol = self
    rgeom = ref_mol.geometry
    nat = rgeom.shape[0]

    perturbation = compute_scramble(
        nat,
        do_shift=do_shift,
        do_rotate=do_rotate,
        deflection=deflection,
        do_resort=do_resort,
        do_mirror=do_mirror,
    )

    # Apply the perturbation to every per-atom field, then overlay the results
    # on a full dump of the original molecule.
    cgeom = perturbation.align_coordinates(rgeom, reverse=True)
    cupdate = {
        "symbols": perturbation.align_atoms(ref_mol.symbols),
        "geometry": cgeom,
        "masses": perturbation.align_atoms(ref_mol.masses),
        "real": perturbation.align_atoms(ref_mol.real),
        "atom_labels": perturbation.align_atoms(ref_mol.atom_labels),
        "atomic_numbers": perturbation.align_atoms(ref_mol.atomic_numbers),
        "mass_numbers": perturbation.align_atoms(ref_mol.mass_numbers),
    }
    cdict = {**ref_mol.dict(), **cupdate}

    # preserve intrinsic symmetry with lighter truncation
    cmol = Molecule(validate=True, **cdict, geometry_noise=13)

    # Row-by-row RMSD between scrambled and original coordinates, scaled by
    # the bohr-to-angstrom constant.
    rmsd = np.linalg.norm(cgeom - rgeom) * constants.bohr2angstroms / np.sqrt(nat)
    if verbose >= 1:
        print("Start RMSD = {:8.4f} [A]".format(rmsd))

    if do_test:
        # Round trip: aligning the scrambled molecule back onto the original
        # should recover the perturbation that was applied.
        _, data = cmol.align(
            ref_mol,
            do_plot=do_plot,
            atoms_map=(not do_resort),
            run_resorting=run_resorting,
            mols_align=True,
            run_to_completion=run_to_completion,
            run_mirror=do_mirror,
            verbose=verbose,
        )
        solution = data["mill"]

        assert compare(
            True, np.allclose(solution.shift, perturbation.shift, atol=1.0e-6), "shifts equiv", quiet=(verbose > 1)
        )
        # The rotation is only compared when atoms were not reordered.
        if not do_resort:
            assert compare(
                True,
                np.allclose(solution.rotation.T, perturbation.rotation),
                "rotations transpose",
                quiet=(verbose > 1),
            )
        # A mirrored solution is only acceptable if mirroring was requested.
        if solution.mirror:
            assert compare(True, do_mirror, "mirror allowed", quiet=(verbose > 1))

    return cmol, {"rmsd": rmsd, "mill": perturbation}
def _filter_defaults(dicary):
    """Remove entries from ``dicary`` that only restate default values.

    The mapping is edited in place and the same object is returned.

    * ``atomic_numbers`` is always removed.
    * ``mass_numbers`` and ``masses`` go when the masses are close to the
      per-symbol values from ``periodictable.to_mass``.
    * ``real`` goes when every entry is truthy.
    * ``atom_labels`` goes when every label is the empty string.
    * ``connectivity`` goes when it is present and ``None``.
    * ``fragments``, ``fragment_charges`` and ``fragment_multiplicities`` go
      when the only fragment is the full ``0..nat-1`` index list.
    """
    symbols = dicary["symbols"]
    natom = len(symbols)
    reference_masses = np.array([periodictable.to_mass(sym) for sym in symbols])

    del dicary["atomic_numbers"]

    if np.allclose(reference_masses, dicary["masses"]):
        for key in ("mass_numbers", "masses"):
            del dicary[key]

    if all(dicary["real"]):
        del dicary["real"]

    if dicary["atom_labels"].tolist() == [""] * natom:
        del dicary["atom_labels"]

    if "connectivity" in dicary and dicary["connectivity"] is None:
        del dicary["connectivity"]

    whole_molecule = [list(np.arange(natom))]
    if dicary["fragments"] == whole_molecule:
        for key in ("fragments", "fragment_charges", "fragment_multiplicities"):
            del dicary[key]

    return dicary
+ """ + + trajectory: TrajectoryProtocolEnum = Field( + TrajectoryProtocolEnum.all, description=str(TrajectoryProtocolEnum.__doc__) + ) + + class Config: + force_skip_defaults = True + + +class QCInputSpecification(ProtoModel): + """ + A compute description for energy, gradient, and Hessian computations used in a geometry optimization. + """ + + schema_name: constr(strip_whitespace=True, regex=qcschema_input_default) = qcschema_input_default # type: ignore + schema_version: int = 1 + + driver: DriverEnum = Field(DriverEnum.gradient, description=str(DriverEnum.__doc__)) + model: Model = Field(..., description=str(Model.__doc__)) + keywords: Dict[str, Any] = Field({}, description="The program specific keywords to be used.") + + extras: Dict[str, Any] = Field( + {}, + description="Additional information to bundle with the computation. Use for schema development and scratch space.", + ) + + +class OptimizationInput(ProtoModel): + id: Optional[str] = None + hash_index: Optional[str] = None + schema_name: constr( # type: ignore + strip_whitespace=True, regex=qcschema_optimization_input_default + ) = qcschema_optimization_input_default + schema_version: int = 1 + + keywords: Dict[str, Any] = Field({}, description="The optimization specific keywords to be used.") + extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.") + protocols: OptimizationProtocols = Field(OptimizationProtocols(), description=str(OptimizationProtocols.__doc__)) + + input_specification: QCInputSpecification = Field(..., description=str(QCInputSpecification.__doc__)) + initial_molecule: Molecule = Field(..., description="The starting molecule for the geometry optimization.") + + provenance: Provenance = Field(Provenance(**provenance_stamp(__name__)), description=str(Provenance.__doc__)) + + def __repr_args__(self) -> "ReprArgs": + return [ + ("model", self.input_specification.model.dict()), + ("molecule_hash", self.initial_molecule.get_hash()[:7]), + ] + + +class 
@validator("trajectory", each_item=False)
def _trajectory_protocol(cls, v, values):
    """Trim the trajectory to what ``protocols.trajectory`` asks to keep.

    * ``all``: returned unchanged.
    * ``initial_and_final``: first and last entries, unless the list already
      has exactly two entries.
    * ``final``: last entry only, unless the list already has exactly one.
    * ``none``: an empty list.

    An empty trajectory is returned as-is for every recognised protocol.

    Raises
    ------
    ValueError
        If ``protocols`` is missing from the already-validated values, or the
        protocol value is not one of the four above.
    """
    # Do not propagate validation errors
    if "protocols" not in values:
        raise ValueError("Protocols was not properly formed.")

    keep_enum = values["protocols"].trajectory
    if keep_enum == "all":
        pass
    elif keep_enum == "initial_and_final":
        # Guard on `v`: indexing an empty list would raise IndexError, which
        # is not reported as a validation error.
        if v and len(v) != 2:
            v = [v[0], v[-1]]
    elif keep_enum == "final":
        if v and len(v) != 1:
            v = [v[-1]]
    elif keep_enum == "none":
        v = []
    else:
        raise ValueError(f"Protocol `trajectory:{keep_enum}` is not understood.")

    return v
+ """ + + schema_name: constr(strip_whitespace=True, regex="qcschema_optimization_specification") = "qcschema_optimization_specification" # type: ignore + schema_version: int = 1 + + procedure: str = Field(..., description="Optimization procedure to run the optimization with.") + keywords: Dict[str, Any] = Field({}, description="The optimization specific keywords to be used.") + protocols: OptimizationProtocols = Field(OptimizationProtocols(), description=str(OptimizationProtocols.__doc__)) + + @validator("procedure") + def _check_procedure(cls, v): + return v.lower() + + +class TDKeywords(ProtoModel): + """ + TorsionDriveRecord options + + Notes + ----- + * This class is still provisional and may be subject to removal and re-design. + """ + + dihedrals: List[Tuple[int, int, int, int]] = Field( + ..., + description="The list of dihedrals to select for the TorsionDrive operation. Each entry is a tuple of integers " + "of for particle indices.", + ) + grid_spacing: List[int] = Field( + ..., + description="List of grid spacing for dihedral scan in degrees. Multiple values will be mapped to each " + "dihedral angle.", + ) + dihedral_ranges: Optional[List[Tuple[int, int]]] = Field( + None, + description="A list of dihedral range limits as a pair (lower, upper). " + "Each range corresponds to the dihedrals in input.", + ) + energy_decrease_thresh: Optional[float] = Field( + None, + description="The threshold of the smallest energy decrease amount to trigger activating optimizations from " + "grid point.", + ) + energy_upper_limit: Optional[float] = Field( + None, + description="The threshold if the energy of a grid point that is higher than the current global minimum, to " + "start new optimizations, in unit of a.u. I.e. if energy_upper_limit = 0.05, current global " + "minimum energy is -9.9 , then a new task starting with energy -9.8 will be skipped.", + ) + + +class TorsionDriveInput(ProtoModel): + """Inputs for running a torsion drive. 
@validator("input_specification")
def _check_input_specification(cls, value):
    """Require the input specification's driver to be ``DriverEnum.gradient``.

    Raises
    ------
    ValueError
        If ``value.driver`` is anything other than ``DriverEnum.gradient``.
    """
    # Explicit raise rather than `assert`: assertions are removed under
    # `python -O`, which would silently disable this check.
    if value.driver != DriverEnum.gradient:
        raise ValueError("driver must be set to gradient")
    return value
def Optimization(*args, **kwargs):
    """QC Optimization Results Schema.

    Deprecated alias: emits a ``DeprecationWarning`` and forwards all
    arguments to ``OptimizationResult``.

    .. deprecated:: 0.12
       Use :py:func:`qcelemental.models.OptimizationResult` instead.

    """
    from warnings import warn

    warn(
        "Optimization has been renamed to OptimizationResult and will be removed as soon as v0.13.0",
        DeprecationWarning,
        # Attribute the warning to the caller's line, not to this shim.
        stacklevel=2,
    )
    return OptimizationResult(*args, **kwargs)
+ +class AtomicResultProperties(ProtoModel): + r""" + Named properties of quantum chemistry computations following the MolSSI QCSchema. + + All arrays are stored flat but must be reshapable into the dimensions in attribute ``shape``, with abbreviations as follows: + + * nao: number of atomic orbitals = :attr:`~qcelemental.models.AtomicResultProperties.calcinfo_nbasis` + * nmo: number of molecular orbitals = :attr:`~qcelemental.models.AtomicResultProperties.calcinfo_nmo` + """ + + # Calcinfo + calcinfo_nbasis: Optional[int] = Field(None, description="The number of basis functions for the computation.") + calcinfo_nmo: Optional[int] = Field(None, description="The number of molecular orbitals for the computation.") + calcinfo_nalpha: Optional[int] = Field(None, description="The number of alpha electrons in the computation.") + calcinfo_nbeta: Optional[int] = Field(None, description="The number of beta electrons in the computation.") + calcinfo_natom: Optional[int] = Field(None, description="The number of atoms in the computation.") + + # Canonical + nuclear_repulsion_energy: Optional[float] = Field(None, description="The nuclear repulsion energy.") + return_energy: Optional[float] = Field( + None, + description=f"The energy of the requested method, identical to :attr:`~qcelemental.models.AtomicResult.return_result` for :attr:`~qcelemental.models.AtomicInput.driver`\\ =\\ :attr:`~qcelemental.models.DriverEnum.energy` computations.", + ) + return_gradient: Optional[Array[float]] = Field( + None, + description=f"The gradient of the requested method, identical to :attr:`~qcelemental.models.AtomicResult.return_result` for :attr:`~qcelemental.models.AtomicInput.driver`\\ =\\ :attr:`~qcelemental.models.DriverEnum.gradient` computations.", + units="E_h/a0", + ) + return_hessian: Optional[Array[float]] = Field( + None, + description=f"The Hessian of the requested method, identical to :attr:`~qcelemental.models.AtomicResult.return_result` for 
:attr:`~qcelemental.models.AtomicInput.driver`\\ =\\ :attr:`~qcelemental.models.DriverEnum.hessian` computations.", + units="E_h/a0^2", + ) + + # SCF Keywords + scf_one_electron_energy: Optional[float] = Field( + None, + description="The one-electron (core Hamiltonian) energy contribution to the total SCF energy.", + units="E_h", + ) + scf_two_electron_energy: Optional[float] = Field( + None, + description="The two-electron energy contribution to the total SCF energy.", + units="E_h", + ) + scf_vv10_energy: Optional[float] = Field( + None, + description="The VV10 functional energy contribution to the total SCF energy.", + units="E_h", + ) + scf_xc_energy: Optional[float] = Field( + None, + description="The functional (XC) energy contribution to the total SCF energy.", + units="E_h", + ) + scf_dispersion_correction_energy: Optional[float] = Field( + None, + description="The dispersion correction appended to an underlying functional when a DFT-D method is requested.", + units="E_h", + ) + scf_dipole_moment: Optional[Array[float]] = Field( + None, + description="The SCF X, Y, and Z dipole components", + units="e a0", + ) + scf_quadrupole_moment: Optional[Array[float]] = Field( + None, + description="The quadrupole components (redundant; 6 unique).", + shape=[3, 3], + units="e a0^2", + ) + scf_total_energy: Optional[float] = Field( + None, + description="The total electronic energy of the SCF stage of the calculation.", + units="E_h", + ) + scf_total_gradient: Optional[Array[float]] = Field( + None, + description="The total electronic gradient of the SCF stage of the calculation.", + units="E_h/a0", + ) + scf_total_hessian: Optional[Array[float]] = Field( + None, + description="The total electronic Hessian of the SCF stage of the calculation.", + units="E_h/a0^2", + ) + scf_iterations: Optional[int] = Field(None, description="The number of SCF iterations taken before convergence.") + + # MP2 Keywords + mp2_same_spin_correlation_energy: Optional[float] = Field( + None, 
+ description="The portion of MP2 doubles correlation energy from same-spin (i.e. triplet) correlations, without any user scaling.", + units="E_h", + ) + mp2_opposite_spin_correlation_energy: Optional[float] = Field( + None, + description="The portion of MP2 doubles correlation energy from opposite-spin (i.e. singlet) correlations, without any user scaling.", + units="E_h", + ) + mp2_singles_energy: Optional[float] = Field( + None, + description="The singles portion of the MP2 correlation energy. Zero except in ROHF.", + units="E_h", + ) + mp2_doubles_energy: Optional[float] = Field( + None, + description="The doubles portion of the MP2 correlation energy including same-spin and opposite-spin correlations.", + units="E_h", + ) + mp2_correlation_energy: Optional[float] = Field( + None, + description="The MP2 correlation energy.", + units="E_h", + ) + mp2_total_energy: Optional[float] = Field( + None, + description="The total MP2 energy (MP2 correlation energy + HF energy).", + units="E_h", + ) + mp2_dipole_moment: Optional[Array[float]] = Field( + None, + description="The MP2 X, Y, and Z dipole components.", + shape=[3], + units="e a0", + ) + + # CCSD Keywords + ccsd_same_spin_correlation_energy: Optional[float] = Field( + None, + description="The portion of CCSD doubles correlation energy from same-spin (i.e. triplet) correlations, without any user scaling.", + units="E_h", + ) + ccsd_opposite_spin_correlation_energy: Optional[float] = Field( + None, + description="The portion of CCSD doubles correlation energy from opposite-spin (i.e. singlet) correlations, without any user scaling.", + units="E_h", + ) + ccsd_singles_energy: Optional[float] = Field( + None, + description="The singles portion of the CCSD correlation energy. 
Zero except in ROHF.", + units="E_h", + ) + ccsd_doubles_energy: Optional[float] = Field( + None, + description="The doubles portion of the CCSD correlation energy including same-spin and opposite-spin correlations.", + units="E_h", + ) + ccsd_correlation_energy: Optional[float] = Field( + None, + description="The CCSD correlation energy.", + units="E_h", + ) + ccsd_total_energy: Optional[float] = Field( + None, + description="The total CCSD energy (CCSD correlation energy + HF energy).", + units="E_h", + ) + ccsd_dipole_moment: Optional[Array[float]] = Field( + None, + description="The CCSD X, Y, and Z dipole components.", + shape=[3], + units="e a0", + ) + ccsd_iterations: Optional[int] = Field(None, description="The number of CCSD iterations taken before convergence.") + + # CCSD(T) keywords + ccsd_prt_pr_correlation_energy: Optional[float] = Field( + None, + description="The CCSD(T) correlation energy.", + units="E_h", + ) + ccsd_prt_pr_total_energy: Optional[float] = Field( + None, + description="The total CCSD(T) energy (CCSD(T) correlation energy + HF energy).", + units="E_h", + ) + ccsd_prt_pr_dipole_moment: Optional[Array[float]] = Field( + None, + description="The CCSD(T) X, Y, and Z dipole components.", + shape=[3], + units="e a0", + ) + + # CCSDT keywords + ccsdt_correlation_energy: Optional[float] = Field( + None, + description="The CCSDT correlation energy.", + units="E_h", + ) + ccsdt_total_energy: Optional[float] = Field( + None, + description="The total CCSDT energy (CCSDT correlation energy + HF energy).", + units="E_h", + ) + ccsdt_dipole_moment: Optional[Array[float]] = Field( + None, + description="The CCSDT X, Y, and Z dipole components.", + shape=[3], + units="e a0", + ) + ccsdt_iterations: Optional[int] = Field( + None, description="The number of CCSDT iterations taken before convergence." 
def __repr_args__(self) -> "ReprArgs":
    """Return every ``(name, value)`` pair from ``self.dict()`` for the repr."""
    return list(self.dict().items())


@validator(
    "scf_dipole_moment",
    "mp2_dipole_moment",
    "ccsd_dipole_moment",
    "ccsd_prt_pr_dipole_moment",
    "ccsdt_dipole_moment",
    "ccsdtq_dipole_moment",
    "scf_quadrupole_moment",
)
def _validate_poles(cls, v, values, field):
    """Reshape a multipole to ``(3,)`` (dipole) or ``(3, 3)`` (quadrupole).

    The CCSDT and CCSDTQ dipoles are included because their fields declare
    ``shape=[3]`` just like the MP2/CCSD/CCSD(T) dipoles.

    Raises
    ------
    ValueError
        If the field name is neither a dipole nor a quadrupole, or the value
        cannot be reshaped to the expected shape.
    """
    if v is None:
        return v

    if field.name.endswith("_dipole_moment"):
        order = 1
    elif field.name.endswith("_quadrupole_moment"):
        order = 2
    else:
        # Without this branch `order` would be unbound below.
        raise ValueError(f"Unrecognized multipole field ``{field.name}``!")

    shape = (3,) * order
    try:
        return np.asarray(v).reshape(shape)
    except ValueError as err:
        raise ValueError(f"Multipole must be castable to shape {shape}!") from err


@validator(
    "return_gradient",
    "return_hessian",
    "scf_total_gradient",
    "scf_total_hessian",
)
def _validate_derivs(cls, v, values, field):
    """Reshape a gradient to ``(nat, 3)`` or a Hessian to ``(3 * nat, 3 * nat)``.

    ``nat`` is read from the already-validated ``calcinfo_natom`` value.

    Raises
    ------
    ValueError
        If ``calcinfo_natom`` is not set, the field name is neither a gradient
        nor a Hessian, or the value cannot be reshaped to the expected shape.
    """
    if v is None:
        return v

    nat = values.get("calcinfo_natom", None)
    if nat is None:
        raise ValueError("Please also set ``calcinfo_natom``!")

    if field.name.endswith("_gradient"):
        shape = (nat, 3)
    elif field.name.endswith("_hessian"):
        shape = (3 * nat, 3 * nat)
    else:
        # Without this branch `shape` would be unbound below.
        raise ValueError(f"Unrecognized derivative field ``{field.name}``!")

    try:
        v = np.asarray(v).reshape(shape)
    except (ValueError, AttributeError) as err:
        raise ValueError(f"Derivative must be castable to shape {shape}!") from err
    return v
+ # This will break QCFractal tests for now, but future qcf will be ok with it. + # kwargs["encoding"] = "json" + return super().dict(*args, **kwargs) + + +class WavefunctionProperties(ProtoModel): + r"""Wavefunction properties resulting from a computation. Matrix quantities are stored in column-major order. Presence and contents configurable by protocol.""" + + # Class properties + _return_results_names: Set[str] = { + "orbitals_a", + "orbitals_b", + "density_a", + "density_b", + "fock_a", + "fock_b", + "eigenvalues_a", + "eigenvalues_b", + "occupations_a", + "occupations_b", + } + + # The full basis set description of the quantities + basis: BasisSet = Field(..., description=str(BasisSet.__doc__)) + restricted: bool = Field( + ..., + description=str( + "If the computation was restricted or not (alpha == beta). If True, all beta quantities are skipped." + ), + ) + + # Core Hamiltonian + h_core_a: Optional[Array[float]] = Field( + None, description="Alpha-spin core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"] + ) + h_core_b: Optional[Array[float]] = Field( + None, description="Beta-spin core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"] + ) + h_effective_a: Optional[Array[float]] = Field( + None, description="Alpha-spin effective core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"] + ) + h_effective_b: Optional[Array[float]] = Field( + None, description="Beta-spin effective core (one-electron) Hamiltonian in the AO basis", shape=["nao", "nao"] + ) + + # SCF Results + scf_orbitals_a: Optional[Array[float]] = Field( + None, description="SCF alpha-spin orbitals in the AO basis.", shape=["nao", "nmo"] + ) + scf_orbitals_b: Optional[Array[float]] = Field( + None, description="SCF beta-spin orbitals in the AO basis.", shape=["nao", "nmo"] + ) + scf_density_a: Optional[Array[float]] = Field( + None, description="SCF alpha-spin density matrix in the AO basis.", shape=["nao", "nao"] + ) + scf_density_b: 
Optional[Array[float]] = Field( + None, description="SCF beta-spin density matrix in the AO basis.", shape=["nao", "nao"] + ) + scf_fock_a: Optional[Array[float]] = Field( + None, description="SCF alpha-spin Fock matrix in the AO basis.", shape=["nao", "nao"] + ) + scf_fock_b: Optional[Array[float]] = Field( + None, description="SCF beta-spin Fock matrix in the AO basis.", shape=["nao", "nao"] + ) + scf_eigenvalues_a: Optional[Array[float]] = Field( + None, description="SCF alpha-spin orbital eigenvalues.", shape=["nmo"] + ) + scf_eigenvalues_b: Optional[Array[float]] = Field( + None, description="SCF beta-spin orbital eigenvalues.", shape=["nmo"] + ) + scf_occupations_a: Optional[Array[float]] = Field( + None, description="SCF alpha-spin orbital occupations.", shape=["nmo"] + ) + scf_occupations_b: Optional[Array[float]] = Field( + None, description="SCF beta-spin orbital occupations.", shape=["nmo"] + ) + + # BELOW from qcsk + scf_coulomb_a: Optional[Array[float]] = Field( + None, description="SCF alpha-spin Coulomb matrix in the AO basis.", shape=["nao", "nao"] + ) + scf_coulomb_b: Optional[Array[float]] = Field( + None, description="SCF beta-spin Coulomb matrix in the AO basis.", shape=["nao", "nao"] + ) + scf_exchange_a: Optional[Array[float]] = Field( + None, description="SCF alpha-spin exchange matrix in the AO basis.", shape=["nao", "nao"] + ) + scf_exchange_b: Optional[Array[float]] = Field( + None, description="SCF beta-spin exchange matrix in the AO basis.", shape=["nao", "nao"] + ) + + # Localized-orbital SCF wavefunction quantities + localized_orbitals_a: Optional[Array[float]] = Field( + None, + description="Localized alpha-spin orbitals in the AO basis. All nmo orbitals are included, even if only a subset were localized.", + shape=["nao", "nmo"], + ) + localized_orbitals_b: Optional[Array[float]] = Field( + None, + description="Localized beta-spin orbitals in the AO basis. 
All nmo orbitals are included, even if only a subset were localized.", + shape=["nao", "nmo"], + ) + localized_fock_a: Optional[Array[float]] = Field( + None, + description="Alpha-spin Fock matrix in the localized molecular orbital basis. All nmo orbitals are included, even if only a subset were localized.", + shape=["nmo", "nmo"], + ) + localized_fock_b: Optional[Array[float]] = Field( + None, + description="Beta-spin Fock matrix in the localized molecular orbital basis. All nmo orbitals are included, even if only a subset were localized.", + shape=["nmo", "nmo"], + ) + # ABOVE from qcsk + + # Return results, must be defined last + orbitals_a: Optional[str] = Field(None, description="Index to the alpha-spin orbitals of the primary return.") + orbitals_b: Optional[str] = Field(None, description="Index to the beta-spin orbitals of the primary return.") + density_a: Optional[str] = Field(None, description="Index to the alpha-spin density of the primary return.") + density_b: Optional[str] = Field(None, description="Index to the beta-spin density of the primary return.") + fock_a: Optional[str] = Field(None, description="Index to the alpha-spin Fock matrix of the primary return.") + fock_b: Optional[str] = Field(None, description="Index to the beta-spin Fock matrix of the primary return.") + eigenvalues_a: Optional[str] = Field( + None, description="Index to the alpha-spin orbital eigenvalues of the primary return." + ) + eigenvalues_b: Optional[str] = Field( + None, description="Index to the beta-spin orbital eigenvalues of the primary return." + ) + occupations_a: Optional[str] = Field( + None, description="Index to the alpha-spin orbital occupations of the primary return." + ) + occupations_b: Optional[str] = Field( + None, description="Index to the beta-spin orbital occupations of the primary return." 
+ ) + + class Config(ProtoModel.Config): + force_skip_defaults = True + + @validator("scf_eigenvalues_a", "scf_eigenvalues_b", "scf_occupations_a", "scf_occupations_b") + def _assert1d(cls, v, values): + try: + v = v.reshape(-1) + except (ValueError, AttributeError): + raise ValueError("Vector must be castable to shape (-1, )!") + return v + + @validator("scf_orbitals_a", "scf_orbitals_b") + def _assert2d_nao_x(cls, v, values): + bas = values.get("basis", None) + + # Do not raise multiple errors + if bas is None: + return v + + try: + v = v.reshape(bas.nbf, -1) + except (ValueError, AttributeError): + raise ValueError("Matrix must be castable to shape (nbf, -1)!") + return v + + @validator( + "h_core_a", + "h_core_b", + "h_effective_a", + "h_effective_b", + # SCF + "scf_density_a", + "scf_density_b", + "scf_fock_a", + "scf_fock_b", + ) + def _assert2d(cls, v, values): + bas = values.get("basis", None) + + # Do not raise multiple errors + if bas is None: + return v + + try: + v = v.reshape(bas.nbf, bas.nbf) + except (ValueError, AttributeError): + raise ValueError("Matrix must be castable to shape (nbf, nbf)!") + return v + + @validator( + "orbitals_a", + "orbitals_b", + "density_a", + "density_b", + "fock_a", + "fock_b", + "eigenvalues_a", + "eigenvalues_b", + "occupations_a", + "occupations_b", + ) + def _assert_exists(cls, v, values): + if values.get(v, None) is None: + raise ValueError(f"Return quantity {v} does not exist in the values.") + return v + + +class WavefunctionProtocolEnum(str, Enum): + r"""Wavefunction to keep from a computation.""" + + all = "all" + orbitals_and_eigenvalues = "orbitals_and_eigenvalues" + occupations_and_eigenvalues = "occupations_and_eigenvalues" + return_results = "return_results" + none = "none" + + +class ErrorCorrectionProtocol(ProtoModel): + r"""Configuration for how QCEngine handles error correction + + WARNING: These protocols are currently experimental and only supported by NWChem tasks + """ + + default_policy: bool = 
Field( + True, description="Whether to allow error corrections to be used " "if not directly specified in `policies`" + ) + # TODO (wardlt): Consider support for common policies (e.g., 'only increase iterations') as strings (see #182) + policies: Optional[Dict[str, bool]] = Field( + None, + description="Settings that define whether specific error corrections are allowed. " + "Keys are the name of a known error and values are whether it is allowed to be used.", + ) + + def allows(self, policy: str): + if self.policies is None: + return self.default_policy + return self.policies.get(policy, self.default_policy) + + +class NativeFilesProtocolEnum(str, Enum): + r"""CMS program files to keep from a computation.""" + + all = "all" + input = "input" + none = "none" + + +class AtomicResultProtocols(ProtoModel): + r"""Protocols regarding the manipulation of computational result data.""" + + wavefunction: WavefunctionProtocolEnum = Field( + WavefunctionProtocolEnum.none, description=str(WavefunctionProtocolEnum.__doc__) + ) + stdout: bool = Field(True, description="Primary output file to keep from the computation") + error_correction: ErrorCorrectionProtocol = Field( + default_factory=ErrorCorrectionProtocol, description="Policies for error correction" + ) + native_files: NativeFilesProtocolEnum = Field( + NativeFilesProtocolEnum.none, + description="Policies for keeping processed files from the computation", + ) + + class Config: + force_skip_defaults = True + + +### Primary models + + +class AtomicInput(ProtoModel): + r"""The MolSSI Quantum Chemistry Schema""" + + id: Optional[str] = Field(None, description="The optional ID for the computation.") + schema_name: constr(strip_whitespace=True, regex="^(qc_?schema_input)$") = Field( # type: ignore + qcschema_input_default, + description=( + f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_input_default}." 
+ ), + ) + schema_version: int = Field( + 1, + description="The version number of :attr:`~qcelemental.models.AtomicInput.schema_name` to which this model conforms.", + ) + + molecule: Molecule = Field(..., description="The molecule to use in the computation.") + driver: DriverEnum = Field(..., description=str(DriverEnum.__doc__)) + model: Model = Field(..., description=str(Model.__doc__)) + keywords: Dict[str, Any] = Field({}, description="The program-specific keywords to be used.") + protocols: AtomicResultProtocols = Field(AtomicResultProtocols(), description=str(AtomicResultProtocols.__doc__)) + + extras: Dict[str, Any] = Field( + {}, + description="Additional information to bundle with the computation. Use for schema development and scratch space.", + ) + + provenance: Provenance = Field( + default_factory=partial(provenance_stamp, __name__), description=str(Provenance.__doc__) + ) + + class Config(ProtoModel.Config): + def schema_extra(schema, model): + schema["$schema"] = qcschema_draft + + def __repr_args__(self) -> "ReprArgs": + return [ + ("driver", self.driver.value), + ("model", self.model.dict()), + ("molecule_hash", self.molecule.get_hash()[:7]), + ] + + +class AtomicResult(AtomicInput): + r"""Results from a CMS program execution.""" + + schema_name: constr(strip_whitespace=True, regex="^(qc_?schema_output)$") = Field( # type: ignore + qcschema_output_default, + description=( + f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_output_default}." + ), + ) + properties: AtomicResultProperties = Field(..., description=str(AtomicResultProperties.__doc__)) + wavefunction: Optional[WavefunctionProperties] = Field(None, description=str(WavefunctionProperties.__doc__)) + + return_result: Union[float, Array[float], Dict[str, Any]] = Field( + ..., + description="The primary return specified by the :attr:`~qcelemental.models.AtomicInput.driver` field. 
Scalar if energy; array if gradient or hessian; dictionary with property keys if properties.", + ) # type: ignore + + stdout: Optional[str] = Field( + None, + description="The primary logging output of the program, whether natively standard output or a file. Presence vs. absence (or null-ness?) configurable by protocol.", + ) + stderr: Optional[str] = Field(None, description="The standard error of the program execution.") + native_files: Dict[str, Any] = Field({}, description="DSL files.") + + success: bool = Field(..., description="The success of program execution. If False, other fields may be blank.") + error: Optional[ComputeError] = Field(None, description=str(ComputeError.__doc__)) + provenance: Provenance = Field(..., description=str(Provenance.__doc__)) + + @validator("schema_name", pre=True) + def _input_to_output(cls, v): + r"""If qcschema_input is passed in, cast it to output, otherwise no""" + if v.lower().strip() in [qcschema_input_default, qcschema_output_default]: + return qcschema_output_default + raise ValueError( + "Only {0} or {1} is allowed for schema_name, " + "which will be converted to {0}".format(qcschema_output_default, qcschema_input_default) + ) + + @validator("return_result") + def _validate_return_result(cls, v, values): + if values["driver"] == "gradient": + v = np.asarray(v).reshape(-1, 3) + elif values["driver"] == "hessian": + v = np.asarray(v) + nsq = int(v.size**0.5) + v.shape = (nsq, nsq) + + return v + + @validator("wavefunction", pre=True) + def _wavefunction_protocol(cls, value, values): + # We are pre, gotta do extra checks + if value is None: + return value + elif isinstance(value, dict): + wfn = value.copy() + elif isinstance(value, WavefunctionProperties): + wfn = value.dict() + else: + raise ValueError("wavefunction must be None, a dict, or a WavefunctionProperties object.") + + # Do not propagate validation errors + if "protocols" not in values: + raise ValueError("Protocols was not properly formed.") + + # Handle 
restricted + restricted = wfn.get("restricted", None) + if restricted is None: + raise ValueError("`restricted` is required.") + + if restricted: + for k in list(wfn.keys()): + if k.endswith("_b"): + wfn.pop(k) + + # Handle protocols + wfnp = values["protocols"].wavefunction + return_keep = None + if wfnp == "all": + pass + elif wfnp == "none": + wfn = None + elif wfnp == "return_results": + return_keep = [ + "orbitals_a", + "orbitals_b", + "density_a", + "density_b", + "fock_a", + "fock_b", + "eigenvalues_a", + "eigenvalues_b", + "occupations_a", + "occupations_b", + ] + elif wfnp == "orbitals_and_eigenvalues": + return_keep = ["orbitals_a", "orbitals_b", "eigenvalues_a", "eigenvalues_b"] + elif wfnp == "occupations_and_eigenvalues": + return_keep = ["occupations_a", "occupations_b", "eigenvalues_a", "eigenvalues_b"] + else: + raise ValueError(f"Protocol `wavefunction:{wfnp}` is not understood.") + + if return_keep is not None: + ret_wfn = {"restricted": restricted} + if "basis" in wfn: + ret_wfn["basis"] = wfn["basis"] + + for rk in return_keep: + key = wfn.get(rk, None) + if key is None: + continue + + ret_wfn[rk] = key + ret_wfn[key] = wfn[key] + + return ret_wfn + else: + return wfn + + @validator("stdout") + def _stdout_protocol(cls, value, values): + # Do not propagate validation errors + if "protocols" not in values: + raise ValueError("Protocols was not properly formed.") + + outp = values["protocols"].stdout + if outp is True: + return value + elif outp is False: + return None + else: + raise ValueError(f"Protocol `stdout:{outp}` is not understood") + + @validator("native_files") + def _native_file_protocol(cls, value, values): + ancp = values["protocols"].native_files + if ancp == "all": + return value + elif ancp == "none": + return {} + elif ancp == "input": + return_keep = ["input"] + if value is None: + files = {} + else: + files = value.copy() + else: + raise ValueError(f"Protocol `native_files:{ancp}` is not understood") + + ret = {} + for rk in 
return_keep: + ret[rk] = files.get(rk, None) + return ret + + +class ResultProperties(AtomicResultProperties): + """QC Result Properties Schema. + + .. deprecated:: 0.12 + Use :py:func:`qcelemental.models.AtomicResultProperties` instead. + + """ + + def __init__(self, *args, **kwargs): + from warnings import warn + + warn( + "ResultProperties has been renamed to AtomicResultProperties and will be removed as soon as v0.13.0", + DeprecationWarning, + ) + super().__init__(*args, **kwargs) + + +class ResultProtocols(AtomicResultProtocols): + """QC Result Protocols Schema. + + .. deprecated:: 0.12 + Use :py:func:`qcelemental.models.AtomicResultProtocols` instead. + + """ + + def __init__(self, *args, **kwargs): + from warnings import warn + + warn( + "ResultProtocols has been renamed to AtomicResultProtocols and will be removed as soon as v0.13.0", + DeprecationWarning, + ) + super().__init__(*args, **kwargs) + + +class ResultInput(AtomicInput): + """QC Input Schema. + + .. deprecated:: 0.12 + Use :py:func:`qcelemental.models.AtomicInput` instead. + + """ + + def __init__(self, *args, **kwargs): + from warnings import warn + + warn("ResultInput has been renamed to AtomicInput and will be removed as soon as v0.13.0", DeprecationWarning) + super().__init__(*args, **kwargs) + + +class Result(AtomicResult): + """QC Result Schema. + + .. deprecated:: 0.12 + Use :py:func:`qcelemental.models.AtomicResult` instead. 
+ + """ + + def __init__(self, *args, **kwargs): + from warnings import warn + + warn("Result has been renamed to AtomicResult and will be removed as soon as v0.13.0", DeprecationWarning) + super().__init__(*args, **kwargs) diff --git a/qcelemental/models/types.py b/qcelemental/models/types.py new file mode 100644 index 00000000..7840482a --- /dev/null +++ b/qcelemental/models/types.py @@ -0,0 +1,40 @@ +from typing import Any, Dict + +import numpy as np + + +class TypedArray(np.ndarray): + @classmethod + def __get_validators__(cls): + yield cls.validate + + @classmethod + def validate(cls, v): + try: + v = np.asarray(v, dtype=cls._dtype) + except ValueError: + raise ValueError("Could not cast {} to NumPy Array!".format(v)) + + return v + + @classmethod + def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None: + dt = cls._dtype + if dt is int or np.issubdtype(dt, np.integer): + items = {"type": "number", "multipleOf": 1.0} + elif dt is float or np.issubdtype(dt, np.floating): + items = {"type": "number"} + elif dt is str or np.issubdtype(dt, np.bytes_): + items = {"type": "string"} + elif dt is bool or np.issubdtype(dt, np.bool_): + items = {"type": "boolean"} + field_schema.update(type="array", items=items) + + +class ArrayMeta(type): + def __getitem__(self, dtype): + return type("Array", (TypedArray,), {"_dtype": dtype}) + + +class Array(np.ndarray, metaclass=ArrayMeta): + pass From ee7f43c01f90338ceb283552d09ef179b31aa9ad Mon Sep 17 00:00:00 2001 From: Levi Naden Date: Thu, 5 Sep 2024 18:18:11 -0400 Subject: [PATCH 2/4] Levi 321 pyd v2 overhaul models --- qcelemental/models/align.py | 21 +- qcelemental/models/basemodels.py | 133 +++++++++--- qcelemental/models/basis.py | 140 ++++++------ qcelemental/models/common_models.py | 45 ++-- qcelemental/models/molecule.py | 251 +++++++++++++--------- qcelemental/models/procedures.py | 52 ++--- qcelemental/models/results.py | 322 +++++++++++++++++----------- qcelemental/models/types.py | 92 ++++++-- 8 files 
changed, 655 insertions(+), 401 deletions(-) diff --git a/qcelemental/models/align.py b/qcelemental/models/align.py index ca09504f..36a6c2dc 100644 --- a/qcelemental/models/align.py +++ b/qcelemental/models/align.py @@ -1,14 +1,10 @@ from typing import Optional import numpy as np - -try: - from pydantic.v1 import Field, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field, validator +from pydantic import Field, field_validator from ...util import blockwise_contract, blockwise_expand -from .basemodels import ProtoModel +from .basemodels import ExtendedConfigDict, ProtoModel from .types import Array __all__ = ["AlignmentMill"] @@ -30,19 +26,20 @@ class AlignmentMill(ProtoModel): atommap: Optional[Array[int]] = Field(None, description="Atom exchange map (nat,) for coordinates.") # type: ignore mirror: bool = Field(False, description="Do mirror invert coordinates?") - class Config: - force_skip_defaults = True + model_config = ExtendedConfigDict(force_skip_defaults=True) - @validator("shift") - def _must_be_3(cls, v, values, **kwargs): + @field_validator("shift") + @classmethod + def _must_be_3(cls, v): try: v = v.reshape(3) except (ValueError, AttributeError): raise ValueError("Shift must be castable to shape (3,)!") return v - @validator("rotation") - def _must_be_33(cls, v, values, **kwargs): + @field_validator("rotation") + @classmethod + def _must_be_33(cls, v): try: v = v.reshape(3, 3) except (ValueError, AttributeError): diff --git a/qcelemental/models/basemodels.py b/qcelemental/models/basemodels.py index 2fecef26..c82109f3 100644 --- a/qcelemental/models/basemodels.py +++ b/qcelemental/models/basemodels.py @@ -1,15 +1,11 @@ import json +import warnings from pathlib import Path -from typing import Any, Dict, Optional, Set, Union +from typing import Any, Callable, Dict, List, Optional, Set, Union import numpy as np - -try: - from pydantic.v1 import BaseSettings # remove when QCFractal merges `next` - from pydantic.v1 
import BaseModel -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import BaseSettings # remove when QCFractal merges `next` - from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict, model_serializer +from pydantic_settings import BaseSettings # remove when QCFractal merges `next` from qcelemental.util import deserialize, serialize from qcelemental.util.autodocs import AutoPydanticDocGenerator # remove when QCFractal merges `next` @@ -19,14 +15,36 @@ def _repr(self) -> str: return f'{self.__repr_name__()}({self.__repr_str__(", ")})' +# Encoders, to be deprecated at some point +ndarray_encoder = {np.ndarray: lambda v: v.flatten().tolist()} + + +class ExtendedConfigDict(ConfigDict, total=False): + serialize_default_excludes: Set + """Add items to exclude from serialization""" + + serialize_skip_defaults: bool + """When serializing, ignore default values (i.e. those not set by user)""" + + force_skip_defaults: bool + """Manually force defaults to not be included in output dictionary""" + + canonical_repr: bool + """Use canonical representation of the molecules""" + + repr_style: Union[List[str], Callable] + """Representation styles""" + + class ProtoModel(BaseModel): - class Config: - allow_mutation: bool = False - extra: str = "forbid" - json_encoders: Dict[str, Any] = {np.ndarray: lambda v: v.flatten().tolist()} - serialize_default_excludes: Set = set() - serialize_skip_defaults: bool = False - force_skip_defaults: bool = False + model_config = ExtendedConfigDict( + frozen=True, + extra="forbid", + populate_by_name=True, # Allows using alias to populate + serialize_default_excludes=set(), + serialize_skip_defaults=False, + force_skip_defaults=False, + ) def __init_subclass__(cls, **kwargs) -> None: super().__init_subclass__(**kwargs) @@ -43,6 +61,11 @@ def parse_raw(cls, data: Union[bytes, str], *, encoding: Optional[str] = None) - r""" Parses raw string or bytes into a Model object. 
+ This overwrites the deprecated parse_file of v2 Pydantic to eventually call parse_model or parse_model_json, + but is kept to preserve our own API + + May also be deprecated from QCElemental in time + Parameters ---------- data @@ -65,18 +88,24 @@ def parse_raw(cls, data: Union[bytes, str], *, encoding: Optional[str] = None) - raise TypeError("Input is neither str nor bytes, please specify an encoding.") if encoding.endswith(("json", "javascript", "pickle")): - return super().parse_raw(data, content_type=encoding) + # return super().parse_raw(data, content_type=encoding) + return cls.model_validate_json(data) elif encoding in ["msgpack-ext", "json-ext", "msgpack"]: obj = deserialize(data, encoding) else: raise TypeError(f"Content type '{encoding}' not understood.") - return cls.parse_obj(obj) + return cls.model_validate(obj) @classmethod def parse_file(cls, path: Union[str, Path], *, encoding: Optional[str] = None) -> "ProtoModel": # type: ignore r"""Parses a file into a Model object. + This overwrites the deprecated parse_file of v2 Pydantic to eventually call parse_model or parse_model_json, + but is kept to preserve our own API + + May also be deprecated from QCElemental in time + Parameters ---------- path @@ -105,16 +134,50 @@ def parse_file(cls, path: Union[str, Path], *, encoding: Optional[str] = None) - return cls.parse_raw(path.read_bytes(), encoding=encoding) def dict(self, **kwargs) -> Dict[str, Any]: + warnings.warn("The `dict` method is deprecated; use `model_dump` instead.", DeprecationWarning) + return self.model_dump(**kwargs) + + @model_serializer(mode="wrap") + def _serialize_model(self, handler) -> Dict[str, Any]: + """ + Customize the serialization output. Does duplicate with some code in model_dump, but handles the case of nested + models and any model config options. + + Encoding is handled at the `model_dump` level and not here as that should happen only after EVERYTHING has been + dumped/de-pydantic-ized. 
+ + DEVELOPER WARNING: If default values for nested ProtoModels are not validated and are also not the expected + model (e.g. Provenance fields are dicts by default), then this function will throw an error because the self + field becomes the current value, not the model. + """ + + # Get the default return, let the model_dump handle kwarg + default_result = handler(self) + exclusion_set = self.model_config["serialize_default_excludes"] + force_skip_default = self.model_config["force_skip_defaults"] + output_dict = {} + # Could handle this with a comprehension, easier this way + for key, value in default_result.items(): + # Skip defaults on config level (skip default must be on and k has to be unset) + # Also check against exclusion set on a model_config level + if (force_skip_default and key not in self.model_fields_set) or key in exclusion_set: + continue + output_dict[key] = value + return output_dict + + def model_dump(self, **kwargs) -> Dict[str, Any]: encoding = kwargs.pop("encoding", None) - kwargs["exclude"] = ( - kwargs.get("exclude", None) or set() - ) | self.__config__.serialize_default_excludes # type: ignore - kwargs.setdefault("exclude_unset", self.__config__.serialize_skip_defaults) # type: ignore - if self.__config__.force_skip_defaults: # type: ignore - kwargs["exclude_unset"] = True + # kwargs["exclude"] = ( + # kwargs.get("exclude", None) or set() + # ) | self.model_config["serialize_default_excludes"] # type: ignore + # kwargs.setdefault("exclude_unset", self.model_config["serialize_skip_defaults"]) # type: ignore + # if self.model_config["force_skip_defaults"]: # type: ignore + # kwargs["exclude_unset"] = True - data = super().dict(**kwargs) + # Model config defaults will be handled in the @model_serializer function + # The @model_serializer function will be called AFTER this is called + data = super().model_dump(**kwargs) if encoding is None: return data @@ -168,12 +231,16 @@ def serialize( if exclude_none: kwargs["exclude_none"] = exclude_none 
- data = self.dict(**kwargs) + data = self.model_dump(**kwargs) return serialize(data, encoding=encoding) def json(self, **kwargs): # Alias JSON here from BaseModel to reflect dict changes + warnings.warn("The `json` method is deprecated; use `model_dump_json` instead.", DeprecationWarning) + return self.model_dump_json(**kwargs) + + def model_dump_json(self, **kwargs): return self.serialize("json", **kwargs) def compare(self, other: Union["ProtoModel", BaseModel], **kwargs) -> bool: @@ -195,6 +262,22 @@ def compare(self, other: Union["ProtoModel", BaseModel], **kwargs) -> bool: return compare_recursive(self, other, **kwargs) + @classmethod + def _merge_config_with(cls, *args, **kwargs): + """ + Helper function to merge protomodel's config with other args + + args: other ExtendedConfigDict instances or equivalent dicts + kwargs: Keys to add into the dictionary raw + """ + output_dict = {**cls.model_config} + for arg in args: # Handle other dicts first + output_dict.update(arg) + # Update any specific keywords + output_dict.update(kwargs) + # Finally, check against the Extended Config Dict + return ExtendedConfigDict(**output_dict) + # remove when QCFractal merges `next` class AutodocBaseSettings(BaseSettings): diff --git a/qcelemental/models/basis.py b/qcelemental/models/basis.py index 2a4b2c88..ca9ad843 100644 --- a/qcelemental/models/basis.py +++ b/qcelemental/models/basis.py @@ -1,17 +1,13 @@ from enum import Enum from typing import Dict, List, Optional -try: - from pydantic.v1 import ConstrainedInt, Field, constr, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import ConstrainedInt, Field, constr, validator +from pydantic import Field, constr, field_validator +from typing_extensions import Annotated from ...exceptions import ValidationError from .basemodels import ProtoModel, qcschema_draft - -class NonnegativeInt(ConstrainedInt): - ge = 0 +NonnegativeInt = Annotated[int, Field(ge=0)] class HarmonicType(str, Enum): @@ -21,42 
+17,47 @@ class HarmonicType(str, Enum): cartesian = "cartesian" +def electron_shell_json_schema_extra(schema, model): + # edit to allow string storage of basis sets as BSE uses. + # alternately, could `Union[float, str]` above but that loses some validation + schema["properties"]["exponents"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} + schema["properties"]["coefficients"]["items"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} + schema["properties"]["angular_momentum"].update({"uniqueItems": True}) + + class ElectronShell(ProtoModel): """Information for a single electronic shell.""" angular_momentum: List[NonnegativeInt] = Field( - ..., description="Angular momentum for the shell as an array of integers.", min_items=1 + ..., description="Angular momentum for the shell as an array of integers.", min_length=1 ) harmonic_type: HarmonicType = Field(..., description=str(HarmonicType.__doc__)) - exponents: List[float] = Field(..., description="Exponents for the contracted shell.", min_items=1) + exponents: List[float] = Field(..., description="Exponents for the contracted shell.", min_length=1) coefficients: List[List[float]] = Field( ..., - description="General contraction coefficients for the shell; individual list components will be the individual segment contraction coefficients.", - min_items=1, + description="General contraction coefficients for the shell; " + "individual list components will be the individual segment contraction coefficients.", + min_length=1, ) - class Config(ProtoModel.Config): - def schema_extra(schema, model): - # edit to allow string storage of basis sets as BSE uses. 
alternately, could `Union[float, str]` above but that loses some validation - schema["properties"]["exponents"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} - schema["properties"]["coefficients"]["items"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} - schema["properties"]["angular_momentum"].update({"uniqueItems": True}) + model_config = ProtoModel._merge_config_with(json_schema_extra=electron_shell_json_schema_extra) - @validator("coefficients") - def _check_coefficient_length(cls, v, values): - len_exp = len(values["exponents"]) + @field_validator("coefficients") + @classmethod + def _check_coefficient_length(cls, v, info): + len_exp = len(info.data["exponents"]) for row in v: if len(row) != len_exp: raise ValueError("The length of coefficients does not match the length of exponents.") return v - @validator("coefficients") - def _check_general_contraction_or_fused(cls, v, values): - if len(values["angular_momentum"]) > 1: - if len(values["angular_momentum"]) != len(v): - raise ValueError("The length for a fused shell must equal the length of coefficients.") - + @field_validator("coefficients") + @classmethod + def _check_general_contraction_or_fused(cls, v, info): + angular_momentum = info.data["angular_momentum"] + if len(angular_momentum) > 1 and len(angular_momentum) != len(v): + raise ValueError("The length for a fused shell must equal the length of coefficients.") return v def nfunctions(self) -> int: @@ -94,39 +95,45 @@ class ECPType(str, Enum): spinorbit = "spinorbit" +def ecp_json_schema_extra(schema, model): + # edit to allow string storage of basis sets as BSE uses. 
+ # alternately, could `Union[float, str]` above but that loses some validation + schema["properties"]["gaussian_exponents"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} + schema["properties"]["coefficients"]["items"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} + schema["properties"]["angular_momentum"].update({"uniqueItems": True}) + + class ECPPotential(ProtoModel): """Information for a single ECP potential.""" ecp_type: ECPType = Field(..., description=str(ECPType.__doc__)) angular_momentum: List[NonnegativeInt] = Field( - ..., description="Angular momentum for the potential as an array of integers.", min_items=1 + ..., description="Angular momentum for the potential as an array of integers.", min_length=1 ) - r_exponents: List[int] = Field(..., description="Exponents of the 'r' term.", min_items=1) - gaussian_exponents: List[float] = Field(..., description="Exponents of the 'gaussian' term.", min_items=1) + r_exponents: List[int] = Field(..., description="Exponents of the 'r' term.", min_length=1) + gaussian_exponents: List[float] = Field(..., description="Exponents of the 'gaussian' term.", min_length=1) coefficients: List[List[float]] = Field( ..., - description="General contraction coefficients for the potential; individual list components will be the individual segment contraction coefficients.", - min_items=1, + description="General contraction coefficients for the potential; " + "individual list components will be the individual segment contraction coefficients.", + min_length=1, ) - class Config(ProtoModel.Config): - def schema_extra(schema, model): - # edit to allow string storage of basis sets as BSE uses. 
alternately, could `Union[float, str]` above but that loses some validation - schema["properties"]["gaussian_exponents"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} - schema["properties"]["coefficients"]["items"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]} - schema["properties"]["angular_momentum"].update({"uniqueItems": True}) + model_config = ProtoModel._merge_config_with(json_schema_extra=ecp_json_schema_extra) - @validator("gaussian_exponents") - def _check_gaussian_exponents_length(cls, v, values): - len_exp = len(values["r_exponents"]) + @field_validator("gaussian_exponents") + @classmethod + def _check_gaussian_exponents_length(cls, v, info): + len_exp = len(info.data["r_exponents"]) if len(v) != len_exp: raise ValueError("The length of gaussian_exponents does not match the length of `r` exponents.") return v - @validator("coefficients") - def _check_coefficient_length(cls, v, values): - len_exp = len(values["r_exponents"]) + @field_validator("coefficients") + @classmethod + def _check_coefficient_length(cls, v, info): + len_exp = len(info.data["r_exponents"]) for row in v: if len(row) != len_exp: raise ValueError("The length of coefficients does not match the length of `r` exponents.") @@ -134,19 +141,25 @@ def _check_coefficient_length(cls, v, values): return v +def basis_center_json_schema_extras(schema, model): + schema["properties"]["electron_shells"].update({"uniqueItems": True}) + schema["properties"]["ecp_potentials"].update({"uniqueItems": True}) + + class BasisCenter(ProtoModel): """Data for a single atom/center in a basis set.""" - electron_shells: List[ElectronShell] = Field(..., description="Electronic shells for this center.", min_items=1) + electron_shells: List[ElectronShell] = Field(..., description="Electronic shells for this center.", min_length=1) ecp_electrons: int = Field(0, description="Number of electrons replaced by ECP, MCP, or other field potentials.") ecp_potentials: 
Optional[List[ECPPotential]] = Field( - None, description="ECPs, MCPs, or other field potentials for this center.", min_items=1 + None, description="ECPs, MCPs, or other field potentials for this center.", min_length=1 ) - class Config(ProtoModel.Config): - def schema_extra(schema, model): - schema["properties"]["electron_shells"].update({"uniqueItems": True}) - schema["properties"]["ecp_potentials"].update({"uniqueItems": True}) + model_config = ProtoModel._merge_config_with(json_schema_extra=basis_center_json_schema_extras) + + +def basis_set_json_schema_extra(schema, model): + schema["$schema"] = qcschema_draft class BasisSet(ProtoModel): @@ -154,13 +167,14 @@ class BasisSet(ProtoModel): A quantum chemistry basis description. """ - schema_name: constr(strip_whitespace=True, regex="^(qcschema_basis)$") = Field( # type: ignore + schema_name: constr(strip_whitespace=True, pattern="^(qcschema_basis)$") = Field( # type: ignore "qcschema_basis", - description=(f"The QCSchema specification to which this model conforms. Explicitly fixed as qcschema_basis."), + description=f"The QCSchema specification to which this model conforms. Explicitly fixed as qcschema_basis.", ) schema_version: int = Field( # type: ignore 1, - description="The version number of :attr:`~qcelemental.models.BasisSet.schema_name` to which this model conforms.", + description="The version number of :attr:`~qcelemental.models.BasisSet.schema_name` " + "to which this model conforms.", ) name: str = Field(..., description="The standard basis name if available (e.g., 'cc-pVDZ').") @@ -172,19 +186,20 @@ class BasisSet(ProtoModel): ..., description="Mapping of all atoms/centers in the parent molecule to centers in ``center_data``." ) - nbf: Optional[int] = Field(None, description="The number of basis functions. Use for convenience or as checksum") + nbf: Optional[int] = Field( + None, description="The number of basis functions. 
Use for convenience or as checksum", validate_default=True + ) - class Config(ProtoModel.Config): - def schema_extra(schema, model): - schema["$schema"] = qcschema_draft + model_config = ProtoModel._merge_config_with(json_schema_extra=basis_set_json_schema_extra) - @validator("atom_map") - def _check_atom_map(cls, v, values): + @field_validator("atom_map") + @classmethod + def _check_atom_map(cls, v, info): sv = set(v) # Center_data validation error, skipping try: - missing = sv - values["center_data"].keys() + missing = sv - info.data["center_data"].keys() except KeyError: return v @@ -193,11 +208,12 @@ def _check_atom_map(cls, v, values): return v - @validator("nbf", always=True) - def _check_nbf(cls, v, values): + @field_validator("nbf") + @classmethod + def _check_nbf(cls, v, info): # Bad construction, pass on errors try: - nbf = cls._calculate_nbf(values["atom_map"], values["center_data"]) + nbf = cls._calculate_nbf(info.data["atom_map"], info.data["center_data"]) except KeyError: return v diff --git a/qcelemental/models/common_models.py b/qcelemental/models/common_models.py index f848449d..da63ba15 100644 --- a/qcelemental/models/common_models.py +++ b/qcelemental/models/common_models.py @@ -1,25 +1,18 @@ from enum import Enum -from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Tuple, Union import numpy as np - -try: - from pydantic.v1 import Field -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field +from pydantic import Field from .basemodels import ProtoModel, qcschema_draft from .basis import BasisSet if TYPE_CHECKING: - try: - from pydantic.v1.typing import ReprArgs - except ImportError: # Will also trap ModuleNotFoundError - from pydantic.typing import ReprArgs + ReprArgs = Sequence[Tuple[Optional[str], Any]] -# Encoders, to be deprecated -ndarray_encoder = {np.ndarray: lambda v: v.flatten().tolist()} +def provenance_json_schema_extra(schema, 
model): + schema["$schema"] = qcschema_draft class Provenance(ProtoModel): @@ -28,16 +21,14 @@ class Provenance(ProtoModel): creator: str = Field(..., description="The name of the program, library, or person who created the object.") version: str = Field( "", - description="The version of the creator, blank otherwise. This should be sortable by the very broad `PEP 440 `_.", + description="The version of the creator, blank otherwise. " + "This should be sortable by the very broad `PEP 440 `_.", ) routine: str = Field("", description="The name of the routine or function within the creator, blank otherwise.") - class Config(ProtoModel.Config): - canonical_repr = True - extra: str = "allow" - - def schema_extra(schema, model): - schema["$schema"] = qcschema_draft + model_config = ProtoModel._merge_config_with( + canonical_repr=True, json_schema_extra=provenance_json_schema_extra, extra="allow" + ) class Model(ProtoModel): @@ -55,10 +46,7 @@ class Model(ProtoModel): ) # basis_spec: BasisSpec = None # This should be exclusive with basis, but for now will be omitted - - class Config(ProtoModel.Config): - canonical_repr = True - extra: str = "allow" + model_config = ProtoModel._merge_config_with(canonical_repr=True, extra="allow") class DriverEnum(str, Enum): @@ -82,7 +70,8 @@ class ComputeError(ProtoModel): error_type: str = Field( # type: ignore ..., # Error enumeration not yet strict - description="The type of error which was thrown. Restrict this field to short classifiers e.g. 'input_error'. Suggested classifiers: https://github.com/MolSSI/QCEngine/blob/master/qcengine/exceptions.py", + description="The type of error which was thrown. Restrict this field to short classifiers e.g. 'input_error'. 
" + "Suggested classifiers: https://github.com/MolSSI/QCEngine/blob/master/qcengine/exceptions.py", ) error_message: str = Field( # type: ignore ..., @@ -94,15 +83,17 @@ class ComputeError(ProtoModel): description="Additional information to bundle with the error.", ) - class Config: - repr_style = ["error_type", "error_message"] + model_config = ProtoModel._merge_config_with(repr_style=["error_type", "error_message"]) def __repr_args__(self) -> "ReprArgs": return [("error_type", self.error_type), ("error_message", self.error_message)] class FailedOperation(ProtoModel): - """Record indicating that a given operation (program, procedure, etc.) has failed and containing the reason and input data which generated the failure.""" + """ + Record indicating that a given operation (program, procedure, etc.) has failed + and containing the reason and input data which generated the failure. + """ id: str = Field( # type: ignore None, diff --git a/qcelemental/models/molecule.py b/qcelemental/models/molecule.py index d2261f63..e7c40403 100644 --- a/qcelemental/models/molecule.py +++ b/qcelemental/models/molecule.py @@ -10,11 +10,8 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast import numpy as np - -try: - from pydantic.v1 import ConstrainedFloat, ConstrainedInt, Field, constr, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import ConstrainedFloat, ConstrainedInt, Field, constr, validator +from pydantic import Field, constr, field_validator, model_serializer +from typing_extensions import Annotated # molparse imports separated b/c https://github.com/python/mypy/issues/7203 from ...molparse.from_arrays import from_arrays @@ -31,10 +28,7 @@ from .types import Array if TYPE_CHECKING: - try: - from pydantic.v1.typing import ReprArgs - except ImportError: # Will also trap ModuleNotFoundError - from pydantic.typing import ReprArgs + from .common_models import ReprArgs # Rounding quantities for hashing 
GEOMETRY_NOISE = 8 @@ -71,13 +65,8 @@ def float_prep(array, around): return array -class NonnegativeInt(ConstrainedInt): - ge = 0 - - -class BondOrderFloat(ConstrainedFloat): - ge = 0 - le = 5 +NonnegativeInt = Annotated[int, Field(ge=0)] +BondOrderFloat = Annotated[float, Field(ge=0, le=5)] class Identifiers(ProtoModel): @@ -97,8 +86,12 @@ class Identifiers(ProtoModel): pubchem_sid: Optional[str] = Field(None, description="PubChem Substance ID") pubchem_conformerid: Optional[str] = Field(None, description="PubChem Conformer ID") - class Config(ProtoModel.Config): - serialize_skip_defaults = True + model_config = ProtoModel._merge_config_with(serialize_skip_defaults=True) + + +def molecule_json_schema_extras(schema, model): + # below addresses the draft-04 issue until https://github.com/samuelcolvin/pydantic/issues/1478 . + schema["$schema"] = qcschema_draft class Molecule(ProtoModel): @@ -120,7 +113,7 @@ class Molecule(ProtoModel): """ - schema_name: constr(strip_whitespace=True, regex="^(qcschema_molecule)$") = Field( # type: ignore + schema_name: constr(strip_whitespace=True, pattern="^(qcschema_molecule)$") = Field( # type: ignore qcschema_molecule_default, description=( f"The QCSchema specification to which this model conforms. Explicitly fixed as {qcschema_molecule_default}." @@ -146,7 +139,7 @@ class Molecule(ProtoModel): "sets atomic order for all other per-atom fields like :attr:`~qcelemental.models.Molecule.real` and the first dimension of " ":attr:`~qcelemental.models.Molecule.geometry`. 
Ghost/virtual atoms must have an entry here in :attr:`~qcelemental.models.Molecule.symbols`; ghostedness is " "indicated through the :attr:`~qcelemental.models.Molecule.real` field.", - shape=["nat"], + json_schema_extra={"shape": ["nat"]}, ) geometry: Array[float] = Field( # type: ignore ..., @@ -160,19 +153,21 @@ class Molecule(ProtoModel): "QCElemental can also accept array-likes which can be mapped to (nat,3) such as a 1-D list of length 3*nat, " "or the serialized version of the array in (3*nat,) shape; all forms will be reshaped to " "(nat,3) for this attribute.", - shape=["nat", 3], - units="a0", + json_schema_extra={"shape": ["nat", 3], "units": "a0"}, ) # Molecule data name: Optional[str] = Field( # type: ignore None, - description="Common or human-readable name to assign to this molecule. This field can be arbitrary; see :attr:`~qcelemental.models.Molecule.identifiers` for well-defined labels.", + description="Common or human-readable name to assign to this molecule. " + "This field can be arbitrary; see :attr:`~qcelemental.models.Molecule.identifiers` " + "for well-defined labels.", ) identifiers: Optional[Identifiers] = Field( # type: ignore None, description="An optional dictionary of additional identifiers by which this molecule can be referenced, " - "such as INCHI, canonical SMILES, etc. See the :class:`~qcelemental.models.results.Identifiers` model for more details.", + "such as INCHI, canonical SMILES, etc. See the :class:`~qcelemental.models.results.Identifiers` " + "model for more details.", ) comment: Optional[str] = Field( # type: ignore None, @@ -184,79 +179,114 @@ class Molecule(ProtoModel): # Atom data masses_: Optional[Array[float]] = Field( # type: ignore None, - description="The ordered array of atomic masses. Index order " - "matches the 0-indexed indices of all other per-atom fields like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. 
If " - "this is not provided, the mass of each atom is inferred from its most common isotope. If this " - "is provided, it must be the same length as :attr:`~qcelemental.models.Molecule.symbols` but can accept ``None`` entries for " - "standard masses to infer from the same index in the :attr:`~qcelemental.models.Molecule.symbols` field.", - shape=["nat"], - units="u", + description="The ordered array of atomic masses. Index order matches the 0-indexed indices of all other " + "per-atom fields like :attr:`~qcelemental.models.Molecule.symbols` " + "and :attr:`~qcelemental.models.Molecule.real`. " + "If this is not provided, the mass of each atom is inferred from its most common isotope. " + "If this is provided, it must be the same length as :attr:`~qcelemental.models.Molecule.symbols` " + "but can accept ``None`` entries for standard masses to infer from the same index in the " + ":attr:`~qcelemental.models.Molecule.symbols` field.", + alias="masses", + json_schema_extra={"shape": ["nat"], "units": "u"}, ) real_: Optional[Array[bool]] = Field( # type: ignore None, - description="The ordered array indicating if each atom is real (``True``) or " - "ghost/virtual (``False``). Index " - "matches the 0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and the first " - "dimension of :attr:`~qcelemental.models.Molecule.geometry`. If this is not provided, all atoms are assumed to be real (``True``)." + description="The ordered array indicating if each atom is real (``True``) or ghost/virtual (``False``). " + "Index matches the 0-indexed indices of all other per-atom settings like " + ":attr:`~qcelemental.models.Molecule.symbols` and the first dimension of " + ":attr:`~qcelemental.models.Molecule.geometry`. " + "If this is not provided, all atoms are assumed to be real (``True``). 
" "If this is provided, the reality or ghostedness of every atom must be specified.", - shape=["nat"], + alias="real", + json_schema_extra={ + "shape": ["nat"], + }, ) atom_labels_: Optional[Array[str]] = Field( # type: ignore None, - description="Additional per-atom labels as an array of strings. Typical use is in " - "model conversions, such as Elemental <-> Molpro and not typically something which should be user " - "assigned. See the :attr:`~qcelemental.models.Molecule.comment` field for general human-consumable text to affix to the molecule.", - shape=["nat"], + description="Additional per-atom labels as an array of strings. Typical use is in model conversions, " + "such as Elemental <-> Molpro and not typically something which should be user assigned. " + "See the :attr:`~qcelemental.models.Molecule.comment` field for general human-consumable " + "text to affix to the molecule.", + alias="atom_labels", + json_schema_extra={ + "shape": ["nat"], + }, ) atomic_numbers_: Optional[Array[np.int16]] = Field( # type: ignore None, - description="An optional ordered 1-D array-like object of atomic numbers of shape (nat,). Index " - "matches the 0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. " - "Values are inferred from the :attr:`~qcelemental.models.Molecule.symbols` list if not explicitly set. " - "Ghostedness should be indicated through :attr:`~qcelemental.models.Molecule.real` field, not zeros here.", - shape=["nat"], + description="An optional ordered 1-D array-like object of atomic numbers of shape (nat,). Index matches the " + "0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols`" + " and :attr:`~qcelemental.models.Molecule.real`. Values are inferred from the " + ":attr:`~qcelemental.models.Molecule.symbols` list if not explicitly set. 
Ghostedness should be " + "indicated through :attr:`~qcelemental.models.Molecule.real` field, not zeros here.", + alias="atomic_numbers", + json_schema_extra={ + "shape": ["nat"], + }, ) mass_numbers_: Optional[Array[np.int16]] = Field( # type: ignore None, - description="An optional ordered 1-D array-like object of atomic *mass* numbers of shape (nat). Index " - "matches the 0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. " - "Values are inferred from the most common isotopes of the :attr:`~qcelemental.models.Molecule.symbols` list if not explicitly set. " + description="An optional ordered 1-D array-like object of atomic *mass* numbers of shape (nat). " + "Index matches the 0-indexed indices of all other per-atom settings like " + ":attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. " + "Values are inferred from the most common isotopes of the " + ":attr:`~qcelemental.models.Molecule.symbols` list if not explicitly set. " "If single isotope not (yet) known for an atom, -1 is placeholder.", - shape=["nat"], + alias="mass_numbers", + json_schema_extra={ + "shape": ["nat"], + }, ) # Fragment and connection data connectivity_: Optional[List[Tuple[NonnegativeInt, NonnegativeInt, BondOrderFloat]]] = Field( # type: ignore None, - description="A list of bonds within the molecule. Each entry is a tuple " - "of ``(atom_index_A, atom_index_B, bond_order)`` where the ``atom_index`` " - "matches the 0-indexed indices of all other per-atom settings like :attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. " + description="A list of bonds within the molecule. 
" + "Each entry is a tuple of ``(atom_index_A, atom_index_B, bond_order)`` where the ``atom_index`` " + "matches the 0-indexed indices of all other per-atom settings like " + ":attr:`~qcelemental.models.Molecule.symbols` and :attr:`~qcelemental.models.Molecule.real`. " "Bonds may be freely reordered and inverted.", - min_items=1, + alias="connectivity", + min_length=1, ) fragments_: Optional[List[Array[np.int32]]] = Field( # type: ignore None, description="List of indices grouping atoms (0-indexed) into molecular fragments within the molecule. " - "Each entry in the outer list is a new fragment; index matches the ordering in :attr:`~qcelemental.models.Molecule.fragment_charges` and " - ":attr:`~qcelemental.models.Molecule.fragment_multiplicities`. Inner lists are 0-indexed atoms which compose the fragment; every atom must " - "be in exactly one inner list. Noncontiguous fragments are allowed, though no QM program is known to support them. " - "Fragment ordering is fixed; that is, a consumer who shuffles fragments must not reattach the input " - "(pre-shuffling) molecule schema instance to any output (post-shuffling) per-fragment results (e.g., n-body energy arrays).", - shape=["nfr", ""], + "Each entry in the outer list is a new fragment; index matches the ordering in " + ":attr:`~qcelemental.models.Molecule.fragment_charges` and " + ":attr:`~qcelemental.models.Molecule.fragment_multiplicities`. " + "Inner lists are 0-indexed atoms which compose the fragment; every atom must be in exactly one " + "inner list. Noncontiguous fragments are allowed, though no QM program is known to support them. 
" + "Fragment ordering is fixed; that is, a consumer who shuffles fragments must not reattach the " + "input (pre-shuffling) molecule schema instance to any output (post-shuffling) per-fragment " + "results (e.g., n-body energy arrays).", + alias="fragments", + json_schema_extra={ + "shape": ["nfr", ""], + }, ) fragment_charges_: Optional[List[float]] = Field( # type: ignore None, - description="The total charge of each fragment in the :attr:`~qcelemental.models.Molecule.fragments` list. The index of this " - "list matches the 0-index indices of :attr:`~qcelemental.models.Molecule.fragments` list. Will be filled in based on a set of rules " - "if not provided (and :attr:`~qcelemental.models.Molecule.fragments` are specified).", - shape=["nfr"], + description="The total charge of each fragment in the :attr:`~qcelemental.models.Molecule.fragments` list. " + "The index of this list matches the 0-index indices of " + ":attr:`~qcelemental.models.Molecule.fragments` list. Will be filled in based on a set of rules if " + "not provided (and :attr:`~qcelemental.models.Molecule.fragments` are specified).", + alias="fragment_charges", + json_schema_extra={ + "shape": ["nfr"], + }, ) fragment_multiplicities_: Optional[List[int]] = Field( # type: ignore None, - description="The multiplicity of each fragment in the :attr:`~qcelemental.models.Molecule.fragments` list. The index of this " - "list matches the 0-index indices of :attr:`~qcelemental.models.Molecule.fragments` list. Will be filled in based on a set of " - "rules if not provided (and :attr:`~qcelemental.models.Molecule.fragments` are specified).", - shape=["nfr"], + description="The multiplicity of each fragment in the :attr:`~qcelemental.models.Molecule.fragments` list. " + "The index of this list matches the 0-index indices of " + ":attr:`~qcelemental.models.Molecule.fragments` list. 
Will be filled in based on a set of rules if " + "not provided (and :attr:`~qcelemental.models.Molecule.fragments` are specified).", + alias="fragment_multiplicities", + json_schema_extra={ + "shape": ["nfr"], + }, ) # Orientation @@ -289,6 +319,7 @@ class Molecule(ProtoModel): default_factory=partial(provenance_stamp, __name__), description="The provenance information about how this Molecule (and its attributes) were generated, " "provided, and manipulated.", + validate_default=True, # Force casting provenance to dict ) id: Optional[Any] = Field( # type: ignore None, @@ -301,28 +332,16 @@ class Molecule(ProtoModel): description="Additional information to bundle with the molecule. Use for schema development and scratch space.", ) - class Config(ProtoModel.Config): - serialize_skip_defaults = True - repr_style = lambda self: [ + model_config = ProtoModel._merge_config_with( + serialize_skip_defaults=True, + json_schema_extra=molecule_json_schema_extras, + repr_style=lambda self: [ ("name", self.name), ("formula", self.get_molecular_formula()), ("hash", self.get_hash()[:7]), - ] - fields = { - "masses_": "masses", - "real_": "real", - "atom_labels_": "atom_labels", - "atomic_numbers_": "atomic_numbers", - "mass_numbers_": "mass_numbers", - "connectivity_": "connectivity", - "fragments_": "fragments", - "fragment_charges_": "fragment_charges", - "fragment_multiplicities_": "fragment_multiplicities", - } - - def schema_extra(schema, model): - # below addresses the draft-04 issue until https://github.com/samuelcolvin/pydantic/issues/1478 . - schema["$schema"] = qcschema_draft + ], + ) + # Alias fields are handled with the Field objects above def __init__(self, orient: bool = False, validate: Optional[bool] = None, **kwargs: Any) -> None: r"""Initializes the molecule object from dictionary-like values. 
@@ -377,34 +396,38 @@ def __init__(self, orient: bool = False, validate: Optional[bool] = None, **kwar elif validate or geometry_prep: values["geometry"] = float_prep(values["geometry"], geometry_noise) - @validator("geometry") - def _must_be_3n(cls, v, values, **kwargs): - n = len(values["symbols"]) + @field_validator("geometry") + @classmethod + def _must_be_3n(cls, v, info): + n = len(info.data["symbols"]) try: v = v.reshape(n, 3) except (ValueError, AttributeError): raise ValueError("Geometry must be castable to shape (N,3)!") return v - @validator("masses_", "real_") - def _must_be_n(cls, v, values, **kwargs): - n = len(values["symbols"]) + @field_validator("masses_", "real_") + @classmethod + def _must_be_n(cls, v, info): + n = len(info.data["symbols"]) if len(v) != n: raise ValueError("Masses and Real must be same number of entries as Symbols") return v - @validator("real_") - def _populate_real(cls, v, values, **kwargs): + @field_validator("real_") + @classmethod + def _populate_real(cls, v, info): # Can't use geometry here since its already been validated and not in values - n = len(values["symbols"]) + n = len(info.data["symbols"]) if len(v) == 0: v = np.array([True for _ in range(n)]) return v - @validator("fragment_charges_", "fragment_multiplicities_") - def _must_be_n_frag(cls, v, values, **kwargs): - if "fragments_" in values and values["fragments_"] is not None: - n = len(values["fragments_"]) + @field_validator("fragment_charges_", "fragment_multiplicities_") + @classmethod + def _must_be_n_frag(cls, v, info): + if "fragments_" in info.data and info.data["fragments_"] is not None: + n = len(info.data["fragments_"]) if len(v) != n: raise ValueError( "Fragment Charges and Fragment Multiplicities must be same number of entries as Fragments" @@ -545,7 +568,7 @@ def orient_molecule(self): r""" Centers the molecule and orients via inertia tensor before returning a new Molecule """ - return Molecule(orient=True, **self.dict()) + return 
Molecule(orient=True, **self.model_dump()) def compare(self, other): warnings.warn( @@ -569,10 +592,24 @@ def __eq__(self, other): return self.get_hash() == other.get_hash() - def dict(self, *args, **kwargs): - kwargs["by_alias"] = True - kwargs["exclude_unset"] = True - return super().dict(*args, **kwargs) + def dict(self, **kwargs): + warnings.warn("The `dict` method is deprecated; use `model_dump` instead.", DeprecationWarning) + return self.model_dump(**kwargs) + + @model_serializer(mode="wrap") + def _serialize_molecule(self, handler) -> Dict[str, Any]: + default_result = handler(self) + output_dict = {} + for key, value in default_result.items(): + # Could do this as a single comprehension dict, but this is easier to read + # Handle exclude unset is always true + if key not in self.model_fields_set: + continue + # Handle "by_alias" is always true + alias = self.model_fields[key].alias + output_key = alias if alias is not None else key + output_dict[output_key] = value + return output_dict def pretty_print(self): r"""Print the molecule in Angstroms. Same as :py:func:`print_out` only always in Angstroms. 
@@ -759,7 +796,7 @@ def to_string( # type: ignore Suggest psi4 --> psi4frag and psi4 route to to_string """ - molrec = from_schema(self.dict(), nonphysical=True) + molrec = from_schema(self.model_dump(), nonphysical=True) return to_string( molrec, dtype=dtype, @@ -1259,7 +1296,7 @@ def align( "atomic_numbers": solution.align_atoms(concern_mol.atomic_numbers), "mass_numbers": solution.align_atoms(concern_mol.mass_numbers), } - adict = {**concern_mol.dict(), **aupdate} + adict = {**concern_mol.model_dump(), **aupdate} # preserve intrinsic symmetry with lighter truncation amol = Molecule(validate=True, **adict, geometry_noise=13) @@ -1383,7 +1420,7 @@ def scramble( "atomic_numbers": perturbation.align_atoms(ref_mol.atomic_numbers), "mass_numbers": perturbation.align_atoms(ref_mol.mass_numbers), } - cdict = {**ref_mol.dict(), **cupdate} + cdict = {**ref_mol.model_dump(), **cupdate} # preserve intrinsic symmetry with lighter truncation cmol = Molecule(validate=True, **cdict, geometry_noise=13) diff --git a/qcelemental/models/procedures.py b/qcelemental/models/procedures.py index 90f3c7cf..2b7ecb86 100644 --- a/qcelemental/models/procedures.py +++ b/qcelemental/models/procedures.py @@ -1,13 +1,10 @@ from enum import Enum from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple -try: - from pydantic.v1 import Field, conlist, constr, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field, conlist, constr, validator +from pydantic import Field, conlist, constr, field_validator from ...util import provenance_stamp -from .basemodels import ProtoModel +from .basemodels import ExtendedConfigDict, ProtoModel from .common_models import ( ComputeError, DriverEnum, @@ -23,10 +20,7 @@ from .results import AtomicResult if TYPE_CHECKING: - try: - from pydantic.v1.typing import ReprArgs - except ImportError: # Will also trap ModuleNotFoundError - from pydantic.typing import ReprArgs + from .common_models import ReprArgs class 
TrajectoryProtocolEnum(str, Enum): @@ -49,8 +43,7 @@ class OptimizationProtocols(ProtoModel): TrajectoryProtocolEnum.all, description=str(TrajectoryProtocolEnum.__doc__) ) - class Config: - force_skip_defaults = True + model_config = ExtendedConfigDict(force_skip_defaults=True) class QCInputSpecification(ProtoModel): @@ -58,7 +51,7 @@ class QCInputSpecification(ProtoModel): A compute description for energy, gradient, and Hessian computations used in a geometry optimization. """ - schema_name: constr(strip_whitespace=True, regex=qcschema_input_default) = qcschema_input_default # type: ignore + schema_name: constr(strip_whitespace=True, pattern=qcschema_input_default) = qcschema_input_default # type: ignore schema_version: int = 1 driver: DriverEnum = Field(DriverEnum.gradient, description=str(DriverEnum.__doc__)) @@ -75,7 +68,7 @@ class OptimizationInput(ProtoModel): id: Optional[str] = None hash_index: Optional[str] = None schema_name: constr( # type: ignore - strip_whitespace=True, regex=qcschema_optimization_input_default + strip_whitespace=True, pattern=qcschema_optimization_input_default ) = qcschema_optimization_input_default schema_version: int = 1 @@ -90,14 +83,14 @@ class OptimizationInput(ProtoModel): def __repr_args__(self) -> "ReprArgs": return [ - ("model", self.input_specification.model.dict()), + ("model", self.input_specification.model.model_dump()), ("molecule_hash", self.initial_molecule.get_hash()[:7]), ] class OptimizationResult(OptimizationInput): schema_name: constr( # type: ignore - strip_whitespace=True, regex=qcschema_optimization_output_default + strip_whitespace=True, pattern=qcschema_optimization_output_default ) = qcschema_optimization_output_default final_molecule: Optional[Molecule] = Field(..., description="The final molecule of the geometry optimization.") @@ -115,13 +108,14 @@ class OptimizationResult(OptimizationInput): error: Optional[ComputeError] = Field(None, description=str(ComputeError.__doc__)) provenance: Provenance = 
Field(..., description=str(Provenance.__doc__)) - @validator("trajectory", each_item=False) - def _trajectory_protocol(cls, v, values): + @field_validator("trajectory") + @classmethod + def _trajectory_protocol(cls, v, info): # Do not propogate validation errors - if "protocols" not in values: + if "protocols" not in info.data: raise ValueError("Protocols was not properly formed.") - keep_enum = values["protocols"].trajectory + keep_enum = info.data["protocols"].trajectory if keep_enum == "all": pass elif keep_enum == "initial_and_final": @@ -148,14 +142,17 @@ class OptimizationSpecification(ProtoModel): * This class is still provisional and may be subject to removal and re-design. """ - schema_name: constr(strip_whitespace=True, regex="qcschema_optimization_specification") = "qcschema_optimization_specification" # type: ignore + schema_name: constr( + strip_whitespace=True, pattern="qcschema_optimization_specification" + ) = "qcschema_optimization_specification" # type: ignore schema_version: int = 1 procedure: str = Field(..., description="Optimization procedure to run the optimization with.") keywords: Dict[str, Any] = Field({}, description="The optimization specific keywords to be used.") protocols: OptimizationProtocols = Field(OptimizationProtocols(), description=str(OptimizationProtocols.__doc__)) - @validator("procedure") + @field_validator("procedure") + @classmethod def _check_procedure(cls, v): return v.lower() @@ -205,14 +202,16 @@ class TorsionDriveInput(ProtoModel): * This class is still provisional and may be subject to removal and re-design. 
""" - schema_name: constr(strip_whitespace=True, regex=qcschema_torsion_drive_input_default) = qcschema_torsion_drive_input_default # type: ignore + schema_name: constr( + strip_whitespace=True, pattern=qcschema_torsion_drive_input_default + ) = qcschema_torsion_drive_input_default # type: ignore schema_version: int = 1 keywords: TDKeywords = Field(..., description="The torsion drive specific keywords to be used.") extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.") input_specification: QCInputSpecification = Field(..., description=str(QCInputSpecification.__doc__)) - initial_molecule: conlist(item_type=Molecule, min_items=1) = Field( + initial_molecule: conlist(item_type=Molecule, min_length=1) = Field( ..., description="The starting molecule(s) for the torsion drive." ) @@ -222,7 +221,8 @@ class TorsionDriveInput(ProtoModel): provenance: Provenance = Field(Provenance(**provenance_stamp(__name__)), description=str(Provenance.__doc__)) - @validator("input_specification") + @field_validator("input_specification") + @classmethod def _check_input_specification(cls, value): assert value.driver == DriverEnum.gradient, "driver must be set to gradient" return value @@ -236,7 +236,9 @@ class TorsionDriveResult(TorsionDriveInput): * This class is still provisional and may be subject to removal and re-design. 
""" - schema_name: constr(strip_whitespace=True, regex=qcschema_torsion_drive_output_default) = qcschema_torsion_drive_output_default # type: ignore + schema_name: constr( + strip_whitespace=True, pattern=qcschema_torsion_drive_output_default + ) = qcschema_torsion_drive_output_default # type: ignore schema_version: int = 1 final_energies: Dict[str, float] = Field( diff --git a/qcelemental/models/results.py b/qcelemental/models/results.py index 44140729..ea0b6fcf 100644 --- a/qcelemental/models/results.py +++ b/qcelemental/models/results.py @@ -3,24 +3,17 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Set, Union import numpy as np - -try: - from pydantic.v1 import Field, constr, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field, constr, validator +from pydantic import Field, constr, field_validator from ...util import provenance_stamp -from .basemodels import ProtoModel, qcschema_draft +from .basemodels import ExtendedConfigDict, ProtoModel, qcschema_draft from .basis import BasisSet from .common_models import ComputeError, DriverEnum, Model, Provenance, qcschema_input_default, qcschema_output_default from .molecule import Molecule from .types import Array if TYPE_CHECKING: - try: - from pydantic.v1.typing import ReprArgs - except ImportError: # Will also trap ModuleNotFoundError - from pydantic.typing import ReprArgs + from .common_models import ReprArgs class AtomicResultProperties(ProtoModel): @@ -49,65 +42,64 @@ class AtomicResultProperties(ProtoModel): return_gradient: Optional[Array[float]] = Field( None, description=f"The gradient of the requested method, identical to :attr:`~qcelemental.models.AtomicResult.return_result` for :attr:`~qcelemental.models.AtomicInput.driver`\\ =\\ :attr:`~qcelemental.models.DriverEnum.gradient` computations.", - units="E_h/a0", + json_schema_extra={"units": "E_h/a0"}, ) return_hessian: Optional[Array[float]] = Field( None, description=f"The Hessian of the requested 
method, identical to :attr:`~qcelemental.models.AtomicResult.return_result` for :attr:`~qcelemental.models.AtomicInput.driver`\\ =\\ :attr:`~qcelemental.models.DriverEnum.hessian` computations.", - units="E_h/a0^2", + json_schema_extra={"units": "E_h/a0^2"}, ) # SCF Keywords scf_one_electron_energy: Optional[float] = Field( None, description="The one-electron (core Hamiltonian) energy contribution to the total SCF energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) scf_two_electron_energy: Optional[float] = Field( None, description="The two-electron energy contribution to the total SCF energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) scf_vv10_energy: Optional[float] = Field( None, description="The VV10 functional energy contribution to the total SCF energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) scf_xc_energy: Optional[float] = Field( None, description="The functional (XC) energy contribution to the total SCF energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) scf_dispersion_correction_energy: Optional[float] = Field( None, description="The dispersion correction appended to an underlying functional when a DFT-D method is requested.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) scf_dipole_moment: Optional[Array[float]] = Field( None, description="The SCF X, Y, and Z dipole components", - units="e a0", + json_schema_extra={"units": "e a0"}, ) scf_quadrupole_moment: Optional[Array[float]] = Field( None, description="The quadrupole components (redundant; 6 unique).", - shape=[3, 3], - units="e a0^2", + json_schema_extra={"units": "e a0^2", "shape": [3, 3]}, ) scf_total_energy: Optional[float] = Field( None, description="The total electronic energy of the SCF stage of the calculation.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) scf_total_gradient: Optional[Array[float]] = Field( None, description="The total electronic gradient of the SCF stage of the calculation.", - units="E_h/a0", + 
json_schema_extra={"units": "E_h/a0"}, ) scf_total_hessian: Optional[Array[float]] = Field( None, description="The total electronic Hessian of the SCF stage of the calculation.", - units="E_h/a0^2", + json_schema_extra={"units": "E_h/a0^2"}, ) scf_iterations: Optional[int] = Field(None, description="The number of SCF iterations taken before convergence.") @@ -115,76 +107,74 @@ class AtomicResultProperties(ProtoModel): mp2_same_spin_correlation_energy: Optional[float] = Field( None, description="The portion of MP2 doubles correlation energy from same-spin (i.e. triplet) correlations, without any user scaling.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) mp2_opposite_spin_correlation_energy: Optional[float] = Field( None, description="The portion of MP2 doubles correlation energy from opposite-spin (i.e. singlet) correlations, without any user scaling.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) mp2_singles_energy: Optional[float] = Field( None, description="The singles portion of the MP2 correlation energy. 
Zero except in ROHF.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) mp2_doubles_energy: Optional[float] = Field( None, description="The doubles portion of the MP2 correlation energy including same-spin and opposite-spin correlations.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) mp2_correlation_energy: Optional[float] = Field( None, description="The MP2 correlation energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) mp2_total_energy: Optional[float] = Field( None, description="The total MP2 energy (MP2 correlation energy + HF energy).", - units="E_h", + json_schema_extra={"units": "E_h"}, ) mp2_dipole_moment: Optional[Array[float]] = Field( None, description="The MP2 X, Y, and Z dipole components.", - shape=[3], - units="e a0", + json_schema_extra={"shape": [3], "units": "e a0"}, ) # CCSD Keywords ccsd_same_spin_correlation_energy: Optional[float] = Field( None, description="The portion of CCSD doubles correlation energy from same-spin (i.e. triplet) correlations, without any user scaling.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsd_opposite_spin_correlation_energy: Optional[float] = Field( None, description="The portion of CCSD doubles correlation energy from opposite-spin (i.e. singlet) correlations, without any user scaling.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsd_singles_energy: Optional[float] = Field( None, description="The singles portion of the CCSD correlation energy. 
Zero except in ROHF.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsd_doubles_energy: Optional[float] = Field( None, description="The doubles portion of the CCSD correlation energy including same-spin and opposite-spin correlations.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsd_correlation_energy: Optional[float] = Field( None, description="The CCSD correlation energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsd_total_energy: Optional[float] = Field( None, description="The total CCSD energy (CCSD correlation energy + HF energy).", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsd_dipole_moment: Optional[Array[float]] = Field( None, description="The CCSD X, Y, and Z dipole components.", - shape=[3], - units="e a0", + json_schema_extra={"shape": [3], "units": "e a0"}, ) ccsd_iterations: Optional[int] = Field(None, description="The number of CCSD iterations taken before convergence.") @@ -192,36 +182,34 @@ class AtomicResultProperties(ProtoModel): ccsd_prt_pr_correlation_energy: Optional[float] = Field( None, description="The CCSD(T) correlation energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsd_prt_pr_total_energy: Optional[float] = Field( None, description="The total CCSD(T) energy (CCSD(T) correlation energy + HF energy).", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsd_prt_pr_dipole_moment: Optional[Array[float]] = Field( None, description="The CCSD(T) X, Y, and Z dipole components.", - shape=[3], - units="e a0", + json_schema_extra={"shape": [3], "units": "e a0"}, ) # CCSDT keywords ccsdt_correlation_energy: Optional[float] = Field( None, description="The CCSDT correlation energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsdt_total_energy: Optional[float] = Field( None, description="The total CCSDT energy (CCSDT correlation energy + HF energy).", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsdt_dipole_moment: Optional[Array[float]] = Field( None, 
description="The CCSDT X, Y, and Z dipole components.", - shape=[3], - units="e a0", + json_schema_extra={"shape": [3], "units": "e a0"}, ) ccsdt_iterations: Optional[int] = Field( None, description="The number of CCSDT iterations taken before convergence." @@ -231,65 +219,65 @@ class AtomicResultProperties(ProtoModel): ccsdtq_correlation_energy: Optional[float] = Field( None, description="The CCSDTQ correlation energy.", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsdtq_total_energy: Optional[float] = Field( None, description="The total CCSDTQ energy (CCSDTQ correlation energy + HF energy).", - units="E_h", + json_schema_extra={"units": "E_h"}, ) ccsdtq_dipole_moment: Optional[Array[float]] = Field( None, description="The CCSDTQ X, Y, and Z dipole components.", - shape=[3], - units="e a0", + json_schema_extra={"shape": [3], "units": "e a0"}, ) ccsdtq_iterations: Optional[int] = Field( None, description="The number of CCSDTQ iterations taken before convergence." ) - class Config(ProtoModel.Config): - force_skip_defaults = True + model_config = ProtoModel._merge_config_with(force_skip_defaults=True) def __repr_args__(self) -> "ReprArgs": return [(k, v) for k, v in self.dict().items()] - @validator( + @field_validator( "scf_dipole_moment", "mp2_dipole_moment", "ccsd_dipole_moment", "ccsd_prt_pr_dipole_moment", "scf_quadrupole_moment", ) - def _validate_poles(cls, v, values, field): + @classmethod + def _validate_poles(cls, v, info): if v is None: return v - if field.name.endswith("_dipole_moment"): + if info.field_name.endswith("_dipole_moment"): order = 1 - elif field.name.endswith("_quadrupole_moment"): + elif info.field_name.endswith("_quadrupole_moment"): order = 2 shape = tuple([3] * order) return np.asarray(v).reshape(shape) - @validator( + @field_validator( "return_gradient", "return_hessian", "scf_total_gradient", "scf_total_hessian", ) - def _validate_derivs(cls, v, values, field): + @classmethod + def _validate_derivs(cls, v, info): if v is 
None: return v - nat = values.get("calcinfo_natom", None) + nat = info.data.get("calcinfo_natom", None) if nat is None: raise ValueError(f"Please also set ``calcinfo_natom``!") - if field.name.endswith("_gradient"): + if info.field_name.endswith("_gradient"): shape = (nat, 3) - elif field.name.endswith("_hessian"): + elif info.field_name.endswith("_hessian"): shape = (3 * nat, 3 * nat) try: @@ -303,11 +291,12 @@ def dict(self, *args, **kwargs): # Sep 2021: commenting below for now to allow recomposing AtomicResult.properties for qcdb. # This will break QCFractal tests for now, but future qcf will be ok with it. # kwargs["encoding"] = "json" - return super().dict(*args, **kwargs) + return super().model_dump(*args, **kwargs) class WavefunctionProperties(ProtoModel): - r"""Wavefunction properties resulting from a computation. Matrix quantities are stored in column-major order. Presence and contents configurable by protocol.""" + r"""Wavefunction properties resulting from a computation. + Matrix quantities are stored in column-major order. 
Presence and contents configurable by protocol.""" # Class properties _return_results_names: Set[str] = { @@ -334,84 +323,156 @@ class WavefunctionProperties(ProtoModel): # Core Hamiltonian h_core_a: Optional[Array[float]] = Field( - None, description="Alpha-spin core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"] + None, + description="Alpha-spin core (one-electron) Hamiltonian in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) h_core_b: Optional[Array[float]] = Field( - None, description="Beta-spin core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"] + None, + description="Beta-spin core (one-electron) Hamiltonian in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) h_effective_a: Optional[Array[float]] = Field( - None, description="Alpha-spin effective core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"] + None, + description="Alpha-spin effective core (one-electron) Hamiltonian in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) h_effective_b: Optional[Array[float]] = Field( - None, description="Beta-spin effective core (one-electron) Hamiltonian in the AO basis", shape=["nao", "nao"] + None, + description="Beta-spin effective core (one-electron) Hamiltonian in the AO basis", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) # SCF Results scf_orbitals_a: Optional[Array[float]] = Field( - None, description="SCF alpha-spin orbitals in the AO basis.", shape=["nao", "nmo"] + None, + description="SCF alpha-spin orbitals in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nmo"], + }, ) scf_orbitals_b: Optional[Array[float]] = Field( - None, description="SCF beta-spin orbitals in the AO basis.", shape=["nao", "nmo"] + None, + description="SCF beta-spin orbitals in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nmo"], + }, ) scf_density_a: Optional[Array[float]] = Field( - None, description="SCF alpha-spin density 
matrix in the AO basis.", shape=["nao", "nao"] + None, + description="SCF alpha-spin density matrix in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) scf_density_b: Optional[Array[float]] = Field( - None, description="SCF beta-spin density matrix in the AO basis.", shape=["nao", "nao"] + None, + description="SCF beta-spin density matrix in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) scf_fock_a: Optional[Array[float]] = Field( - None, description="SCF alpha-spin Fock matrix in the AO basis.", shape=["nao", "nao"] + None, + description="SCF alpha-spin Fock matrix in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) scf_fock_b: Optional[Array[float]] = Field( - None, description="SCF beta-spin Fock matrix in the AO basis.", shape=["nao", "nao"] + None, + description="SCF beta-spin Fock matrix in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) scf_eigenvalues_a: Optional[Array[float]] = Field( - None, description="SCF alpha-spin orbital eigenvalues.", shape=["nmo"] + None, + description="SCF alpha-spin orbital eigenvalues.", + json_schema_extra={ + "shape": ["nmo"], + }, ) scf_eigenvalues_b: Optional[Array[float]] = Field( - None, description="SCF beta-spin orbital eigenvalues.", shape=["nmo"] + None, + description="SCF beta-spin orbital eigenvalues.", + json_schema_extra={ + "shape": ["nmo"], + }, ) scf_occupations_a: Optional[Array[float]] = Field( - None, description="SCF alpha-spin orbital occupations.", shape=["nmo"] + None, + description="SCF alpha-spin orbital occupations.", + json_schema_extra={ + "shape": ["nmo"], + }, ) scf_occupations_b: Optional[Array[float]] = Field( - None, description="SCF beta-spin orbital occupations.", shape=["nmo"] + None, + description="SCF beta-spin orbital occupations.", + json_schema_extra={ + "shape": ["nmo"], + }, ) # BELOW from qcsk scf_coulomb_a: Optional[Array[float]] = Field( - None, description="SCF alpha-spin Coulomb matrix 
in the AO basis.", shape=["nao", "nao"] + None, + description="SCF alpha-spin Coulomb matrix in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) scf_coulomb_b: Optional[Array[float]] = Field( - None, description="SCF beta-spin Coulomb matrix in the AO basis.", shape=["nao", "nao"] + None, + description="SCF beta-spin Coulomb matrix in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) scf_exchange_a: Optional[Array[float]] = Field( - None, description="SCF alpha-spin exchange matrix in the AO basis.", shape=["nao", "nao"] + None, + description="SCF alpha-spin exchange matrix in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) scf_exchange_b: Optional[Array[float]] = Field( - None, description="SCF beta-spin exchange matrix in the AO basis.", shape=["nao", "nao"] + None, + description="SCF beta-spin exchange matrix in the AO basis.", + json_schema_extra={ + "shape": ["nao", "nao"], + }, ) # Localized-orbital SCF wavefunction quantities localized_orbitals_a: Optional[Array[float]] = Field( None, description="Localized alpha-spin orbitals in the AO basis. All nmo orbitals are included, even if only a subset were localized.", - shape=["nao", "nmo"], + json_schema_extra={"shape": ["nao", "nmo"]}, ) localized_orbitals_b: Optional[Array[float]] = Field( None, description="Localized beta-spin orbitals in the AO basis. All nmo orbitals are included, even if only a subset were localized.", - shape=["nao", "nmo"], + json_schema_extra={"shape": ["nao", "nmo"]}, ) localized_fock_a: Optional[Array[float]] = Field( None, description="Alpha-spin Fock matrix in the localized molecular orbital basis. All nmo orbitals are included, even if only a subset were localized.", - shape=["nmo", "nmo"], + json_schema_extra={"shape": ["nmo", "nmo"]}, ) localized_fock_b: Optional[Array[float]] = Field( None, description="Beta-spin Fock matrix in the localized molecular orbital basis. 
All nmo orbitals are included, even if only a subset were localized.", - shape=["nmo", "nmo"], + json_schema_extra={"shape": ["nmo", "nmo"]}, ) # ABOVE from qcsk @@ -435,20 +496,21 @@ class WavefunctionProperties(ProtoModel): None, description="Index to the beta-spin orbital occupations of the primary return." ) - class Config(ProtoModel.Config): - force_skip_defaults = True + model_config = ProtoModel._merge_config_with(force_skip_defaults=True) - @validator("scf_eigenvalues_a", "scf_eigenvalues_b", "scf_occupations_a", "scf_occupations_b") - def _assert1d(cls, v, values): + @field_validator("scf_eigenvalues_a", "scf_eigenvalues_b", "scf_occupations_a", "scf_occupations_b") + @classmethod + def _assert1d(cls, v): try: v = v.reshape(-1) except (ValueError, AttributeError): raise ValueError("Vector must be castable to shape (-1, )!") return v - @validator("scf_orbitals_a", "scf_orbitals_b") - def _assert2d_nao_x(cls, v, values): - bas = values.get("basis", None) + @field_validator("scf_orbitals_a", "scf_orbitals_b") + @classmethod + def _assert2d_nao_x(cls, v, info): + bas = info.data.get("basis", None) # Do not raise multiple errors if bas is None: @@ -460,7 +522,7 @@ def _assert2d_nao_x(cls, v, values): raise ValueError("Matrix must be castable to shape (nbf, -1)!") return v - @validator( + @field_validator( "h_core_a", "h_core_b", "h_effective_a", @@ -471,8 +533,9 @@ def _assert2d_nao_x(cls, v, values): "scf_fock_a", "scf_fock_b", ) - def _assert2d(cls, v, values): - bas = values.get("basis", None) + @classmethod + def _assert2d(cls, v, info): + bas = info.data.get("basis", None) # Do not raise multiple errors if bas is None: @@ -484,7 +547,7 @@ def _assert2d(cls, v, values): raise ValueError("Matrix must be castable to shape (nbf, nbf)!") return v - @validator( + @field_validator( "orbitals_a", "orbitals_b", "density_a", @@ -496,8 +559,9 @@ def _assert2d(cls, v, values): "occupations_a", "occupations_b", ) - def _assert_exists(cls, v, values): - if values.get(v, 
None) is None: + @classmethod + def _assert_exists(cls, v, info): + if info.data.get(v, None) is None: raise ValueError(f"Return quantity {v} does not exist in the values.") return v @@ -557,18 +621,21 @@ class AtomicResultProtocols(ProtoModel): description="Policies for keeping processed files from the computation", ) - class Config: - force_skip_defaults = True + model_config = ExtendedConfigDict(force_skip_defaults=True) ### Primary models +def atomic_input_json_schema_extra(schema, model): + schema["$schema"] = qcschema_draft + + class AtomicInput(ProtoModel): r"""The MolSSI Quantum Chemistry Schema""" id: Optional[str] = Field(None, description="The optional ID for the computation.") - schema_name: constr(strip_whitespace=True, regex="^(qc_?schema_input)$") = Field( # type: ignore + schema_name: constr(strip_whitespace=True, pattern="^(qc_?schema_input)$") = Field( # type: ignore qcschema_input_default, description=( f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_input_default}." 
@@ -591,17 +658,17 @@ class AtomicInput(ProtoModel): ) provenance: Provenance = Field( - default_factory=partial(provenance_stamp, __name__), description=str(Provenance.__doc__) + default_factory=partial(provenance_stamp, __name__), + description=str(Provenance.__doc__), + validate_default=True, # Cast inputs to ) - class Config(ProtoModel.Config): - def schema_extra(schema, model): - schema["$schema"] = qcschema_draft + model_config = ProtoModel._merge_config_with(json_schema_extra=atomic_input_json_schema_extra) def __repr_args__(self) -> "ReprArgs": return [ ("driver", self.driver.value), - ("model", self.model.dict()), + ("model", self.model.model_dump()), ("molecule_hash", self.molecule.get_hash()[:7]), ] @@ -609,7 +676,7 @@ def __repr_args__(self) -> "ReprArgs": class AtomicResult(AtomicInput): r"""Results from a CMS program execution.""" - schema_name: constr(strip_whitespace=True, regex="^(qc_?schema_output)$") = Field( # type: ignore + schema_name: constr(strip_whitespace=True, pattern="^(qc_?schema_output)$") = Field( # type: ignore qcschema_output_default, description=( f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_output_default}." 
@@ -634,7 +701,8 @@ class AtomicResult(AtomicInput): error: Optional[ComputeError] = Field(None, description=str(ComputeError.__doc__)) provenance: Provenance = Field(..., description=str(Provenance.__doc__)) - @validator("schema_name", pre=True) + @field_validator("schema_name", mode="before") + @classmethod def _input_to_output(cls, v): r"""If qcschema_input is passed in, cast it to output, otherwise no""" if v.lower().strip() in [qcschema_input_default, qcschema_output_default]: @@ -644,31 +712,33 @@ def _input_to_output(cls, v): "which will be converted to {0}".format(qcschema_output_default, qcschema_input_default) ) - @validator("return_result") - def _validate_return_result(cls, v, values): - if values["driver"] == "gradient": + @field_validator("return_result") + @classmethod + def _validate_return_result(cls, v, info): + if info.data["driver"] == "gradient": v = np.asarray(v).reshape(-1, 3) - elif values["driver"] == "hessian": + elif info.data["driver"] == "hessian": v = np.asarray(v) nsq = int(v.size**0.5) v.shape = (nsq, nsq) return v - @validator("wavefunction", pre=True) - def _wavefunction_protocol(cls, value, values): + @field_validator("wavefunction", mode="before") + @classmethod + def _wavefunction_protocol(cls, value, info): # We are pre, gotta do extra checks if value is None: return value elif isinstance(value, dict): wfn = value.copy() elif isinstance(value, WavefunctionProperties): - wfn = value.dict() + wfn = value.model_dump() else: raise ValueError("wavefunction must be None, a dict, or a WavefunctionProperties object.") # Do not propagate validation errors - if "protocols" not in values: + if "protocols" not in info.data: raise ValueError("Protocols was not properly formed.") # Handle restricted @@ -682,7 +752,7 @@ def _wavefunction_protocol(cls, value, values): wfn.pop(k) # Handle protocols - wfnp = values["protocols"].wavefunction + wfnp = info.data["protocols"].wavefunction return_keep = None if wfnp == "all": pass @@ -725,13 +795,14 
@@ def _wavefunction_protocol(cls, value, values): else: return wfn - @validator("stdout") - def _stdout_protocol(cls, value, values): + @field_validator("stdout") + @classmethod + def _stdout_protocol(cls, value, info): # Do not propagate validation errors - if "protocols" not in values: + if "protocols" not in info.data: raise ValueError("Protocols was not properly formed.") - outp = values["protocols"].stdout + outp = info.data["protocols"].stdout if outp is True: return value elif outp is False: @@ -739,9 +810,10 @@ def _stdout_protocol(cls, value, values): else: raise ValueError(f"Protocol `stdout:{outp}` is not understood") - @validator("native_files") - def _native_file_protocol(cls, value, values): - ancp = values["protocols"].native_files + @field_validator("native_files") + @classmethod + def _native_file_protocol(cls, value, info): + ancp = info.data["protocols"].native_files if ancp == "all": return value elif ancp == "none": diff --git a/qcelemental/models/types.py b/qcelemental/models/types.py index 7840482a..942203a8 100644 --- a/qcelemental/models/types.py +++ b/qcelemental/models/types.py @@ -1,25 +1,65 @@ +import sys +import warnings from typing import Any, Dict import numpy as np +from numpy.typing import NDArray +from pydantic import SerializerFunctionWrapHandler +from pydantic_core import core_schema +from typing_extensions import Annotated, get_args -class TypedArray(np.ndarray): - @classmethod - def __get_validators__(cls): - yield cls.validate - - @classmethod - def validate(cls, v): +def generate_caster(dtype): + def cast_to_np(v): try: - v = np.asarray(v, dtype=cls._dtype) + v = np.asarray(v, dtype=dtype) except ValueError: - raise ValueError("Could not cast {} to NumPy Array!".format(v)) - + raise ValueError(f"Could not cast {v} to NumPy Array!") return v + return cast_to_np + + +def listandstr_ndarray(v: Any, nxt: SerializerFunctionWrapHandler) -> str: + """Special helper to list NumPy arrays before serializing""" + if isinstance(v, 
np.ndarray): + return f"{nxt(v.tolist())}" + return f"{nxt(v)}" + + +def flatten_ndarray(v: Any, nxt: SerializerFunctionWrapHandler) -> np.ndarray: + """Special helper to first flatten NumPy arrays before serializing with json""" + if isinstance(v, np.ndarray): + return nxt(v.flatten()) + return nxt(v) + + +class ValidatableArrayAnnotation: + @classmethod + def __get_pydantic_core_schema__(cls, source, _): + """ + We return a pydantic_core.CoreSchema that behaves in the following ways: + + * Data will be cast to ndarrays with the correct dtype + * `ndarrays` instances will be parsed as `ndarrays` and cast to the correct dtype + """ + shape, dtype_alias = get_args(source) + dtype = get_args(dtype_alias)[0] + validator = generate_caster(dtype) + # When using JSON, flatten and to list it + serializer = core_schema.plain_serializer_function_ser_schema(lambda v: v.flatten().tolist(), when_used="json") + # Affix dtype metadata to the schema we'll use in serialization + schema = core_schema.no_info_plain_validator_function( + validator, serialization=serializer, metadata={"dtype": dtype} + ) + return schema + @classmethod - def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None: - dt = cls._dtype + def __get_pydantic_json_schema__(cls, _core_schema, handler) -> Dict[str, Any]: + # Old __modify_schema__ method from v1 setup in v2 and customized for our purposes + # Get the dtype metadata from our original schema + dt = _core_schema["metadata"]["dtype"] + output_schema = {} if dt is int or np.issubdtype(dt, np.integer): items = {"type": "number", "multipleOf": 1.0} elif dt is float or np.issubdtype(dt, np.floating): @@ -28,13 +68,29 @@ def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None: items = {"type": "string"} elif dt is bool or np.issubdtype(dt, np.bool_): items = {"type": "boolean"} - field_schema.update(type="array", items=items) + else: + items = {"type": "Unknown"} + warnings.warn(f"Unknown dtype to handle type [{dt}] for array. 
May result in weird serialization or typing") + output_schema.update(type="array", items=items) + return output_schema -class ArrayMeta(type): - def __getitem__(self, dtype): - return type("Array", (TypedArray,), {"_dtype": dtype}) +if sys.version_info < (3, 9): + # LNN: Ooooooooohhhh boy... + # Source information: https://github.com/beartype/beartype/issues/42 + # Annotated checks against instances of _GenericAlias to see if you can support Data[type_info], e.g. NDArray[int] + # (in Python types since 3.9, and kind of as _GenericAlias before that) + # Prior to 3.9, Numpy implemented their own version of _GenericAlias which isn't Python _GenericAlias, so the types + # are not considered "Generic" when Annotated[NDArray, Metadata][type_info] does its thing. + # So. This code block does a TON of heavy lifting to re-cast the NDArray type with _GenericAlias from python typing. + # I've tried to reuse as much data from NDArray as I possibly can and still use np.ndarray (which is not + # np.typing.NDArray) to still correctly type hint np.ndarrays. + # See (pre 3.9) numpy/typing/_generic_alias.py + from typing import _GenericAlias + _shape_info, _dtype_info = NDArray.__args__ + _generic_dtype = _GenericAlias(_dtype_info, _dtype_info.__args__) + _generic_ndarr = _GenericAlias(np.ndarray, (_shape_info, _generic_dtype)) + NDArray = _generic_ndarr -class Array(np.ndarray, metaclass=ArrayMeta): - pass +Array = Annotated[NDArray, ValidatableArrayAnnotation] From ded5edb06b054b7dd98b234daf83c504a7f1a7f9 Mon Sep 17 00:00:00 2001 From: "Lori A. 
Burns" Date: Thu, 5 Sep 2024 18:41:52 -0400 Subject: [PATCH 3/4] move Levi's pyd v2 models into qcsk v2 directory --- qcelemental/models/{ => v2}/align.py | 0 qcelemental/models/{ => v2}/basemodels.py | 0 qcelemental/models/{ => v2}/basis.py | 0 qcelemental/models/{ => v2}/common_models.py | 0 qcelemental/models/{ => v2}/molecule.py | 0 qcelemental/models/{ => v2}/procedures.py | 0 qcelemental/models/{ => v2}/results.py | 0 qcelemental/models/{ => v2}/types.py | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename qcelemental/models/{ => v2}/align.py (100%) rename qcelemental/models/{ => v2}/basemodels.py (100%) rename qcelemental/models/{ => v2}/basis.py (100%) rename qcelemental/models/{ => v2}/common_models.py (100%) rename qcelemental/models/{ => v2}/molecule.py (100%) rename qcelemental/models/{ => v2}/procedures.py (100%) rename qcelemental/models/{ => v2}/results.py (100%) rename qcelemental/models/{ => v2}/types.py (100%) diff --git a/qcelemental/models/align.py b/qcelemental/models/v2/align.py similarity index 100% rename from qcelemental/models/align.py rename to qcelemental/models/v2/align.py diff --git a/qcelemental/models/basemodels.py b/qcelemental/models/v2/basemodels.py similarity index 100% rename from qcelemental/models/basemodels.py rename to qcelemental/models/v2/basemodels.py diff --git a/qcelemental/models/basis.py b/qcelemental/models/v2/basis.py similarity index 100% rename from qcelemental/models/basis.py rename to qcelemental/models/v2/basis.py diff --git a/qcelemental/models/common_models.py b/qcelemental/models/v2/common_models.py similarity index 100% rename from qcelemental/models/common_models.py rename to qcelemental/models/v2/common_models.py diff --git a/qcelemental/models/molecule.py b/qcelemental/models/v2/molecule.py similarity index 100% rename from qcelemental/models/molecule.py rename to qcelemental/models/v2/molecule.py diff --git a/qcelemental/models/procedures.py b/qcelemental/models/v2/procedures.py similarity index 
100% rename from qcelemental/models/procedures.py rename to qcelemental/models/v2/procedures.py diff --git a/qcelemental/models/results.py b/qcelemental/models/v2/results.py similarity index 100% rename from qcelemental/models/results.py rename to qcelemental/models/v2/results.py diff --git a/qcelemental/models/types.py b/qcelemental/models/v2/types.py similarity index 100% rename from qcelemental/models/types.py rename to qcelemental/models/v2/types.py From d515a5f24a5d98da4b6e4b51d0feb6e55f374f0f Mon Sep 17 00:00:00 2001 From: "Lori A. Burns" Date: Fri, 6 Sep 2024 14:13:11 -0400 Subject: [PATCH 4/4] fix import, tests, etc --- .github/workflows/CI.yaml | 22 ++---- docs/api.rst | 3 +- docs/changelog.rst | 5 ++ docs/conf.py | 13 ++-- docs/model_common.rst | 16 ++--- docs/model_molecule.rst | 2 +- docs/model_result.rst | 10 +-- pyproject.toml | 60 +++++++++++++---- qcelemental/info/cpu_info.py | 5 +- qcelemental/info/dft_info.py | 5 +- qcelemental/models/__init__.py | 9 +++ qcelemental/models/v1/__init__.py | 8 --- qcelemental/models/v1/align.py | 6 +- qcelemental/models/v1/basemodels.py | 9 +-- qcelemental/models/v1/basis.py | 5 +- qcelemental/models/v1/common_models.py | 11 +-- qcelemental/models/v1/molecule.py | 11 +-- qcelemental/models/v1/procedures.py | 10 +-- qcelemental/models/v1/results.py | 11 +-- qcelemental/models/v2/__init__.py | 19 ++++++ qcelemental/models/v2/basemodels.py | 10 +-- qcelemental/models/v2/basis.py | 2 +- qcelemental/models/v2/molecule.py | 6 ++ qcelemental/models/v2/procedures.py | 25 ++----- qcelemental/models/v2/results.py | 89 ++++++------------------- qcelemental/tests/addons.py | 9 +-- qcelemental/tests/test_model_results.py | 52 +++++++++++---- qcelemental/tests/test_molutil.py | 11 +-- qcelemental/tests/test_utils.py | 8 +-- qcelemental/util/autodocs.py | 9 +-- 30 files changed, 210 insertions(+), 251 deletions(-) create mode 100644 qcelemental/models/v2/__init__.py diff --git a/.github/workflows/CI.yaml 
b/.github/workflows/CI.yaml index 55f641f4..6a258a01 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -17,12 +17,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.9", "3.11", "3.12"] - pydantic-version: ["1", "2"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + pydantic-version: ["2"] runs-on: [ubuntu-latest, windows-latest] - exclude: - - runs-on: windows-latest - pydantic-version: "1" name: "🐍 ${{ matrix.python-version }} • ${{ matrix.pydantic-version }} • ${{ matrix.runs-on }}" runs-on: ${{ matrix.runs-on }} @@ -35,15 +32,6 @@ jobs: uses: actions/checkout@v3 - name: Install poetry run: pip install poetry - # Force pydantic 1.0 by modifying poetry dep "pydantic" string with in-place sed - # -i is zero-length extension which does effectively in-place sub. - # Can't do -i '' because Ubuntu sed is -i{suffix} whereas OSX sed is -i {suffix}... ugh - # ^ start of line, pydantic, optional spaces and > sign, capture the version, replace with ^{version} - # Should avoid also replacing the autodoc-pydantic spec later on. 
- - name: Sed replace pydantic on repo - if: matrix.pydantic-version == '1' - run: | - sed -i 's/^pydantic *= *">*= *\([0-9.]*\)"/pydantic = "^\1"/' pyproject.toml - name: Install repo with poetry (full deps) if: matrix.python-version != '3.9' run: poetry install --no-interaction --no-ansi --all-extras @@ -74,12 +62,10 @@ jobs: name: Set up Python with: python-version: "3.10" + - name: Setup Graphviz + uses: ts-graphviz/setup-graphviz@v2 - name: Install poetry run: pip install poetry - # Force pydantic 1.0 by modifying poetry dep "pydantic" string with in-place sed (see above for details) - - name: Sed replace pydantic on repo - run: | - sed -i 's/^pydantic *= *">*= *\([0-9.]*\)"/pydantic = "^\1"/' pyproject.toml - name: Install repo run: poetry install --no-interaction --no-ansi - name: Build Documentation diff --git a/docs/api.rst b/docs/api.rst index 09bd0bd2..27e23c73 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -12,7 +12,6 @@ QCElemental API .. automodapi:: qcelemental.testing :skip:tnm -.. automodapi:: qcelemental.models - :skip:Optimization +.. automodapi:: qcelemental.models.v2 :skip:qcschema_models diff --git a/docs/changelog.rst b/docs/changelog.rst index 3da28c88..1148f544 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -26,13 +26,18 @@ Changelog Breaking Changes ++++++++++++++++ * The very old model names `ResultInput`, `Result`, `ResultProperties`, `Optimization` deprecated in 2019 are now only available through `qcelelemental.models.v1` +* ``models.v2`` do not support AutoDoc. The AutoDoc routines have been left at pydantic v1 syntax. Use autodoc-pydantic for Sphinx instead. New Features ++++++++++++ * Downstream code should ``from qcelemental.models.v1 import Molecule, AtomicResult`` etc. to assure medium-term availability of existing models. +* New pydantic v2 models available as ``from qcelemental.models.v2 import Molecule, AtomicResult`` etc. 
Enhancements ++++++++++++ +* The ``models.v2`` have had their ``schema_version`` bumped for ``BasisSet``, ``AtomicInput``, ``OptimizationInput`` (implicit for ``AtomicResult`` and ``OptimizationResult``), ``TorsionDriveInput``, and ``TorsionDriveResult``. +* The ``models.v2`` ``AtomicResultProperties`` has been given a ``schema_name`` and ``schema_version`` (2) for the first time. +* Note that ``models.v2`` ``QCInputSpecification`` and ``OptimizationSpecification`` have *not* had ``schema_version`` bumped. Bug Fixes +++++++++ diff --git a/docs/conf.py b/docs/conf.py index d420f73a..9633b9a3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -88,7 +88,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -115,7 +115,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] +html_static_path = [] # "_static"] # Custom sidebar templates, must be a dictionary that maps document names # to template names. 
@@ -187,8 +187,8 @@ # -- Extension configuration ------------------------------------------------- extlinks = { - "issue": ("https://github.com/MolSSI/QCElemental/issues/%s", "GH#"), - "pr": ("https://github.com/MolSSI/QCElemental/pull/%s", "GH#"), + "issue": ("https://github.com/MolSSI/QCElemental/issues/%s", "GH#%s"), + "pr": ("https://github.com/MolSSI/QCElemental/pull/%s", "GH#%s"), } @@ -200,8 +200,9 @@ "numpy": ("https://numpy.org/doc/stable/", None), "scipy": ("https://docs.scipy.org/doc/scipy/", None), "matplotlib": ("https://matplotlib.org/stable/", None), - "qcengine": ("http://docs.qcarchive.molssi.org/projects/QCEngine/en/latest/", None), - "qcfractal": ("http://docs.qcarchive.molssi.org/projects/QCFractal/en/latest/", None), + "qcengine": ("https://molssi.github.io/QCEngine/", None), + "qcfractal": ("https://molssi.github.io/QCFractal/", None), + "nglview": ("https://nglviewer.org/nglview/release/v2.7.7", None), } # -- Options for todo extension ---------------------------------------------- diff --git a/docs/model_common.rst b/docs/model_common.rst index d02946ca..6de905e9 100644 --- a/docs/model_common.rst +++ b/docs/model_common.rst @@ -6,40 +6,40 @@ Common Models used throughout the QCArchive ecosystem. BasisSet -------- -.. autopydantic_model:: qcelemental.models.BasisSet +.. autopydantic_model:: qcelemental.models.v2.BasisSet :noindex: -.. autopydantic_model:: qcelemental.models.basis.BasisCenter +.. autopydantic_model:: qcelemental.models.v2.basis.BasisCenter :noindex: -.. autopydantic_model:: qcelemental.models.basis.ElectronShell +.. autopydantic_model:: qcelemental.models.v2.basis.ElectronShell :noindex: -.. autopydantic_model:: qcelemental.models.basis.ECPPotential +.. autopydantic_model:: qcelemental.models.v2.basis.ECPPotential :noindex: ComputeError ------------ -.. autopydantic_model:: qcelemental.models.ComputeError +.. autopydantic_model:: qcelemental.models.v2.ComputeError :noindex: FailedOperation --------------- -.. 
autopydantic_model:: qcelemental.models.FailedOperation +.. autopydantic_model:: qcelemental.models.v2.FailedOperation :noindex: Provenance ---------- -.. autopydantic_model:: qcelemental.models.Provenance +.. autopydantic_model:: qcelemental.models.v2.Provenance :noindex: DriverEnum ---------- -.. autoclass:: qcelemental.models.DriverEnum +.. autoclass:: qcelemental.models.v2.DriverEnum :noindex: :members: :undoc-members: diff --git a/docs/model_molecule.rst b/docs/model_molecule.rst index 0b76e76a..06125184 100644 --- a/docs/model_molecule.rst +++ b/docs/model_molecule.rst @@ -127,6 +127,6 @@ Obtaining fragments with ghost atoms is also supported: API --- -.. autopydantic_model:: qcelemental.models.Molecule +.. autopydantic_model:: qcelemental.models.v2.Molecule :noindex: diff --git a/docs/model_result.rst b/docs/model_result.rst index ea2a8284..58c81373 100644 --- a/docs/model_result.rst +++ b/docs/model_result.rst @@ -8,25 +8,25 @@ A Python implementation of the `MolSSI QCSchema AtomicInput ----------- -.. autopydantic_model:: qcelemental.models.AtomicInput +.. autopydantic_model:: qcelemental.models.v2.AtomicInput :noindex: AtomicResult ------------ -.. autopydantic_model:: qcelemental.models.AtomicResult +.. autopydantic_model:: qcelemental.models.v2.AtomicResult :noindex: API --- -.. autopydantic_model:: qcelemental.models.results.AtomicResultProtocols +.. autopydantic_model:: qcelemental.models.v2.results.AtomicResultProtocols :noindex: -.. autopydantic_model:: qcelemental.models.results.AtomicResultProperties +.. autopydantic_model:: qcelemental.models.v2.results.AtomicResultProperties :noindex: -.. autopydantic_model:: qcelemental.models.results.WavefunctionProperties +.. 
autopydantic_model:: qcelemental.models.v2.results.WavefunctionProperties :noindex: diff --git a/pyproject.toml b/pyproject.toml index 5504e066..d733260b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,8 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Framework :: Pydantic", + "Framework :: Pydantic :: 2", ] [tool.poetry.dependencies] @@ -36,13 +38,13 @@ packaging = [ { version = ">=24.1", python = ">=3.8" }, ] # qcel is compatible with most any numpy, v1 or v2, but numpy v2 only works with pint >=0.24, which is only available for py >=3.10 -python = "^3.7" +python = "^3.7.1" pint = [ { version = ">=0.10", python = ">=3.7,<3.9" }, { version = ">=0.23", python = ">=3.9,<3.10" }, { version = ">=0.24", python = ">=3.10,<3.13" }, ] -pydantic = ">=1.8.2" +pydantic = ">=2.0" nglview = { version = "^3.0.3", optional = true } ipykernel = { version = "<6.0.0", optional = true } importlib-metadata = { version = ">=4.8", python = "<3.8" } @@ -58,24 +60,56 @@ viz = ["nglview", "ipykernel"] align = ["networkx", "scipy"] test = ["pytest"] +# Note that all the versions below are a farce for poetry's benefit. +# One needs a fairly recent sphinx, pydantic, and autodoc-pydantic for a +# successful docs build, and that likely requires py 3.9. 
+ [tool.poetry.group.dev.dependencies] black = ">=22.1.0,<23.0a0" mypy = "^1.1.1" isort = "5.11.5" -flake8 = "<6.0.0" -pre-commit = "<3.2.0" +flake8 = [ + { version = "<6.0.0", python = "<3.8.1" }, + { version = "6.0.0", python = ">=3.8.1,<4.0.0" } +] +pre-commit = [ + { version = "<3.2.0", python = "<3.9" }, + { version = "^3.8.0", python = ">=3.9,<4.0.0" } +] pytest-cov = "^4.0.0" autoflake = "^2.0.2" -jsonschema = "^4.17.3" -msgpack = "^1.0.5" -numpydoc = "^1.5.0" -docutils = "<0.19" -sphinx = "<6.0.0" +jsonschema = { version = "^4.23.0", python = ">=3.8,<4.0.0" } +msgpack = { version = "^1.0.8", python = ">=3.8,<4.0.0" } +numpydoc = [ + { version = "^1.5.0", python = "<3.9" }, + { version = "^1.8.0", python = ">=3.9,<4.0.0" } +] +docutils = [ + { version = "<0.19", python = "<3.9" }, + { version = "0.20.1", python = ">=3.9,<4.0.0" } +] +sphinx = [ + { version = "<6.0.0", python = "<3.9" }, + { version = "^7.0.0", python = ">=3.9,<4.0.0" } +] sphinxcontrib-napoleon = "^0.7" -sphinx-rtd-theme = "^1.2.0" -autodoc-pydantic = "^1.8.0" -sphinx-automodapi = "^0.15.0" -sphinx-autodoc-typehints = "^1.22" +sphinx-rtd-theme = [ + { version = "^1.2.0", python = "<3.9" }, + { version = "^2.0.0", python = ">=3.9,<4.0.0" } +] +autodoc-pydantic = [ + { version = "^2.0.0", python = "<3.8" }, + { version = "^2.1.0", python = ">=3.8,<4.0" } +] +sphinx-automodapi = [ + { version = "^0.15.0", python = "<3.8" }, + { version = "^0.17.0", python = ">=3.8,<4.0.0" } +] +sphinx-autodoc-typehints = [ + { version = "^1.22", python = "<3.10" }, + { version = "^2.3", python = ">=3.10,<4.0.0" } +] +graphviz = "^0.20.0" # insufficient on pypi as also need `dot`. python-graphviz sufficient in conda. 
[tool.black] line-length = 120 diff --git a/qcelemental/info/cpu_info.py b/qcelemental/info/cpu_info.py index 55f109de..4fe35689 100644 --- a/qcelemental/info/cpu_info.py +++ b/qcelemental/info/cpu_info.py @@ -8,10 +8,7 @@ from functools import lru_cache from typing import List, Optional -try: - from pydantic.v1 import Field -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field +from pydantic.v1 import Field from ..models import ProtoModel diff --git a/qcelemental/info/dft_info.py b/qcelemental/info/dft_info.py index ce82c763..073e40d3 100644 --- a/qcelemental/info/dft_info.py +++ b/qcelemental/info/dft_info.py @@ -4,10 +4,7 @@ from typing import Dict -try: - from pydantic.v1 import Field -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field +from pydantic.v1 import Field from ..models import ProtoModel diff --git a/qcelemental/models/__init__.py b/qcelemental/models/__init__.py index e7e2ac4b..829a3482 100644 --- a/qcelemental/models/__init__.py +++ b/qcelemental/models/__init__.py @@ -1 +1,10 @@ +try: + import pydantic +except ImportError: # pragma: no cover + raise ImportError( + "Python module pydantic not found. Solve by installing it: " + "`conda install pydantic -c conda-forge` or `pip install pydantic`" + ) + +from . import v1, v2 from .v1 import * diff --git a/qcelemental/models/v1/__init__.py b/qcelemental/models/v1/__init__.py index c17f2cdc..e61f458b 100644 --- a/qcelemental/models/v1/__init__.py +++ b/qcelemental/models/v1/__init__.py @@ -1,11 +1,3 @@ -try: - import pydantic -except ImportError: # pragma: no cover - raise ImportError( - "Python module pydantic not found. Solve by installing it: " - "`conda install pydantic -c conda-forge` or `pip install pydantic`" - ) - from . 
import types from .align import AlignmentMill from .basemodels import AutodocBaseSettings # remove when QCFractal merges `next` diff --git a/qcelemental/models/v1/align.py b/qcelemental/models/v1/align.py index ca09504f..2a6c0a23 100644 --- a/qcelemental/models/v1/align.py +++ b/qcelemental/models/v1/align.py @@ -1,11 +1,7 @@ from typing import Optional import numpy as np - -try: - from pydantic.v1 import Field, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field, validator +from pydantic.v1 import Field, validator from ...util import blockwise_contract, blockwise_expand from .basemodels import ProtoModel diff --git a/qcelemental/models/v1/basemodels.py b/qcelemental/models/v1/basemodels.py index 2fecef26..229b1588 100644 --- a/qcelemental/models/v1/basemodels.py +++ b/qcelemental/models/v1/basemodels.py @@ -3,13 +3,8 @@ from typing import Any, Dict, Optional, Set, Union import numpy as np - -try: - from pydantic.v1 import BaseSettings # remove when QCFractal merges `next` - from pydantic.v1 import BaseModel -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import BaseSettings # remove when QCFractal merges `next` - from pydantic import BaseModel +from pydantic.v1 import BaseSettings # remove when QCFractal merges `next` +from pydantic.v1 import BaseModel from qcelemental.util import deserialize, serialize from qcelemental.util.autodocs import AutoPydanticDocGenerator # remove when QCFractal merges `next` diff --git a/qcelemental/models/v1/basis.py b/qcelemental/models/v1/basis.py index 2a4b2c88..c7d1c4b8 100644 --- a/qcelemental/models/v1/basis.py +++ b/qcelemental/models/v1/basis.py @@ -1,10 +1,7 @@ from enum import Enum from typing import Dict, List, Optional -try: - from pydantic.v1 import ConstrainedInt, Field, constr, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import ConstrainedInt, Field, constr, validator +from pydantic.v1 import ConstrainedInt, 
Field, constr, validator from ...exceptions import ValidationError from .basemodels import ProtoModel, qcschema_draft diff --git a/qcelemental/models/v1/common_models.py b/qcelemental/models/v1/common_models.py index f848449d..7f822798 100644 --- a/qcelemental/models/v1/common_models.py +++ b/qcelemental/models/v1/common_models.py @@ -2,20 +2,13 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Union import numpy as np - -try: - from pydantic.v1 import Field -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field +from pydantic.v1 import Field from .basemodels import ProtoModel, qcschema_draft from .basis import BasisSet if TYPE_CHECKING: - try: - from pydantic.v1.typing import ReprArgs - except ImportError: # Will also trap ModuleNotFoundError - from pydantic.typing import ReprArgs + from pydantic.v1.typing import ReprArgs # Encoders, to be deprecated diff --git a/qcelemental/models/v1/molecule.py b/qcelemental/models/v1/molecule.py index d2261f63..e533b832 100644 --- a/qcelemental/models/v1/molecule.py +++ b/qcelemental/models/v1/molecule.py @@ -10,11 +10,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast import numpy as np - -try: - from pydantic.v1 import ConstrainedFloat, ConstrainedInt, Field, constr, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import ConstrainedFloat, ConstrainedInt, Field, constr, validator +from pydantic.v1 import ConstrainedFloat, ConstrainedInt, Field, constr, validator # molparse imports separated b/c https://github.com/python/mypy/issues/7203 from ...molparse.from_arrays import from_arrays @@ -31,10 +27,7 @@ from .types import Array if TYPE_CHECKING: - try: - from pydantic.v1.typing import ReprArgs - except ImportError: # Will also trap ModuleNotFoundError - from pydantic.typing import ReprArgs + from pydantic.v1.typing import ReprArgs # Rounding quantities for hashing GEOMETRY_NOISE = 8 diff --git 
a/qcelemental/models/v1/procedures.py b/qcelemental/models/v1/procedures.py index 90f3c7cf..5a0ce95b 100644 --- a/qcelemental/models/v1/procedures.py +++ b/qcelemental/models/v1/procedures.py @@ -1,10 +1,7 @@ from enum import Enum from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple -try: - from pydantic.v1 import Field, conlist, constr, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field, conlist, constr, validator +from pydantic.v1 import Field, conlist, constr, validator from ...util import provenance_stamp from .basemodels import ProtoModel @@ -23,10 +20,7 @@ from .results import AtomicResult if TYPE_CHECKING: - try: - from pydantic.v1.typing import ReprArgs - except ImportError: # Will also trap ModuleNotFoundError - from pydantic.typing import ReprArgs + from pydantic.v1.typing import ReprArgs class TrajectoryProtocolEnum(str, Enum): diff --git a/qcelemental/models/v1/results.py b/qcelemental/models/v1/results.py index 44140729..ede7197a 100644 --- a/qcelemental/models/v1/results.py +++ b/qcelemental/models/v1/results.py @@ -3,11 +3,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Set, Union import numpy as np - -try: - from pydantic.v1 import Field, constr, validator -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import Field, constr, validator +from pydantic.v1 import Field, constr, validator from ...util import provenance_stamp from .basemodels import ProtoModel, qcschema_draft @@ -17,10 +13,7 @@ from .types import Array if TYPE_CHECKING: - try: - from pydantic.v1.typing import ReprArgs - except ImportError: # Will also trap ModuleNotFoundError - from pydantic.typing import ReprArgs + from pydantic.v1.typing import ReprArgs class AtomicResultProperties(ProtoModel): diff --git a/qcelemental/models/v2/__init__.py b/qcelemental/models/v2/__init__.py new file mode 100644 index 00000000..509cda9d --- /dev/null +++ b/qcelemental/models/v2/__init__.py @@ -0,0 +1,19 @@ 
+from . import types +from .align import AlignmentMill +from .basemodels import ProtoModel +from .basis import BasisSet +from .common_models import ComputeError, DriverEnum, FailedOperation, Provenance +from .molecule import Molecule +from .procedures import OptimizationInput, OptimizationResult +from .results import AtomicInput, AtomicResult, AtomicResultProperties + + +def qcschema_models(): + return [ + AtomicInput, + AtomicResult, + AtomicResultProperties, + BasisSet, + Molecule, + Provenance, + ] diff --git a/qcelemental/models/v2/basemodels.py b/qcelemental/models/v2/basemodels.py index c82109f3..e87079d2 100644 --- a/qcelemental/models/v2/basemodels.py +++ b/qcelemental/models/v2/basemodels.py @@ -5,10 +5,8 @@ import numpy as np from pydantic import BaseModel, ConfigDict, model_serializer -from pydantic_settings import BaseSettings # remove when QCFractal merges `next` from qcelemental.util import deserialize, serialize -from qcelemental.util.autodocs import AutoPydanticDocGenerator # remove when QCFractal merges `next` def _repr(self) -> str: @@ -37,6 +35,8 @@ class ExtendedConfigDict(ConfigDict, total=False): class ProtoModel(BaseModel): + """QCSchema extension of pydantic.BaseModel.""" + model_config = ExtendedConfigDict( frozen=True, extra="forbid", @@ -279,10 +279,4 @@ def _merge_config_with(cls, *args, **kwargs): return ExtendedConfigDict(**output_dict) -# remove when QCFractal merges `next` -class AutodocBaseSettings(BaseSettings): - def __init_subclass__(cls) -> None: - cls.__doc__ = AutoPydanticDocGenerator(cls, always_apply=True) - - qcschema_draft = "http://json-schema.org/draft-04/schema#" diff --git a/qcelemental/models/v2/basis.py b/qcelemental/models/v2/basis.py index ca9ad843..54ff278f 100644 --- a/qcelemental/models/v2/basis.py +++ b/qcelemental/models/v2/basis.py @@ -172,7 +172,7 @@ class BasisSet(ProtoModel): description=f"The QCSchema specification to which this model conforms. 
Explicitly fixed as qcschema_basis.", ) schema_version: int = Field( # type: ignore - 1, + 2, description="The version number of :attr:`~qcelemental.models.BasisSet.schema_name` " "to which this model conforms.", ) diff --git a/qcelemental/models/v2/molecule.py b/qcelemental/models/v2/molecule.py index e7c40403..9e721403 100644 --- a/qcelemental/models/v2/molecule.py +++ b/qcelemental/models/v2/molecule.py @@ -13,6 +13,12 @@ from pydantic import Field, constr, field_validator, model_serializer from typing_extensions import Annotated +try: + import nglview +except ModuleNotFoundError: + # import is purely for forward reference for docs-build. import is not required except for Molecule.show() + pass + # molparse imports separated b/c https://github.com/python/mypy/issues/7203 from ...molparse.from_arrays import from_arrays from ...molparse.from_schema import from_schema diff --git a/qcelemental/models/v2/procedures.py b/qcelemental/models/v2/procedures.py index 2b7ecb86..3e3a365f 100644 --- a/qcelemental/models/v2/procedures.py +++ b/qcelemental/models/v2/procedures.py @@ -65,12 +65,14 @@ class QCInputSpecification(ProtoModel): class OptimizationInput(ProtoModel): + """QCSchema input directive for geometry optimization.""" + id: Optional[str] = None hash_index: Optional[str] = None schema_name: constr( # type: ignore strip_whitespace=True, pattern=qcschema_optimization_input_default ) = qcschema_optimization_input_default - schema_version: int = 1 + schema_version: int = 2 keywords: Dict[str, Any] = Field({}, description="The optimization specific keywords to be used.") extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.") @@ -89,6 +91,8 @@ def __repr_args__(self) -> "ReprArgs": class OptimizationResult(OptimizationInput): + """QCSchema results model for geometry optimization.""" + schema_name: constr( # type: ignore strip_whitespace=True, pattern=qcschema_optimization_output_default ) = qcschema_optimization_output_default 
@@ -205,7 +209,7 @@ class TorsionDriveInput(ProtoModel): schema_name: constr( strip_whitespace=True, pattern=qcschema_torsion_drive_input_default ) = qcschema_torsion_drive_input_default # type: ignore - schema_version: int = 1 + schema_version: int = 2 keywords: TDKeywords = Field(..., description="The torsion drive specific keywords to be used.") extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.") @@ -239,7 +243,7 @@ class TorsionDriveResult(TorsionDriveInput): schema_name: constr( strip_whitespace=True, pattern=qcschema_torsion_drive_output_default ) = qcschema_torsion_drive_output_default # type: ignore - schema_version: int = 1 + schema_version: int = 2 final_energies: Dict[str, float] = Field( ..., description="The final energy at each angle of the TorsionDrive scan." @@ -261,18 +265,3 @@ class TorsionDriveResult(TorsionDriveInput): ) error: Optional[ComputeError] = Field(None, description=str(ComputeError.__doc__)) provenance: Provenance = Field(..., description=str(Provenance.__doc__)) - - -def Optimization(*args, **kwargs): - """QC Optimization Results Schema. - - .. deprecated:: 0.12 - Use :py:func:`qcelemental.models.OptimizationResult` instead. 
- - """ - from warnings import warn - - warn( - "Optimization has been renamed to OptimizationResult and will be removed as soon as v0.13.0", DeprecationWarning - ) - return OptimizationResult(*args, **kwargs) diff --git a/qcelemental/models/v2/results.py b/qcelemental/models/v2/results.py index ea0b6fcf..3c787ed1 100644 --- a/qcelemental/models/v2/results.py +++ b/qcelemental/models/v2/results.py @@ -2,6 +2,12 @@ from functools import partial from typing import TYPE_CHECKING, Any, Dict, Optional, Set, Union +try: + from typing import Literal +except ImportError: + # remove when minimum py38 + from typing_extensions import Literal + import numpy as np from pydantic import Field, constr, field_validator @@ -26,6 +32,17 @@ class AtomicResultProperties(ProtoModel): * nmo: number of molecular orbitals = :attr:`~qcelemental.models.AtomicResultProperties.calcinfo_nmo` """ + schema_name: Literal["qcschema_atomicproperties"] = Field( + "qcschema_atomicproperties", + description=( + f"The QCSchema specification this model conforms to. Explicitly fixed as qcschema_atomicproperties." + ), + ) + schema_version: int = Field( + 2, + description="The version number of :attr:`~qcelemental.models.AtomicResultProperties.schema_name` to which this model conforms.", + ) + # Calcinfo calcinfo_nbasis: Optional[int] = Field(None, description="The number of basis functions for the computation.") calcinfo_nmo: Optional[int] = Field(None, description="The number of molecular orbitals for the computation.") @@ -635,14 +652,14 @@ class AtomicInput(ProtoModel): r"""The MolSSI Quantum Chemistry Schema""" id: Optional[str] = Field(None, description="The optional ID for the computation.") - schema_name: constr(strip_whitespace=True, pattern="^(qc_?schema_input)$") = Field( # type: ignore + schema_name: constr(strip_whitespace=True, pattern=r"^(qc\_?schema_input)$") = Field( # type: ignore qcschema_input_default, description=( f"The QCSchema specification this model conforms to. 
Explicitly fixed as {qcschema_input_default}." ), ) schema_version: int = Field( - 1, + 2, description="The version number of :attr:`~qcelemental.models.AtomicInput.schema_name` to which this model conforms.", ) @@ -676,7 +693,7 @@ def __repr_args__(self) -> "ReprArgs": class AtomicResult(AtomicInput): r"""Results from a CMS program execution.""" - schema_name: constr(strip_whitespace=True, pattern="^(qc_?schema_output)$") = Field( # type: ignore + schema_name: constr(strip_whitespace=True, pattern=r"^(qc\_?schema_output)$") = Field( # type: ignore qcschema_output_default, description=( f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_output_default}." @@ -831,69 +848,3 @@ def _native_file_protocol(cls, value, info): for rk in return_keep: ret[rk] = files.get(rk, None) return ret - - -class ResultProperties(AtomicResultProperties): - """QC Result Properties Schema. - - .. deprecated:: 0.12 - Use :py:func:`qcelemental.models.AtomicResultProperties` instead. - - """ - - def __init__(self, *args, **kwargs): - from warnings import warn - - warn( - "ResultProperties has been renamed to AtomicResultProperties and will be removed as soon as v0.13.0", - DeprecationWarning, - ) - super().__init__(*args, **kwargs) - - -class ResultProtocols(AtomicResultProtocols): - """QC Result Protocols Schema. - - .. deprecated:: 0.12 - Use :py:func:`qcelemental.models.AtomicResultProtocols` instead. - - """ - - def __init__(self, *args, **kwargs): - from warnings import warn - - warn( - "ResultProtocols has been renamed to AtomicResultProtocols and will be removed as soon as v0.13.0", - DeprecationWarning, - ) - super().__init__(*args, **kwargs) - - -class ResultInput(AtomicInput): - """QC Input Schema. - - .. deprecated:: 0.12 - Use :py:func:`qcelemental.models.AtomicInput` instead. 
- - """ - - def __init__(self, *args, **kwargs): - from warnings import warn - - warn("ResultInput has been renamed to AtomicInput and will be removed as soon as v0.13.0", DeprecationWarning) - super().__init__(*args, **kwargs) - - -class Result(AtomicResult): - """QC Result Schema. - - .. deprecated:: 0.12 - Use :py:func:`qcelemental.models.AtomicResult` instead. - - """ - - def __init__(self, *args, **kwargs): - from warnings import warn - - warn("Result has been renamed to AtomicResult and will be removed as soon as v0.13.0", DeprecationWarning) - super().__init__(*args, **kwargs) diff --git a/qcelemental/tests/addons.py b/qcelemental/tests/addons.py index a590faf4..f54151fc 100644 --- a/qcelemental/tests/addons.py +++ b/qcelemental/tests/addons.py @@ -62,12 +62,13 @@ def xfail_on_pubchem_busy(): def drop_qcsk(instance, tnm: str, schema_name: str = None): - if isinstance(instance, qcelemental.models.ProtoModel) and schema_name is None: + is_model = isinstance(instance, (qcelemental.models.v1.ProtoModel, qcelemental.models.v2.ProtoModel)) + if is_model and schema_name is None: schema_name = type(instance).__name__ drop = (_data_path / schema_name / tnm).with_suffix(".json") with open(drop, "w") as fp: - if isinstance(instance, qcelemental.models.ProtoModel): + if is_model: # fp.write(instance.json(exclude_unset=True, exclude_none=True)) # works but file is one-line instance = json.loads(instance.json(exclude_unset=True, exclude_none=True)) elif isinstance(instance, dict): @@ -83,7 +84,7 @@ def Molecule(request): if request.param == "v1": return qcelemental.models.v1.Molecule elif request.param == "v2": - return qcelemental.models.v1.Molecule # TODO v2 + return qcelemental.models.v2.Molecule else: return qcelemental.models.Molecule @@ -93,6 +94,6 @@ def schema_versions(request): if request.param == "v1": return qcelemental.models.v1 elif request.param == "v2": - return qcelemental.models.v1 # TODO v2 + return qcelemental.models.v2 else: return qcelemental.models 
diff --git a/qcelemental/tests/test_model_results.py b/qcelemental/tests/test_model_results.py index f089917d..395d9ce2 100644 --- a/qcelemental/tests/test_model_results.py +++ b/qcelemental/tests/test_model_results.py @@ -1,4 +1,5 @@ import numpy as np +import pydantic import pytest import qcelemental as qcel @@ -549,40 +550,63 @@ def test_result_derivatives_array(request, schema_versions): @pytest.mark.parametrize( - "smodel", ["molecule", "atomicresultproperties", "atomicinput", "atomicresult", "optimizationresult"] + "smodel", ["molecule", "atomicresultproperties", "atomicinput", "atomicresult", "optimizationresult", "basisset"] ) -def test_model_dictable(result_data_fixture, optimization_data_fixture, smodel, schema_versions): - Molecule = schema_versions.Molecule - AtomicResultProperties = schema_versions.AtomicResultProperties - AtomicInput = schema_versions.AtomicInput - AtomicResult = schema_versions.AtomicResult - OptimizationResult = schema_versions.OptimizationResult +def test_model_dictable(result_data_fixture, optimization_data_fixture, smodel, schema_versions, request): + qcsk_ver = "v2" if ("v2" in request.node.name) else "v1" if smodel == "molecule": - model = Molecule + model = schema_versions.Molecule data = result_data_fixture["molecule"].dict() + sver = (2, 2) # TODO , 3) elif smodel == "atomicresultproperties": - model = AtomicResultProperties + model = schema_versions.AtomicResultProperties data = {"scf_one_electron_energy": "-5.0", "scf_dipole_moment": [1, 2, 3], "ccsd_dipole_moment": None} + sver = (None, 2) elif smodel == "atomicinput": - model = AtomicInput + model = schema_versions.AtomicInput data = {k: result_data_fixture[k] for k in ["molecule", "model", "driver"]} + sver = (1, 2) elif smodel == "atomicresult": - model = AtomicResult + model = schema_versions.AtomicResult data = result_data_fixture + sver = (1, 2) elif smodel == "optimizationresult": - model = OptimizationResult + model = schema_versions.OptimizationResult data = 
optimization_data_fixture + sver = (1, 2) + + elif smodel == "basisset": + model = schema_versions.basis.BasisSet + data = {"name": "custom", "center_data": center_data, "atom_map": ["bs_sto3g_o", "bs_sto3g_h", "bs_sto3g_h"]} + sver = (1, 2) + + def ver_tests(qcsk_ver): + if qcsk_ver == "v1": + if sver[0] is not None: + assert instance.schema_version == sver[0] + assert isinstance(instance, pydantic.v1.BaseModel) + elif qcsk_ver == "v2": + if sver[1] is not None: + assert instance.schema_version == sver[1] + assert isinstance(instance, pydantic.BaseModel) instance = model(**data) - assert model(**instance.dict()) + ver_tests(qcsk_ver) + instance = model(**instance.dict()) + assert instance + ver_tests(qcsk_ver) + +def test_result_model_deprecations(result_data_fixture, optimization_data_fixture, request): + if "v1" not in request.node.name: + # schema_versions coming from fixtures despite not being explicitly present + pytest.skip("Deprecations from 2019 only available from qcel.models.v1") -def test_result_model_deprecations(result_data_fixture, optimization_data_fixture): with pytest.warns(DeprecationWarning): qcel.models.v1.ResultProperties(scf_one_electron_energy="-5.0") diff --git a/qcelemental/tests/test_molutil.py b/qcelemental/tests/test_molutil.py index b5b0b4f7..3cd78f5b 100644 --- a/qcelemental/tests/test_molutil.py +++ b/qcelemental/tests/test_molutil.py @@ -2,12 +2,7 @@ import pprint import numpy as np - -try: - import pydantic.v1 as pydantic -except ImportError: # Will also trap ModuleNotFoundError - import pydantic - +import pydantic import pytest import qcelemental as qcel @@ -166,7 +161,7 @@ def test_error_nat_b787(Molecule): def test_mill_shift_error(schema_versions): AlignmentMill = schema_versions.AlignmentMill - with pytest.raises(pydantic.ValidationError) as e: + with pytest.raises((pydantic.v1.ValidationError, pydantic.ValidationError)) as e: AlignmentMill(shift=[0, 1]) assert "Shift must be castable to shape" in str(e.value) @@ -175,7 +170,7 
@@ def test_mill_shift_error(schema_versions): def test_mill_rot_error(schema_versions): AlignmentMill = schema_versions.AlignmentMill - with pytest.raises(pydantic.ValidationError) as e: + with pytest.raises((pydantic.v1.ValidationError, pydantic.ValidationError)) as e: AlignmentMill(rotation=[0, 1, 3]) assert "Rotation must be castable to shape" in str(e.value) diff --git a/qcelemental/tests/test_utils.py b/qcelemental/tests/test_utils.py index f8c70aff..43bbb285 100644 --- a/qcelemental/tests/test_utils.py +++ b/qcelemental/tests/test_utils.py @@ -2,11 +2,7 @@ import numpy as np import pytest - -try: - from pydantic.v1 import BaseModel, Field -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import BaseModel, Field +from pydantic.v1 import BaseModel, Field import qcelemental as qcel from qcelemental.testing import compare_recursive, compare_values @@ -16,6 +12,8 @@ @pytest.fixture(scope="function") def doc_fixture(): + # associated with AutoDoc, so leaving at Pydantic v1 syntax + class Nest(BaseModel): """A nested model""" diff --git a/qcelemental/util/autodocs.py b/qcelemental/util/autodocs.py index b6b64232..ac57b50d 100644 --- a/qcelemental/util/autodocs.py +++ b/qcelemental/util/autodocs.py @@ -3,10 +3,11 @@ from textwrap import dedent, indent from typing import Any -try: - from pydantic.v1 import BaseModel, BaseSettings -except ImportError: # Will also trap ModuleNotFoundError - from pydantic import BaseModel, BaseSettings +from pydantic.v1 import BaseModel, BaseSettings + +# home-grown AutoDoc has been replaced by autodoc-pydantic for Sphinx in QCElemental and QCEngine. +# pre-next QCFractal was the last known user. Leaving this in pydantic v1 for now until removed entirely. + __all__ = ["auto_gen_docs_on_demand", "get_base_docs", "AutoPydanticDocGenerator"]