Source code for qcelemental.datum

"""
Datum Object Model
"""

from decimal import Decimal
from typing import Any, Dict, Optional, Union

import numpy as np
from pydantic import (
    BaseModel,
    ConfigDict,
    SerializationInfo,
    SerializerFunctionWrapHandler,
    WrapSerializer,
    field_validator,
    model_serializer,
)
from typing_extensions import Annotated


def reduce_complex(data):
    """Split a complex scalar into a ``[real, imag]`` pair.

    Anything that is not an instance of ``complex`` is handed back untouched,
    so the function is safe to map over mixed sequences.
    """
    return [data.real, data.imag] if isinstance(data, complex) else data


def keep_decimal_cast_ndarray_complex(
    v: Any, nxt: SerializerFunctionWrapHandler, info: SerializationInfo
) -> Union[list, Decimal, float]:
    """
    Wrap-serializer that keeps Decimal values intact and makes NumPy/complex data JSON-friendly.

    Background: in pydantic v2 "dump" is the same operation as "serialize", and Decimal
    was being turned into a string on the way out. See
    https://docs.pydantic.dev/latest/migration/#changes-to-json-schema-generation

    Behavior:

    * A ``Decimal`` is returned as-is, bypassing the downstream handler entirely.
    * Outside JSON mode every other value is forwarded to ``nxt`` unchanged.
    * In JSON mode, complex scalars become ``[real, imag]``, NumPy arrays are
      flattened to a list (complex elements reduced the same way), and anything
      exposing a working ``.item()`` (e.g. NumPy scalars) is unwrapped to a native
      Python value before being forwarded to ``nxt``.
    """
    # Decimal short-circuits regardless of the serialization mode.
    if isinstance(v, Decimal):
        return v

    # The remaining conversions are only wanted when producing JSON.
    if info.mode != "json":
        return nxt(v)

    if isinstance(v, complex):
        return nxt(reduce_complex(v))

    if isinstance(v, np.ndarray):
        # Covers both real and complex arrays; shape is not retained.
        return nxt([reduce_complex(element) for element in v.flatten().tolist()])

    try:
        # NumPy scalar types -> native Python types.
        v = v.item()
    except (AttributeError, ValueError):
        # Not a NumPy scalar (or not convertible); pass the value through as it is.
        pass
    return nxt(v)


# Field annotation: accepts `Any` value on input and routes serialization through
# `keep_decimal_cast_ndarray_complex`.
# Only 1 serializer is allowed. You can't chain wrap serializers, which is why the
# Decimal, ndarray, and complex handling all live in that single function.
AnyArrayComplex = Annotated[Any, WrapSerializer(keep_decimal_cast_ndarray_complex)]


class Datum(BaseModel):
    r"""Facilitates the storage of quantum chemical results by labeling them with basic metadata.

    Attributes
    ----------
    label : str
        Official label for `data`, often qcvar. May contain spaces.
    units : str
        ASCII, LaTeX-like representation of units, without square brackets.
    data : float or decimal.Decimal or numpy.ndarray
        Value for `label`.
    comment : str
        Additional notes.
    doi : str
        Literature citation or definition DOI link.
    glossary : str
        Extended description or definition.
    numeric : bool
        Whether `data` is numeric. Pass `False` to disable validating `data` as float/Decimal/np.ndarray.

    """

    # `numeric` is declared ahead of `data` because the `data` validator below
    # looks it up among the already-validated fields.
    numeric: bool
    label: str
    units: str
    data: AnyArrayComplex
    comment: str = ""
    doi: Optional[str] = None
    glossary: str = ""

    model_config = ConfigDict(
        extra="forbid",
        frozen=True,
    )

    def __init__(self, label, units, data, *, comment=None, doi=None, glossary=None, numeric=True):
        """Build a Datum from positional label/units/data plus keyword-only metadata.

        Optional metadata left as ``None`` is not forwarded to the model, so it is
        not recorded as explicitly set and is therefore dropped by the model
        serializer, which only emits fields present in ``model_fields_set``.
        """
        kwargs = {"label": label, "units": units, "data": data, "numeric": numeric}
        if comment is not None:
            kwargs["comment"] = comment
        if doi is not None:
            kwargs["doi"] = doi
        if glossary is not None:
            kwargs["glossary"] = glossary

        super().__init__(**kwargs)

    @field_validator("data")
    @classmethod
    def must_be_numerical(cls, v, info):
        """Check that `data` can be multiplied by a float or a Decimal.

        Raises
        ------
        ValueError
            If neither multiplication works and the `numeric` flag is truthy.

        """
        try:
            1.0 * v
        except TypeError:
            try:
                Decimal("1.0") * v
            except TypeError:
                # Fields that failed their own validation are absent from
                # `info.data`; use the constructor default (True) in that case
                # rather than letting a KeyError escape from the validator.
                if info.data.get("numeric", True):
                    raise ValueError(f"Datum data should be float, Decimal, or np.ndarray, not {type(v)}.")
            else:
                # Decimal-compatible data counts as numeric.
                # NOTE(review): whether mutating `info.data` reaches the finished
                # model is pydantic-internal behavior -- confirm.
                info.data["numeric"] = True
        else:
            # Float-compatible data counts as numeric.
            info.data["numeric"] = True

        return v

    def __str__(self, label: str = "") -> str:
        """Return a boxed, multi-line summary; `label`, if given, adds a centered subtitle."""
        width = 40
        rule = "-" * width
        title = "Datum " + self.label
        text = [rule, f"{title:^{width}}"]
        if label:
            text.append(f"{label:^{width}}")
        text.append(rule)
        text.append("Data: {}".format(self.data))
        text.append("Units: [{}]".format(self.units))
        text.append("doi: {}".format(self.doi))
        text.append("Comment: {}".format(self.comment))
        text.append("Glossary: {}".format(self.glossary))
        text.append(rule)
        return "\n".join(text)

    @model_serializer(mode="wrap")
    def _serialize_model(self, handler) -> Dict[str, Any]:
        """
        Customize the serialization output. Does duplicate with some code in model_dump, but handles the case of nested
        models and any model config options.

        Encoding is handled at the `model_dump` level and not here as that should happen only after EVERYTHING has been
        dumped/de-pydantic-ized.
        """
        # Get the default return, let the model_dump handle kwarg
        default_result = handler(self)
        # Exclude unset always
        return {key: value for key, value in default_result.items() if key in self.model_fields_set}

    def dict(self, *args, **kwargs):
        """
        Passthrough to model_dump without deprecation warning
        exclude_unset is forced through the model_serializer
        """
        return super().model_dump(*args, **kwargs)

    def json(self, *args, **kwargs):
        """
        Passthrough to model_dump_json without deprecation warning
        exclude_unset is forced through the model_serializer
        """
        return super().model_dump_json(*args, **kwargs)

    def to_units(self, units: Optional[str] = None):
        """Return `data` scaled by the conversion factor from `self.units` to `units`.

        When `units` is None the target is `self.units` itself. Decimal data
        is cast to float before scaling; other data is multiplied directly.
        """
        # Imported here rather than at module level, as in the original code.
        from .physical_constants import constants

        to_unit = self.units if units is None else units
        factor = constants.conversion_factor(self.units, to_unit)

        if isinstance(self.data, Decimal):
            return factor * float(self.data)
        return factor * self.data
def print_variables(qcvars: Dict[str, "Datum"]) -> str:
    r"""Form a printable representation of qcvariables.

    Parameters
    ----------
    qcvars
        Group of Datum objects to print.

    Returns
    -------
    str
        Printable string representation of label, data, and unit in Datum-s.

    """
    text = ["\n Variable Map:", " ----------------------------------------------------------------------------"]

    if not qcvars:
        text.append(" (none)")
        return "\n".join(text)

    largest_key = max(len(k) for k in qcvars) + 2  # for quotation marks

    # Widen the value column when some value's text carries an "E<exponent>"
    # suffix larger than the default.
    largest_characteristic = 8
    for v in qcvars.values():
        try:
            exp = int(str(v.data).split("E")[1])
        except (IndexError, ValueError):
            # IndexError: no "E" in the text.
            # ValueError: an "E" is present but what follows is not an integer
            # (e.g. non-numeric data such as "TRUE"); not an exponent, so skip.
            continue
        largest_characteristic = max(exp, largest_characteristic)

    width = largest_characteristic + 14

    for k, qca in sorted(qcvars.items()):
        quoted = '"' + k + '"'
        if isinstance(qca.data, np.ndarray):
            # Arrays get a header line with an empty value column, then the
            # indented array body on the following lines.
            data = np.array_str(qca.data, max_line_width=120, precision=8, suppress_small=True)
            data = "\n".join(" " + ln for ln in data.splitlines())
            text.append(
                """ {:{keywidth}} => {:{width}} [{}]""".format(
                    quoted, "", qca.units, keywidth=largest_key, width=width
                )
            )
            text.append(data)
        elif isinstance(qca.data, Decimal):
            text.append(
                """ {:{keywidth}} => {:{width}} [{}]""".format(
                    quoted, qca.data, qca.units, keywidth=largest_key, width=width
                )
            )
        elif not qca.numeric:
            text.append(
                """ {:{keywidth}} => {:>{width}} [{}]""".format(
                    quoted, str(qca.data), qca.units, keywidth=largest_key, width=width
                )
            )
        else:
            text.append(
                """ {:{keywidth}} => {:{width}.{prec}f} [{}]""".format(
                    quoted, qca.data, qca.units, keywidth=largest_key, width=width, prec=12
                )
            )

    text.append("")
    return "\n".join(text)