Source code for qcelemental.datum

"""
Datum Object Model
"""

from decimal import Decimal
from typing import Any, Dict, Optional, Union

import numpy as np
from numpy.typing import NDArray
from pydantic import (
    BaseModel,
    ConfigDict,
    SerializationInfo,
    SerializerFunctionWrapHandler,
    WrapSerializer,
    field_validator,
    model_serializer,
)
from typing_extensions import Annotated


def reduce_complex(data):
    """Split a complex number into a ``[real, imag]`` pair.

    Parameters
    ----------
    data
        Any value.

    Returns
    -------
    list or object
        ``[data.real, data.imag]`` when `data` is an instance of ``complex``;
        otherwise `data` itself, untouched.

    """
    return [data.real, data.imag] if isinstance(data, complex) else data


def keep_decimal_cast_ndarray_complex(v: Any, nxt: SerializerFunctionWrapHandler, info: SerializationInfo) -> Any:
    """
    Ensure Decimal types are preserved on the way out

    This arose because Decimal was serialized to string and "dump" is equal to "serialize" in v2 pydantic
    https://docs.pydantic.dev/latest/migration/#changes-to-json-schema-generation

    In JSON mode this additionally converts NumPy arrays, NumPy scalars, and complex numbers into plain
    Python values before handing them to the next serializer.

    Parameters
    ----------
    v
        Field value being serialized.
    nxt
        Next serializer in the chain; called with the (possibly converted) value.
    info
        Serialization context; only ``info.mode`` is consulted.

    Returns
    -------
    Any
        `v` itself when it is a ``Decimal`` (``nxt`` is bypassed); otherwise the result of ``nxt``.

    """
    if isinstance(v, Decimal):
        return v
    if info.mode != "json":
        return nxt(v)

    if isinstance(v, np.ndarray):
        # Arrays are emitted flattened; complex elements become [real, imag] pairs.
        return nxt([reduce_complex(element) for element in v.flatten().tolist()])

    try:
        # Cast NumPy scalar data types to native Python data type
        v = v.item()
    except (AttributeError, ValueError):
        pass

    # Reduce after unwrapping so that NumPy complex scalars which do not subclass the builtin
    # ``complex`` (e.g. ``np.complex64``) are emitted as [real, imag] just like Python complex values.
    return nxt(reduce_complex(v))


# Annotated ``Any`` whose serialization is routed through ``keep_decimal_cast_ndarray_complex``.
# Only 1 serializer is allowed. You can't chain wrap serializers, which is why the Decimal, ndarray,
# and complex handling all live in that one function.
AnyArrayComplex = Annotated[Any, WrapSerializer(keep_decimal_cast_ndarray_complex)]



[docs]
class Datum(BaseModel):
    r"""Facilitates the storage of quantum chemical results by labeling them with basic metadata.

    Attributes
    ----------
    label : str
        Official label for `data`, often qcvar. May contain spaces.
    units : str
        ASCII, LaTeX-like representation of units, without square brackets.
    data : float or decimal.Decimal or numpy.ndarray
        Value for `label`.
    comment : str
        Additional notes.
    doi : str
        Literature citation or definition DOI link.
    glossary : str
        Extended description or definition.
    numeric : bool
        Whether `data` is numeric. Pass `False` to disable validating `data` as float/Decimal/np.ndarray.

    """

    # Declared ahead of `data` because the `data` validator reads this flag from `info.data`.
    numeric: bool
    label: str
    units: str
    # Arbitrary payload; serialized through the wrap serializer attached to `AnyArrayComplex`.
    data: AnyArrayComplex
    comment: str = ""
    doi: Optional[str] = None
    glossary: str = ""

    # Reject unknown fields and make instances immutable after construction.
    model_config = ConfigDict(
        extra="forbid",
        frozen=True,
    )

    def __init__(self, label, units, data, *, comment=None, doi=None, glossary=None, numeric=True):
        """Build a Datum from positional `label`, `units`, and `data`.

        Parameters
        ----------
        label, units, data
            Required fields, accepted positionally.
        comment, doi, glossary
            Optional fields. A value of ``None`` means "not provided": the field is not passed on,
            so it keeps its declared default and stays out of the set fields.
        numeric
            Whether `data` should be validated as numeric.

        """
        fields = {"label": label, "units": units, "data": data, "numeric": numeric}
        optional = {"comment": comment, "doi": doi, "glossary": glossary}
        fields.update({name: value for name, value in optional.items() if value is not None})

        super().__init__(**fields)


[docs]
    @field_validator("data")
    @classmethod
    def must_be_numerical(cls, v, info):
        """Check that `data` supports numeric multiplication unless the Datum is flagged non-numeric.

        Parameters
        ----------
        v
            Candidate value for the `data` field.
        info
            Validation context; ``info.data`` holds the fields validated so far.

        Returns
        -------
        Any
            `v`, unchanged.

        Raises
        ------
        ValueError
            If `v` can be multiplied by neither a float nor a Decimal while `numeric` is true
            (or unavailable because `numeric` itself failed validation).

        """
        # Probe with a float first, then a Decimal: a Decimal operand raises TypeError against a
        # float but multiplies fine with another Decimal.
        for probe in (1.0, Decimal("1.0")):
            try:
                probe * v
            except TypeError:
                continue
            # The value is numeric, so record that regardless of the flag that was passed in.
            info.data["numeric"] = True
            return v

        # `numeric` is missing from `info.data` when its own validation failed; fall back to the
        # constructor default (True) so a validation error is reported instead of a bare KeyError.
        if info.data.get("numeric", True):
            raise ValueError(f"Datum data should be float, Decimal, or np.ndarray, not {type(v)}.")
        return v


    def __str__(self, label=""):
        """Render the Datum as a fixed-width, human-readable text box.

        Parameters
        ----------
        label : str, optional
            Extra line centered beneath the title; omitted when empty.

        Returns
        -------
        str
            Multi-line summary of the data, units, doi, comment, and glossary.

        """
        width = 40
        rule = "-" * width
        title = "Datum " + self.label

        lines = [rule, f"{title:^{width}}"]
        if label:
            lines.append(f"{label:^{width}}")
        lines += [
            rule,
            f"Data:     {self.data}",
            f"Units:    [{self.units}]",
            f"doi:      {self.doi}",
            f"Comment:  {self.comment}",
            f"Glossary: {self.glossary}",
            rule,
        ]
        return "\n".join(lines)

    @model_serializer(mode="wrap")
    def _serialize_model(self, handler) -> Dict[str, Any]:
        """
        Wrap the default serialization so that only explicitly set fields are emitted.

        The default handler runs first, so nested models and any options passed to the dump call are
        honored; its result is then filtered down to the keys present in ``model_fields_set``.

        Encoding is left to the `model_dump` level, after everything has been dumped.
        """
        dumped = handler(self)
        fields_set = self.model_fields_set
        # Exclude unset always
        return {name: dumped[name] for name in dumped if name in fields_set}


[docs]
    def dict(self, *args, **kwargs):
        """
        Return the model as a dictionary by forwarding all arguments to the parent ``model_dump``.

        Calling the parent method directly avoids the deprecation warning.
        Unset fields are always dropped because the model serializer enforces it.
        """
        dump = super().model_dump
        return dump(*args, **kwargs)



[docs]
    def json(self, *args, **kwargs):
        """
        Return the model as a JSON string by forwarding all arguments to the parent ``model_dump_json``.

        Calling the parent method directly avoids the deprecation warning.
        Unset fields are always dropped because the model serializer enforces it.
        """
        dump_json = super().model_dump_json
        return dump_json(*args, **kwargs)



[docs]
    def to_units(self, units=None):
        """Return `data` multiplied by the conversion factor from the stored units to `units`.

        Parameters
        ----------
        units : str, optional
            Target units. When ``None``, the Datum's own units are used as the target.

        Returns
        -------
        float or numpy.ndarray
            ``factor * data``. A ``Decimal`` payload is cast to ``float`` before multiplying
            (presumably because the factor is a float, which cannot multiply a Decimal).

        """
        from .physical_constants import constants

        target = units if units is not None else self.units
        factor = constants.conversion_factor(self.units, target)

        value = float(self.data) if isinstance(self.data, Decimal) else self.data
        return factor * value




def print_variables(qcvars: Dict[str, "Datum"]) -> str:
    r"""Form a printable representation of qcvariables.

    Entries are listed in sorted key order. Arrays are printed on indented lines below their
    header, Decimals and non-numeric data are printed via their string form, and everything else
    is printed as a fixed-point number with 12 decimal places.

    Parameters
    ----------
    qcvars
        Group of Datum objects to print.

    Returns
    -------
    str
        Printable string representation of label, data, and unit in Datum-s.

    """
    text = ["\n  Variable Map:", "  ----------------------------------------------------------------------------"]

    if not qcvars:
        text.append("  (none)")
        return "\n".join(text)

    largest_key = max(len(k) for k in qcvars) + 2  # for quotation marks

    # Widen the value column to fit the largest exponent found in any value's string form.
    largest_characteristic = 8
    for qca in qcvars.values():
        try:
            exp = int(str(qca.data).split("E")[1])
        except (IndexError, ValueError):
            # IndexError: no "E" in the text. ValueError: an "E" that is not followed by an
            # integer exponent, e.g. non-numeric string data such as "ENERGY".
            continue
        largest_characteristic = max(exp, largest_characteristic)
    width = largest_characteristic + 14

    for k, qca in sorted(qcvars.items()):
        key = '"' + k + '"'

        if isinstance(qca.data, np.ndarray):
            data = np.array_str(qca.data, max_line_width=120, precision=8, suppress_small=True)
            data = "\n".join("        " + ln for ln in data.splitlines())
            # Header line carries a blank value column; the array itself follows on its own lines.
            text.append(
                """  {:{keywidth}} => {:{width}} [{}]""".format(key, "", qca.units, keywidth=largest_key, width=width)
            )
            text.append(data)
        elif isinstance(qca.data, Decimal):
            text.append(
                """  {:{keywidth}} => {:{width}} [{}]""".format(
                    key, qca.data, qca.units, keywidth=largest_key, width=width
                )
            )
        elif not qca.numeric:
            text.append(
                """  {:{keywidth}} => {:>{width}} [{}]""".format(
                    key, str(qca.data), qca.units, keywidth=largest_key, width=width
                )
            )
        else:
            text.append(
                """  {:{keywidth}} => {:{width}.{prec}f} [{}]""".format(
                    key, qca.data, qca.units, keywidth=largest_key, width=width, prec=12
                )
            )

    text.append("")
    return "\n".join(text)