Source code for qcmanybody.utils

from __future__ import annotations

import ast
import json
import math
import re
import string
from typing import Any, Dict, Iterable, List, Literal, Mapping, Optional, Set, Tuple, Union

import numpy as np

__all__ = [
    # "collect_vars",
    "delabeler",
    "labeler",
    # "print_nbody_energy",
    "modelchem_labels",
    "provenance_stamp",
    "resize_gradient",
    "resize_hessian",
    # "sum_cluster_data",
    "translate_qcvariables",
]


def find_shape(x: Union[float, np.ndarray]) -> Tuple[int, ...]:
    if isinstance(x, float):
        return (1,)
    else:
        return x.shape


def shaped_zero(shape: Tuple[int, ...]) -> Union[float, np.ndarray]:
    if shape == (1,):
        return 0.0
    else:
        return np.zeros(shape)


def copy_value(x: Union[float, np.ndarray]) -> Union[int, float, np.ndarray]:
    if isinstance(x, float):
        return x
    else:
        return np.copy(x)


def all_same_shape(it: Iterable[Union[float, np.ndarray]]) -> bool:
    """Check if all elements of an iterable have the same shape."""

    it = iter(it)
    try:
        first = next(it)
    except StopIteration:
        return True
    if isinstance(first, float):
        return all(isinstance(x, float) for x in it)
    elif isinstance(first, np.ndarray):
        return all(x.shape == first.shape for x in it)
    else:
        raise TypeError(f"Expected float or np.ndarray, got {type(first)}")


[docs] def resize_gradient( grad: np.ndarray, bas: Tuple[int, ...], fragment_size_dict: Dict[int, int], fragment_slice_dict: Dict[int, slice], *, reverse: bool = False, ) -> np.ndarray: r"""Pads or extracts a gradient array between subsystem and full supersystem sizes. Parameters ---------- grad Gradient matrix of natural size for *bas*, (3 * _<nat in bas\>_, 3). If `reverse=True`, gradient matrix of supersystem size, (3 * _<nat of all fragments\>_, 3). bas 1-indexed fragments active in *grad*. If `reverse=True`, 1-indexed fragments to be extracted from *grad*. fragment_size_dict Dictionary containing the number of atoms of each 1-indexed fragment. For He--HOOH--Me cluster, `{1: 1, 2: 4, 3: 5}`. fragment_slice_dict Dictionary containing slices that index the gradient matrix for each of the 1-indexed fragments. For He--HOOH--Me cluster, `{1: slice(0, 1), 2: slice(1, 5), 3: slice(5, 10)}`. reverse If True, contract *grad* from supersystem size and shape to that which is natural for *bas*. Returns ------- numpy.ndarray Gradient array padded with zeros to supersystem size, (3 * _<nat of supersystem\>_, 3). If reverse=True, gradient array extracted to natural size, `(3 * _<nat in bas\>_, 3)`. """ if reverse: nat = sum(fragment_size_dict[ifr] for ifr in bas) else: nat = sum(fragment_size_dict.values()) ret = np.zeros((nat, 3)) start = 0 for ifr in bas: end = start + fragment_size_dict[ifr] if reverse: ret[start:end] = grad[fragment_slice_dict[ifr]] else: ret[fragment_slice_dict[ifr]] = grad[start:end] start += fragment_size_dict[ifr] return ret
[docs] def resize_hessian( hess: np.ndarray, bas: Tuple[int, ...], fragment_size_dict: Dict[int, int], fragment_slice_dict: Dict[int, slice], *, reverse: bool = False, ) -> np.ndarray: r"""Pads or extracts a Hessian array between subsystem and full supersystem sizes. Parameters ---------- hess Hessian matrix of natural size for *bas*, ``(3 * <nat in bas>, 3 * <nat in bas>)``. If `reverse=True`, Hessian matrix of supersystem size, ``(3 * <nat of all fragments>, 3 * <nat of all fragments>)``. bas 1-indexed fragments active in *hess*. If `reverse=True`, 1-indexed fragments to be extracted from *hess*. fragment_size_dict Dictionary containing the number of atoms of each 1-indexed fragment. For He--HOOH--Me cluster, ``{1: 1, 2: 4, 3: 5}``. fragment_slice_dict Dictionary containing slices that index the gradient matrix for each of the 1-indexed fragments. For He--HOOH--Me cluster, ``{1: slice(0, 1), 2: slice(1, 5), 3: slice(5, 10)}``. reverse If True, contract *hess* from supersystem size and shape to that which is natural for *bas*. Returns ------- numpy.ndarray Hessian array padded with zeros to supersystem size, ``(3 * <nat of supersystem>, 3 * <nat of supersystem>)``. If *reverse=True*, Hessian array extracted to natural size, ``(3 * <nat in bas>, 3 * <nat in bas>)``. """ if reverse: nat = sum(fragment_size_dict[ifr] for ifr in bas) else: nat = sum(fragment_size_dict.values()) ret = np.zeros((nat * 3, nat * 3)) # Build up start and end slices abs_start, rel_start = 0, 0 abs_slices, rel_slices = [], [] for ifr in bas: rel_end = rel_start + 3 * fragment_size_dict[ifr] rel_slices.append(slice(rel_start, rel_end)) rel_start += 3 * fragment_size_dict[ifr] tmp_slice = fragment_slice_dict[ifr] abs_slices.append(slice(tmp_slice.start * 3, tmp_slice.stop * 3)) for abs_sl1, rel_sl1 in zip(abs_slices, rel_slices): for abs_sl2, rel_sl2 in zip(abs_slices, rel_slices): if reverse: ret[rel_sl1, rel_sl2] = hess[abs_sl1, abs_sl2] else: ret[abs_sl1, abs_sl2] = hess[rel_sl1, rel_sl2] return ret
def sum_cluster_data( data: Dict[str, Union[float, np.ndarray]], compute_list: Set["FragBasIndex"], mc_level_lbl: str, vmfc: bool = False, nb: int = 0, ) -> Union[float, np.ndarray]: """Sum (direct or alternate weight by n-body) like data from Parameters ---------- data Dictionary containing computed property (e/g/H/etc.) for each subsystem/component computation. compute_list A list of (frag, bas) tuples notating all the required computations for the desired sum. mc_level_lbl User label for what modelchem level results should be pulled out of *data*. vmfc Is this a vmfc calculation, by default False? nb 1-indexed n-body level only used when `vmfc=True`, by default 0. Returns ------- Union[float, np.ndarray] Scalar or array containing summed energy, gradient, Hessian, or other result. Usually (nocp or cp; `vmfc=False`), compute_list defines all fragments of a given number of active fragments and active basis fragments, so the return is the 3b@3b sum, for example. Other times (`vmfc=True`), compute list defines all fragments of a given number of active basis fragments. Then alternating weighting is applied so if `nb=3`, the return is the quantity (3b@3b sum - 2b@3b sum + 1b@3b sum), for example. Raises ------ ValueError If the shapes of all the `data` values aren't the same. No summing energies with gradients. """ sign = 1 if not all_same_shape(data.values()): raise ValueError("All values in data dictionary must have the same shape.") first_key = next(iter(data)) shape = find_shape(data[first_key]) ret = shaped_zero(shape) # np.sum might be preferable for precision for G/H but np.sum(generator) ill-defined precise_sum_func = math.fsum if isinstance(ret, float) else sum ret = precise_sum_func( (((-1) ** (nb - len(frag))) if vmfc else 1) * (data[labeler(mc_level_lbl, frag, bas)]) for frag, bas in compute_list ) # A more readable format for the above but not ammenable to using specialty summation functions # ``` # for frag, bas in compute_list: # egh = data[labeler(mc_level_lbl, frag, bas)] # # if vmfc: # sign = (-1) ** (nb - len(frag)) # # ret += sign * egh # ``` return ret
[docs] def labeler( mc_level_lbl: Optional[Union[str, int]], frag: Tuple[int, ...], bas: Tuple[int, ...], *, opaque: bool = True ) -> str: """Form label from model chemistry id and fragment and basis indices. Parameters ---------- mc_level_lbl Key identifying the model chemistry. May be `"(auto)"`. Often the ManyBodyInput.specification.specification keys. When `opaque=False`, result is for pretty printing so instead of a string, `mc_level_lbl` might be an integer index (apply 1-indexing beforehand) or None (if the model chemistry part is unwanted because single-level). frag List of 1-indexed fragments active in the supersystem. bas List of 1-indexed fragments with active basis sets in the supersystem. All those in *frag* plus any ghost. opaque Toggle whether to return JSON-friendly semi-opaque internal str label (True) or eye-friendly label with @ for basis and § for model chemistry (False). Returns ------- str JSON string from inputs:: labeler("mp2", 1, (1, 2)) #> '["mp2", [1], [1, 2]]' labeler("mp2", 1, (1, 2), opaque=False) #> '§mp2_(1)@(1, 2)' """ if isinstance(frag, int): frag = (frag,) if isinstance(bas, int): bas = (bas,) if opaque: return json.dumps([str(mc_level_lbl), frag, bas]) else: mc_pre = "" if mc_level_lbl is None else f{mc_level_lbl}_" return f"{mc_pre}({', '.join(map(str, frag))})@({', '.join(map(str, bas))})"
[docs] def delabeler(item: str) -> Tuple[str, Tuple[int, ...], Tuple[int, ...]]: """Back-form from label into tuple. Returns ------- mcfragbas Tuple of opaque or pretty-print model chemistry (may be None for latter), fragments and bases (1-indexed by convention). :: delabeler('["mp2", [1], [1, 2]]') #> ('mp2', [1], [1, 2]) delabeler("§mp2_(1)@(1, 2)") #> ('mp2', [1], [1, 2]) """ if "@" not in item: mc, frag, bas = json.loads(item) return str(mc), frag, bas else: mobj = re.match(r"(?:§(?P<mc>\S*)_)?(?P<frag>.*)@(?P<bas>.*)", item) mc, frag, bas = mobj.groups() # want lists and avoids (1) non-iterable error frag = frag.replace("(", "[").replace(")", "]") bas = bas.replace("(", "[").replace(")", "]") return mc, ast.literal_eval(frag), ast.literal_eval(bas)
def print_nbody_energy( energy_body_dict: Mapping[int, float], header: str, nfragments: int, modelchem_labels, embedding: bool, supersystem_ie_only: bool, supersystem_beyond: Optional[int], ) -> str: """Format summary string for energies of a single bsse_type. Logs and returns output. Parameters ---------- energy_body_dict Input data. header Specialization for table title. nfragments Number of lines in table is number of fragments. modelchem_labels Dictionary mapping active nbody-levels to a tuple with first element the full model chemistry key and second element a short label. A suitable dictionary is `modelchem_labels(manybodycore_instance.nbodies_per_mc_level)`. embedding Whether charge embedding present suppress printing, usually False supersystem_ie_only Whether only 1-body and nfragments-body levels are available, usually False. supersystem_beyond If not None, the number of nbody-levels computed by MBE explicitly. Beyond this gets supersystem SS label. Returns ------- str A text table in Hartrees and kcal/mol ``` ==> N-Body: Counterpoise Corrected (CP) energies <==' n-Body Total Energy Interaction Energy N-body Contribution to Interaction Energy' [Eh] [Eh] [kcal/mol] [Eh] [kcal/mol]' 1 -386.455609352609 0.000000000000 0.000000000000 0.000000000000 0.000000000000' 2 -384.203153844163 2.252455508446 1413.437170812134 2.252455508446 1413.437170812134' FULL/RTN 3 -384.128628718676 2.326980633933 1460.202393089624 0.074525125487 46.765222277490' ``` """ from qcelemental import constants info = f"""\n ==> N-Body: {header} energies <==\n\n""" info += f""" {"MC n-Body":>15} Total Energy Interaction Energy N-body Contribution to Interaction Energy\n""" info += f""" [Eh] [Eh] [kcal/mol] [Eh] [kcal/mol]\n""" previous_e = energy_body_dict[1] tot_e = previous_e != 0.0 nbody_range = list(energy_body_dict) if supersystem_ie_only: nbody_range = [1, nfragments] nbody_range.sort() for nb in range(1, nfragments + 1): lbl = [] if supersystem_beyond and nb > supersystem_beyond: lbl.append("SS") if nb == nfragments: lbl.append("FULL") if nb == max(nbody_range): lbl.append("RTN") lbl = "/".join(lbl) mclbl = modelchem_labels.get(nb, ("", ""))[1] if nb in nbody_range: delta_e = energy_body_dict[nb] - previous_e delta_e_kcal = delta_e * constants.hartree2kcalmol if embedding: int_e = np.nan int_e_kcal = np.nan else: int_e = energy_body_dict[nb] - energy_body_dict[1] int_e_kcal = int_e * constants.hartree2kcalmol if supersystem_ie_only and nb == nfragments: if tot_e: info += f""" {lbl:>11} {mclbl:2} {nb:2} {energy_body_dict[nb]:20.12f} {int_e:20.12f} {int_e_kcal:20.12f} {"N/A":20} {"N/A":20}\n""" else: info += f""" {lbl:>11} {mclbl:2} {nb:2} {"N/A":14} {int_e:20.12f} {int_e_kcal:20.12f} {"N/A":20} {"N/A":20}\n""" else: if tot_e: if embedding: info += f""" {lbl:>11} {mclbl:2} {nb:2} {energy_body_dict[nb]:20.12f} {"N/A":20} {"N/A":14} {delta_e:20.12f} {delta_e_kcal:20.12f}\n""" else: info += f""" {lbl:>11} {mclbl:2} {nb:2} {energy_body_dict[nb]:20.12f} {int_e:20.12f} {int_e_kcal:20.12f} {delta_e:20.12f} {delta_e_kcal:20.12f}\n""" else: info += f""" {lbl:>11} {mclbl:2} {nb:2} {"N/A":14} {int_e:20.12f} {int_e_kcal:20.12f} {delta_e:20.12f} {delta_e_kcal:20.12f}\n""" previous_e = energy_body_dict[nb] else: info += f""" {lbl:>11} {"":2} {nb:2} {"N/A":20} {"N/A":20} {"N/A":20} {"N/A":20} {"N/A":20}\n""" mc_legend = {tup[1]: tup[0] for tup in modelchem_labels.values()} mc_legend = [f'{k}: "{v}"' for k, v in mc_legend.items()] info += f"\n MC Legend: {', '.join(mc_legend)}\n\n" return info def collect_vars( bsse: str, prop: str, body_dict: Mapping[int, Union[float, np.ndarray]], max_nbody: int, embedding: bool = False, supersystem_ie_only: bool = False, has_supersystem: bool = False, ) -> Dict: """From *body_dict*, construct data for ManyBodyResultProperties/ManyBodyProperties. Parameters ---------- bsse Label for a single many-body treatment, generally a value of BsseEnum. prop Label for a single property, generally a value of DriverEnum. body_dict Dictionary of minimal per-body info already specialized for property *prop* and treatment *bsse*. May contain either total data or interaction data (cummulative, not additive) from 1-body to max_nbody-body (see also *supersystem_ie_only*). Interaction data signaled by zero float or array for 1-body. May contain data from multiple model chemistry levels. max_nbody _description_ embedding Is charge embedding enabled, by default False? supersystem_ie_only Is data available in *body_dict* only for 1-body (possibly zero) and nfr-body levels? By default False: data is available for consecutive levels, up to max_nbody-body. has_supersystem Whether contributions higher than max_nbody are a summary correction. Returns ------- dict _description_. Empty return if *embedding* enabled. """ bsse = bsse.lower() prop = prop.lower() previous_e = body_dict[1] property_shape = find_shape(previous_e) tot_e = bool(np.count_nonzero(previous_e)) nbody_range = list(body_dict) nbody_range.sort() res = {} if tot_e: res[f"{bsse}_corrected_total_{prop}"] = body_dict[max_nbody] res[f"{bsse}_corrected_interaction_{prop}"] = body_dict[max_nbody] - body_dict[1] res[f"{bsse}_corrected_interaction_{prop}_through_1_body"] = shaped_zero(property_shape) if supersystem_ie_only: nfr = nbody_range[-1] for nb in [nfr]: res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1] if nb == 2: res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1] if tot_e: for nb in [1, nfr]: res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb] elif has_supersystem: nfr = nbody_range[-1] res[f"{bsse}_corrected_interaction_{prop}"] = body_dict[nfr] - body_dict[1] # reset for nb in range(2, max_nbody + 1): res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1] res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1] for nb in [nfr]: res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1] res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[max_nbody] if tot_e: res[f"{bsse}_corrected_total_{prop}"] = body_dict[nfr] # reset for nb in nbody_range: res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb] else: for nb in range(2, max(nbody_range) + 1): res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1] res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1] if tot_e: for nb in nbody_range: res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb] if embedding: res = {k: v for k, v in res.items() if "interaction" not in k} return res
[docs] def provenance_stamp(routine: str) -> Dict[str, str]: """Return dictionary satisfying QCSchema, https://github.com/MolSSI/QCSchema/blob/master/qcschema/dev/definitions.py#L23-L41 with QCManyBody's credentials for creator and version. The generating routine's name is passed in through `routine`. :: qcmb.utils.provenance_stamp(__name__) #> {'creator': 'QCManyBody', 'version': '0.2.2', 'routine': '__main__'} """ import qcmanybody return {"creator": "QCManyBody", "version": qcmanybody.__version__, "routine": routine}
[docs] def translate_qcvariables(varsmap: Mapping[str, Any]) -> Dict[str, Any]: """Translate between ManyBody results in Psi4/QCDB terminology (qcvars) and QCSchema terminology (skprops). Parameters ---------- varsmap Dictionary with keys all members of QCVariables or ManyBodyProperties/ManyBodyResultProperties and arbitrary values. Returns ------- dict varsmap with keys swapped to other set. Untranslatable keys are omitted. """ # identify direction of translation qcv2skp = any([" " in lbl for lbl in varsmap]) # to_variables are exactly the same at present. still, try more restrictive first try: from qcmanybody.models.v1 import ManyBodyResultProperties labelmap = ManyBodyResultProperties.to_qcvariables(reverse=qcv2skp) except Exception: from qcmanybody.models.v2 import ManyBodyProperties labelmap = ManyBodyProperties.to_qcvariables(reverse=qcv2skp) return {labelmap[lbl]: data for lbl, data in varsmap.items() if lbl in labelmap}
[docs] def modelchem_labels( nb_per_mc: Dict[str, List[Union[int, Literal["supersystem"]]]], presorted: bool = False ) -> Dict[Union[int, Literal["supersystem"]], Tuple[str, str, str]]: """Form ordinal and letter labels for model chemistries. Parameters ---------- nb_per_mc Dictionary mapping model chemistries to lists of n-body levels computed. If a model chemistry is supersystem, the value should be ["supersystem"]. Generally, this is the `ManyBodyCore.nbodies_per_mc_level` data structure. presorted If True, the input dictionary keys and values are already sorted by increasing n-body level. This is the case when the input is `ManyBodyCore.nbodies_per_mc_level`. Returns ------- mc_per_nb Dictionary mapping n-body levels to a tuple of full model chemistry label, single-letter ordinal label, and n-body-levels-covered label. :: modelchem_labels({'ccsd': [1], 'mp2': [2, 3], 'hf': [4]}) #> {1: ('ccsd', '§A', '§1'), 2: ('mp2', '§B', '§23'), 3: ('mp2', '§B', '§23'), 4: ('hf', '§C', '§4')} modelchem_labels({'hi': [1, 2, 3], 'md': [4], 'md2': [5, 6, 7, 8, 9, 10], 'lo': ['supersystem']}) #> {1: ('hi', '§A', '§123'), 2: ('hi', '§A', '§123'), 3: ('hi', '§A', '§123'), # 4: ('md', '§B', '§4'), # 5: ('md2', '§C', '§<10'), 6: ('md2', '§C', '§<10'), 7: ('md2', '§C', '§<10'), 8: ('md2', '§C', '§<10'), 9: ('md2', '§C', '§<10'), 10: ('md2', '§C', '§<10'), # "supersystem": ('lo', '§D', '§SS')} """ sorted_nb_per_mc = { k: sorted(v) for (k, v) in sorted( nb_per_mc.items(), key=lambda mc_nbs: sorted([1000] if (mc_nbs[1] == ["supersystem"]) else mc_nbs[1])[0] ) } if presorted: assert ( sorted_nb_per_mc == nb_per_mc ), f"If presorted, input dictionary should be sorted. {nb_per_mc} != {sorted_nb_per_mc} " lvl_lbl = {} for mc, nbs in sorted_nb_per_mc.items(): if nbs == ["supersystem"]: lvl_lbl[mc] = "§SS" elif max(nbs) > 9: lvl_lbl[mc] = f"§<{max(nbs)}" else: lvl_lbl[mc] = f{''.join(map(str, sorted(nbs)))}" indexed_mc = {k: i for i, k in enumerate(sorted_nb_per_mc.keys())} mc_per_nb = { nb: (mc, f{string.ascii_uppercase[indexed_mc[mc]]}", lvl_lbl[mc]) for mc, nbs in sorted_nb_per_mc.items() for nb in nbs } return mc_per_nb