Source code for qcmanybody.utils

from __future__ import annotations

import ast
import json
import math
import re
import string
from typing import Any, Dict, Iterable, List, Literal, Mapping, Optional, Set, Tuple, Union

import numpy as np
from qcelemental import constants

__all__ = [
    # "collect_vars",
    "delabeler",
    "labeler",
    # "print_nbody_energy",
    "modelchem_labels",
    "provenance_stamp",
    "resize_gradient",
    "resize_hessian",
    # "sum_cluster_data",
    "translate_qcvariables",
]


def find_shape(x: Union[float, np.ndarray]) -> Tuple[int, ...]:
    if isinstance(x, float):
        return (1,)
    else:
        return x.shape


def shaped_zero(shape: Tuple[int, ...]) -> Union[float, np.ndarray]:
    if shape == (1,):
        return 0.0
    else:
        return np.zeros(shape)


def copy_value(x: Union[float, np.ndarray]) -> Union[int, float, np.ndarray]:
    if isinstance(x, float):
        return x
    else:
        return np.copy(x)


def all_same_shape(it: Iterable[Union[float, np.ndarray]]) -> bool:
    """Check if all elements of an iterable have the same shape."""

    it = iter(it)
    try:
        first = next(it)
    except StopIteration:
        return True
    if isinstance(first, float):
        return all(isinstance(x, float) for x in it)
    elif isinstance(first, np.ndarray):
        return all(x.shape == first.shape for x in it)
    else:
        raise TypeError(f"Expected float or np.ndarray, got {type(first)}")


[docs] def resize_gradient( grad: np.ndarray, bas: Tuple[int, ...], fragment_size_dict: Dict[int, int], fragment_slice_dict: Dict[int, slice], *, reverse: bool = False, ) -> np.ndarray: r"""Pads or extracts a gradient array between subsystem and full supersystem sizes. Parameters ---------- grad Gradient matrix of natural size for *bas*, (3 * _<nat in bas\>_, 3). If `reverse=True`, gradient matrix of supersystem size, (3 * _<nat of all fragments\>_, 3). bas 1-indexed fragments active in *grad*. If `reverse=True`, 1-indexed fragments to be extracted from *grad*. fragment_size_dict Dictionary containing the number of atoms of each 1-indexed fragment. For He--HOOH--Me cluster, `{1: 1, 2: 4, 3: 5}`. fragment_slice_dict Dictionary containing slices that index the gradient matrix for each of the 1-indexed fragments. For He--HOOH--Me cluster, `{1: slice(0, 1), 2: slice(1, 5), 3: slice(5, 10)}`. reverse If True, contract *grad* from supersystem size and shape to that which is natural for *bas*. Returns ------- np.ndarray Gradient array padded with zeros to supersystem size, (3 * _<nat of supersystem\>_, 3). If reverse=True, gradient array extracted to natural size, (3 * _<nat in bas\>_, 3). """ if reverse: nat = sum(fragment_size_dict[ifr] for ifr in bas) else: nat = sum(fragment_size_dict.values()) ret = np.zeros((nat, 3)) start = 0 for ifr in bas: end = start + fragment_size_dict[ifr] if reverse: ret[start:end] = grad[fragment_slice_dict[ifr]] else: ret[fragment_slice_dict[ifr]] = grad[start:end] start += fragment_size_dict[ifr] return ret
[docs] def resize_hessian( hess: np.ndarray, bas: Tuple[int, ...], fragment_size_dict: Dict[int, int], fragment_slice_dict: Dict[int, slice], *, reverse: bool = False, ) -> np.ndarray: r"""Pads or extracts a Hessian array between subsystem and full supersystem sizes. Parameters ---------- hess Hessian matrix of natural size for *bas*, (3 * _<nat in bas\>_, 3 * _<nat in bas\>_). If `reverse=True`, Hessian matrix of supersystem size, (3 * _<nat of all fragments\>_, 3 * _<nat of all fragments\>_). bas 1-indexed fragments active in *hess*. If `reverse=True`, 1-indexed fragments to be extracted from *hess*. fragment_size_dict Dictionary containing the number of atoms of each 1-indexed fragment. For He--HOOH--Me cluster, `{1: 1, 2: 4, 3: 5}`. fragment_slice_dict Dictionary containing slices that index the gradient matrix for each of the 1-indexed fragments. For He--HOOH--Me cluster, `{1: slice(0, 1), 2: slice(1, 5), 3: slice(5, 10)}`. reverse If True, contract *hess* from supersystem size and shape to that which is natural for *bas*. Returns ------- ndarray Hessian array padded with zeros to supersystem size, (3 * _<nat of supersystem\>_, 3 * _<nat of supersystem\>_). If reverse=True, Hessian array extracted to natural size, (3 * _<nat in bas\>_, 3 * _<nat in bas\>_). """ if reverse: nat = sum(fragment_size_dict[ifr] for ifr in bas) else: nat = sum(fragment_size_dict.values()) ret = np.zeros((nat * 3, nat * 3)) # Build up start and end slices abs_start, rel_start = 0, 0 abs_slices, rel_slices = [], [] for ifr in bas: rel_end = rel_start + 3 * fragment_size_dict[ifr] rel_slices.append(slice(rel_start, rel_end)) rel_start += 3 * fragment_size_dict[ifr] tmp_slice = fragment_slice_dict[ifr] abs_slices.append(slice(tmp_slice.start * 3, tmp_slice.stop * 3)) for abs_sl1, rel_sl1 in zip(abs_slices, rel_slices): for abs_sl2, rel_sl2 in zip(abs_slices, rel_slices): if reverse: ret[rel_sl1, rel_sl2] = hess[abs_sl1, abs_sl2] else: ret[abs_sl1, abs_sl2] = hess[rel_sl1, rel_sl2] return ret
def sum_cluster_data( data: Dict[str, Union[float, np.ndarray]], compute_list: Set["FragBasIndex"], mc_level_lbl: str, vmfc: bool = False, nb: int = 0, ) -> Union[float, np.ndarray]: """Sum (direct or alternate weight by n-body) like data from Parameters ---------- data Dictionary containing computed property (e/g/H/etc.) for each subsystem/component computation. compute_list A list of (frag, bas) tuples notating all the required computations for the desired sum. mc_level_lbl User label for what modelchem level results should be pulled out of *data*. vmfc Is this a vmfc calculation, by default False? nb 1-indexed n-body level only used when `vmfc=True`, by default 0. Returns ------- Union[float, np.ndarray] Scalar or array containing summed energy, gradient, Hessian, or other result. Usually (nocp or cp; `vmfc=False`), compute_list defines all fragments of a given number of active fragments and active basis fragments, so the return is the 3b@3b sum, for example. Other times (`vmfc=True`), compute list defines all fragments of a given number of active basis fragments. Then alternating weighting is applied so if `nb=3`, the return is the quantity (3b@3b sum - 2b@3b sum + 1b@3b sum), for example. Raises ------ ValueError If the shapes of all the `data` values aren't the same. No summing energies with gradients. """ sign = 1 if not all_same_shape(data.values()): raise ValueError("All values in data dictionary must have the same shape.") first_key = next(iter(data)) shape = find_shape(data[first_key]) ret = shaped_zero(shape) # np.sum might be preferable for precision for G/H but np.sum(generator) ill-defined precise_sum_func = math.fsum if isinstance(ret, float) else sum ret = precise_sum_func( (((-1) ** (nb - len(frag))) if vmfc else 1) * (data[labeler(mc_level_lbl, frag, bas)]) for frag, bas in compute_list ) # A more readable format for the above but not ammenable to using specialty summation functions # ``` # for frag, bas in compute_list: # egh = data[labeler(mc_level_lbl, frag, bas)] # # if vmfc: # sign = (-1) ** (nb - len(frag)) # # ret += sign * egh # ``` return ret
[docs] def labeler( mc_level_lbl: Optional[Union[str, int]], frag: Tuple[int, ...], bas: Tuple[int, ...], *, opaque: bool = True ) -> str: """Form label from model chemistry id and fragment and basis indices. Parameters ---------- mc_level_lbl Key identifying the model chemistry. May be `"(auto)"`. Often the ManyBodyInput.specification.specification keys. When `opaque=False`, result is for pretty printing so instead of a string, `mc_level_lbl` might be an integer index (apply 1-indexing beforehand) or None (if the model chemistry part is unwanted because single-level). frag List of 1-indexed fragments active in the supersystem. bas List of 1-indexed fragments with active basis sets in the supersystem. All those in *frag* plus any ghost. opaque Toggle whether to return JSON-friendly semi-opaque internal str label (True) or eye-friendly label with @ for basis and § for model chemistry (False). Returns ------- str JSON string from inputs:: labeler("mp2", 1, (1, 2)) #> '["mp2", [1], [1, 2]]' labeler("mp2", 1, (1, 2), opaque=False) #> '§mp2_(1)@(1, 2)' """ if isinstance(frag, int): frag = (frag,) if isinstance(bas, int): bas = (bas,) if opaque: return json.dumps([str(mc_level_lbl), frag, bas]) else: mc_pre = "" if mc_level_lbl is None else f{mc_level_lbl}_" return f"{mc_pre}({', '.join(map(str, frag))})@({', '.join(map(str, bas))})"
[docs] def delabeler(item: str) -> Tuple[str, Tuple[int, ...], Tuple[int, ...]]: """Back-form from label into tuple. Returns ------- mcfragbas Tuple of opaque or pretty-print model chemistry (may be None for latter), fragments and bases (1-indexed by convention). :: delabeler('["mp2", [1], [1, 2]]') #> ('mp2', [1], [1, 2]) delabeler("§mp2_(1)@(1, 2)") #> ('mp2', [1], [1, 2]) """ if "@" not in item: mc, frag, bas = json.loads(item) return str(mc), frag, bas else: mobj = re.match(r"(?:§(?P<mc>\S*)_)?(?P<frag>.*)@(?P<bas>.*)", item) mc, frag, bas = mobj.groups() # want lists and avoids (1) non-iterable error frag = frag.replace("(", "[").replace(")", "]") bas = bas.replace("(", "[").replace(")", "]") return mc, ast.literal_eval(frag), ast.literal_eval(bas)
def print_nbody_energy( energy_body_dict: Mapping[int, float], header: str, nfragments: int, modelchem_labels, embedding: bool, supersystem_ie_only: bool, supersystem_beyond: Optional[int], ) -> str: """Format summary string for energies of a single bsse_type. Logs and returns output. Parameters ---------- energy_body_dict Input data. header Specialization for table title. nfragments Number of lines in table is number of fragments. modelchem_labels Dictionary mapping active nbody-levels to a tuple with first element the full model chemistry key and second element a short label. A suitable dictionary is `modelchem_labels(manybodycore_instance.nbodies_per_mc_level)`. embedding Whether charge embedding present suppress printing, usually False supersystem_ie_only Whether only 1-body and nfragments-body levels are available, usually False. supersystem_beyond If not None, the number of nbody-levels computed by MBE explicitly. Beyond this gets supersystem SS label. Returns ------- str A text table in Hartrees and kcal/mol ``` ==> N-Body: Counterpoise Corrected (CP) energies <==' n-Body Total Energy Interaction Energy N-body Contribution to Interaction Energy' [Eh] [Eh] [kcal/mol] [Eh] [kcal/mol]' 1 -386.455609352609 0.000000000000 0.000000000000 0.000000000000 0.000000000000' 2 -384.203153844163 2.252455508446 1413.437170812134 2.252455508446 1413.437170812134' FULL/RTN 3 -384.128628718676 2.326980633933 1460.202393089624 0.074525125487 46.765222277490' ``` """ info = f"""\n ==> N-Body: {header} energies <==\n\n""" info += f""" {"MC n-Body":>15} Total Energy Interaction Energy N-body Contribution to Interaction Energy\n""" info += f""" [Eh] [Eh] [kcal/mol] [Eh] [kcal/mol]\n""" previous_e = energy_body_dict[1] tot_e = previous_e != 0.0 nbody_range = list(energy_body_dict) if supersystem_ie_only: nbody_range = [1, nfragments] nbody_range.sort() for nb in range(1, nfragments + 1): lbl = [] if supersystem_beyond and nb > supersystem_beyond: lbl.append("SS") if nb == nfragments: lbl.append("FULL") if nb == max(nbody_range): lbl.append("RTN") lbl = "/".join(lbl) mclbl = modelchem_labels.get(nb, ("", ""))[1] if nb in nbody_range: delta_e = energy_body_dict[nb] - previous_e delta_e_kcal = delta_e * constants.hartree2kcalmol if embedding: int_e = np.nan int_e_kcal = np.nan else: int_e = energy_body_dict[nb] - energy_body_dict[1] int_e_kcal = int_e * constants.hartree2kcalmol if supersystem_ie_only and nb == nfragments: if tot_e: info += f""" {lbl:>11} {mclbl:2} {nb:2} {energy_body_dict[nb]:20.12f} {int_e:20.12f} {int_e_kcal:20.12f} {"N/A":20} {"N/A":20}\n""" else: info += f""" {lbl:>11} {mclbl:2} {nb:2} {"N/A":14} {int_e:20.12f} {int_e_kcal:20.12f} {"N/A":20} {"N/A":20}\n""" else: if tot_e: if embedding: info += f""" {lbl:>11} {mclbl:2} {nb:2} {energy_body_dict[nb]:20.12f} {"N/A":20} {"N/A":14} {delta_e:20.12f} {delta_e_kcal:20.12f}\n""" else: info += f""" {lbl:>11} {mclbl:2} {nb:2} {energy_body_dict[nb]:20.12f} {int_e:20.12f} {int_e_kcal:20.12f} {delta_e:20.12f} {delta_e_kcal:20.12f}\n""" else: info += f""" {lbl:>11} {mclbl:2} {nb:2} {"N/A":14} {int_e:20.12f} {int_e_kcal:20.12f} {delta_e:20.12f} {delta_e_kcal:20.12f}\n""" previous_e = energy_body_dict[nb] else: info += f""" {lbl:>11} {"":2} {nb:2} {"N/A":20} {"N/A":20} {"N/A":20} {"N/A":20} {"N/A":20}\n""" mc_legend = {tup[1]: tup[0] for tup in modelchem_labels.values()} mc_legend = [f'{k}: "{v}"' for k, v in mc_legend.items()] info += f"\n MC Legend: {', '.join(mc_legend)}\n\n" return info def collect_vars( bsse: str, prop: str, body_dict: Mapping[int, Union[float, np.ndarray]], max_nbody: int, embedding: bool = False, supersystem_ie_only: bool = False, has_supersystem: bool = False, ) -> Dict: """From *body_dict*, construct data for ManyBodyResultProperties. Parameters ---------- bsse Label for a single many-body treatment, generally a value of BsseEnum. prop Label for a single property, generally a value of DriverEnum. body_dict Dictionary of minimal per-body info already specialized for property *prop* and treatment *bsse*. May contain either total data or interaction data (cummulative, not additive) from 1-body to max_nbody-body (see also *supersystem_ie_only*). Interaction data signaled by zero float or array for 1-body. May contain data from multiple model chemistry levels. max_nbody _description_ embedding Is charge embedding enabled, by default False? supersystem_ie_only Is data available in *body_dict* only for 1-body (possibly zero) and nfr-body levels? By default False: data is available for consecutive levels, up to max_nbody-body. has_supersystem Whether contributions higher than max_nbody are a summary correction. Returns ------- dict _description_. Empty return if *embedding* enabled. """ bsse = bsse.lower() prop = prop.lower() previous_e = body_dict[1] property_shape = find_shape(previous_e) tot_e = bool(np.count_nonzero(previous_e)) nbody_range = list(body_dict) nbody_range.sort() res = {} if tot_e: res[f"{bsse}_corrected_total_{prop}"] = body_dict[max_nbody] res[f"{bsse}_corrected_interaction_{prop}"] = body_dict[max_nbody] - body_dict[1] res[f"{bsse}_corrected_interaction_{prop}_through_1_body"] = shaped_zero(property_shape) if supersystem_ie_only: nfr = nbody_range[-1] for nb in [nfr]: res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1] if nb == 2: res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1] if tot_e: for nb in [1, nfr]: res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb] elif has_supersystem: nfr = nbody_range[-1] res[f"{bsse}_corrected_interaction_{prop}"] = body_dict[nfr] - body_dict[1] # reset for nb in range(2, max_nbody + 1): res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1] res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1] for nb in [nfr]: res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1] res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[max_nbody] if tot_e: res[f"{bsse}_corrected_total_{prop}"] = body_dict[nfr] # reset for nb in nbody_range: res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb] else: for nb in range(2, max(nbody_range) + 1): res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1] res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1] if tot_e: for nb in nbody_range: res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb] if embedding: res = {k: v for k, v in res.items() if "interaction" not in k} return res
[docs] def provenance_stamp(routine: str) -> Dict[str, str]: """Return dictionary satisfying QCSchema, https://github.com/MolSSI/QCSchema/blob/master/qcschema/dev/definitions.py#L23-L41 with QCManyBody's credentials for creator and version. The generating routine's name is passed in through `routine`. :: qcmb.utils.provenance_stamp(__name__) #> {'creator': 'QCManyBody', 'version': '0.2.2', 'routine': '__main__'} """ import qcmanybody return {"creator": "QCManyBody", "version": qcmanybody.__version__, "routine": routine}
[docs] def translate_qcvariables(varsmap: Mapping[str, Any]) -> Dict[str, Any]: """Translate between ManyBody results in Psi4/QCDB terminology (qcvars) and QCSchema terminology (skprops). Parameters ---------- varsmap Dictionary with keys all members of QCVariables or ManyBodyResultProperties and arbitrary values. Returns ------- dict varsmap with keys swapped to other set. Untranslatable keys are omitted. """ from qcmanybody.models.v1 import ManyBodyResultProperties # identify direction of translation qcv2skp = any([" " in lbl for lbl in varsmap]) labelmap = ManyBodyResultProperties.to_qcvariables(reverse=qcv2skp) return {labelmap[lbl]: data for lbl, data in varsmap.items() if lbl in labelmap}
[docs] def modelchem_labels( nb_per_mc: Dict[str, List[Union[int, Literal["supersystem"]]]], presorted: bool = False ) -> Dict[Union[int, Literal["supersystem"]], Tuple[str, str, str]]: """Form ordinal and letter labels for model chemistries. Parameters ---------- nb_per_mc Dictionary mapping model chemistries to lists of n-body levels computed. If a model chemistry is supersystem, the value should be ["supersystem"]. Generally, this is the `ManyBodyCore.nbodies_per_mc_level` data structure. presorted If True, the input dictionary keys and values are already sorted by increasing n-body level. This is the case when the input is `ManyBodyCore.nbodies_per_mc_level`. Returns ------- mc_per_nb Dictionary mapping n-body levels to a tuple of full model chemistry label, single-letter ordinal label, and n-body-levels-covered label. :: modelchem_labels({'ccsd': [1], 'mp2': [2, 3], 'hf': [4]}) #> {1: ('ccsd', '§A', '§1'), 2: ('mp2', '§B', '§23'), 3: ('mp2', '§B', '§23'), 4: ('hf', '§C', '§4')} modelchem_labels({'hi': [1, 2, 3], 'md': [4], 'md2': [5, 6, 7, 8, 9, 10], 'lo': ['supersystem']}) #> {1: ('hi', '§A', '§123'), 2: ('hi', '§A', '§123'), 3: ('hi', '§A', '§123'), # 4: ('md', '§B', '§4'), # 5: ('md2', '§C', '§<10'), 6: ('md2', '§C', '§<10'), 7: ('md2', '§C', '§<10'), 8: ('md2', '§C', '§<10'), 9: ('md2', '§C', '§<10'), 10: ('md2', '§C', '§<10'), # "supersystem": ('lo', '§D', '§SS')} """ sorted_nb_per_mc = { k: sorted(v) for (k, v) in sorted( nb_per_mc.items(), key=lambda mc_nbs: sorted([1000] if (mc_nbs[1] == ["supersystem"]) else mc_nbs[1])[0] ) } if presorted: assert ( sorted_nb_per_mc == nb_per_mc ), f"If presorted, input dictionary should be sorted. {nb_per_mc} != {sorted_nb_per_mc} " lvl_lbl = {} for mc, nbs in sorted_nb_per_mc.items(): if nbs == ["supersystem"]: lvl_lbl[mc] = "§SS" elif max(nbs) > 9: lvl_lbl[mc] = f"§<{max(nbs)}" else: lvl_lbl[mc] = f{''.join(map(str, sorted(nbs)))}" indexed_mc = {k: i for i, k in enumerate(sorted_nb_per_mc.keys())} mc_per_nb = { nb: (mc, f{string.ascii_uppercase[indexed_mc[mc]]}", lvl_lbl[mc]) for mc, nbs in sorted_nb_per_mc.items() for nb in nbs } return mc_per_nb