Skip to content

API Documentation

qcmanybody.ManyBodyCore

ManyBodyCore(molecule: Molecule, bsse_type: Sequence[BsseEnum], levels: Mapping[Union[int, Literal['supersystem']], str], *, return_total_data: bool, supersystem_ie_only: bool, embedding_charges: Mapping[int, Sequence[float]])
Source code in qcmanybody/core.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def __init__(
    self,
    molecule: Molecule,
    bsse_type: Sequence[BsseEnum],
    levels: Mapping[Union[int, Literal["supersystem"]], str],
    *,
    return_total_data: bool,
    supersystem_ie_only: bool,
    embedding_charges: Mapping[int, Sequence[float]],
):
    self.embedding_charges = embedding_charges
    if self.embedding_charges:
        if not bool(os.environ.get("QCMANYBODY_EMBEDDING_CHARGES", False)):  # obscure until further validation
            raise ValueError(
                f"Embedding charges for EE-MBE are still in testing. Set environment variable QCMANYBODY_EMBEDDING_CHARGES=1 to use at your own risk."
            )

    if isinstance(molecule, dict):
        mol = Molecule(**molecule)
    elif isinstance(molecule, Molecule):
        mol = molecule.copy()
    else:
        raise ValueError(f"Molecule input type of {type(molecule)} not understood.")
    self.molecule = mol
    self.bsse_type = [BsseEnum(x) for x in bsse_type]
    self.return_total_data = return_total_data
    self.supersystem_ie_only = supersystem_ie_only
    self.nfragments = len(self.molecule.fragments)

    self.levels = levels

    # Levels without supersystem
    self.levels_no_ss = {int(k): v for k, v in levels.items() if k != "supersystem"}

    # Just a set of all the modelchems
    self.mc_levels = set(self.levels.values())

    self.max_nbody = max(self.levels_no_ss.keys())

    if len(self.bsse_type) == 0:
        raise ValueError("No BSSE correction specified")

    if BsseEnum.vmfc in self.bsse_type and len(set(self.levels.values())) == 1:
        # For single-modelchem VMFC, NOCP & sometimes CP are produced for free
        if BsseEnum.nocp not in self.bsse_type:
            self.bsse_type.append(BsseEnum.nocp)
        if BsseEnum.cp not in self.bsse_type and self.max_nbody == self.nfragments:
            self.bsse_type.append(BsseEnum.cp)

    self.return_bsse_type = self.bsse_type[0]

    ###############################
    # Build nbodies_per_mc_level
    # TODO - use Lori's code
    # TODO - dict to list of lists to handle non-contiguous levels
    # TODO multilevel and supersystem_ie_only=T not allowed together
    # TODO supersystem in levels is not to be trusted -- nfrag only and skips levels
    max_level = max(self.levels_no_ss.keys())

    if set(range(1, max_level + 1)) != set(self.levels_no_ss.keys()):
        raise ValueError(f"Levels must be contiguous from 1 to {max_level}")

    self.nbodies_per_mc_level: Dict[str, list] = {mc_level: [] for mc_level in self.mc_levels}
    for k, v in self.levels_no_ss.items():
        self.nbodies_per_mc_level[v].append(k)

    # order nbodies_per_mc_level keys (modelchems) by the lowest n-body level covered; any
    #   supersystem key (replaced below) is at the end. Order nbodies within each modelchem.
    #   Reset mc_levels to match.
    self.nbodies_per_mc_level = {
        k: sorted(v)
        for (k, v) in sorted(self.nbodies_per_mc_level.items(), key=lambda item: sorted(item[1] or [1000])[0])
    }
    assert self.mc_levels == set(self.nbodies_per_mc_level.keys())  # remove after some downstream testing
    self.mc_levels = self.nbodies_per_mc_level.keys()

    for mc, nbs in self.nbodies_per_mc_level.items():
        if nbs and ((nbs[-1] - nbs[0]) != len(nbs) - 1):
            raise ValueError(
                f"QCManyBody: N-Body levels must be contiguous within a model chemistry spec ({mc}: {nbs}). Use an alternate spec name to accomplish this input."
            )
            # TODO - test and reenable if appropriate. my guess is that noncontig nb is fine on the core computing side,
            #   but trouble for computer and nbodies_per_mc_level inverting and indexing. Safer to deflect for now since input tweak allows the calc.

    # Supersystem is always at the end
    if "supersystem" in levels:
        ss_mc = levels["supersystem"]
        self.nbodies_per_mc_level[ss_mc].append("supersystem")

    # To be built on the fly
    self.mc_compute_dict = None

    if self.nfragments == 1:
        # Usually we try to "pass-through" edge cases, so a single-fragment mol would return 0 or ordinary energy,
        #   depending on rtd=T/F. But it seems more likely that user just forgot the fragments field, so we don't
        #   want to start a full energy on monsterMol. Reconsider handling in future.
        raise ValueError("""QCManyBody: Molecule fragmentation has not been specified through `fragments` field.""")

    if not np.array_equal(np.concatenate(self.molecule.fragments), np.arange(len(self.molecule.symbols))):
        raise ValueError("""QCManyBody: non-contiguous fragments could be implemented but aren't at present""")

    # Build size and slices dictionaries. Assumes fragments are contiguous
    self.fragment_size_dict = {}
    self.fragment_slice_dict = {}
    iat = 0
    for ifr in range(1, self.nfragments + 1):
        nat = len(self.molecule.fragments[ifr - 1])
        self.fragment_size_dict[ifr] = nat
        self.fragment_slice_dict[ifr] = slice(iat, iat + nat)
        iat += nat

format_calc_plan

format_calc_plan(sset: str = 'all') -> Tuple[str, Dict[str, Dict[int, int]]]

Formulate per-modelchem and per-body job count data and summary text.

Parameters:

Name Type Description Default
sset str

Among {"all", "nocp", "cp", "vmfc_compute"}, which data structure to return.

'all'

Returns:

Type Description
info

A text summary with per- model chemistry and per- n-body-level job counts.

Model chemistry "c4-ccsd" (§A):         22
     Number of 1-body computations:     16 (nocp: 0, cp: 0, vmfc_compute: 16)
     Number of 2-body computations:      6 (nocp: 0, cp: 0, vmfc_compute: 6)

Model chemistry "c4-mp2" (§B):          28
     Number of 1-body computations:     12 (nocp: 0, cp: 0, vmfc_compute: 12)
     Number of 2-body computations:     12 (nocp: 0, cp: 0, vmfc_compute: 12)
     Number of 3-body computations:      4 (nocp: 0, cp: 0, vmfc_compute: 4)

Dict[str, Dict[int, int]]

Data structure with outer key mc-label, inner key 1-indexed n-body, and value job count.

Source code in qcmanybody/core.py
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def format_calc_plan(self, sset: str = "all") -> Tuple[str, Dict[str, Dict[int, int]]]:
    """Formulate per-modelchem and per-body job count data and summary text.

    Parameters
    ----------
    sset
        Among {"all", "nocp", "cp", "vmfc_compute"}, which data structure to return.

    Returns
    -------
    info
        A text summary with per- model chemistry and per- n-body-level job counts.
        ```
        Model chemistry "c4-ccsd" (§A):         22
             Number of 1-body computations:     16 (nocp: 0, cp: 0, vmfc_compute: 16)
             Number of 2-body computations:      6 (nocp: 0, cp: 0, vmfc_compute: 6)

        Model chemistry "c4-mp2" (§B):          28
             Number of 1-body computations:     12 (nocp: 0, cp: 0, vmfc_compute: 12)
             Number of 2-body computations:     12 (nocp: 0, cp: 0, vmfc_compute: 12)
             Number of 3-body computations:      4 (nocp: 0, cp: 0, vmfc_compute: 4)
        ```
    Dict[str, Dict[int, int]]
        Data structure with outer key mc-label, inner key 1-indexed n-body, and value job count.
    """
    # Rearrange compute_list from key nb having values (species) to compute all of that nb
    #   to key nb having values counting that nb.
    compute_list_count = {}
    for mc, compute_dict in self.compute_map.items():
        compute_list_count[mc] = {}
        for sub in compute_dict:  # all, nocp, cp, vmfc
            all_calcs = set().union(*compute_dict[sub].values())
            compute_list_count[mc][sub] = Counter([len(frag) for (frag, _) in all_calcs])

    mc_labels = modelchem_labels(self.nbodies_per_mc_level, presorted=True)
    full_to_ordinal_mc_lbl = {v[0]: v[1] for v in mc_labels.values()}
    info = []
    for mc, counter in compute_list_count.items():
        all_counter = counter["all"]
        mcheader = f'    Model chemistry "{mc}" ({full_to_ordinal_mc_lbl[mc]}):'
        info.append(f"{mcheader:38} {sum(all_counter.values()):6}")
        for nb, count in sorted(all_counter.items()):
            other_counts = [f"{sub}: {counter[sub][nb]}" for sub in ["nocp", "cp", "vmfc_compute"]]
            info.append(f"        Number of {nb}-body computations: {count:6} ({', '.join(other_counts)})")
        info.append("")
    info = "\n".join(info)

    logger.info(info)
    return info, {mc: dsset[sset] for mc, dsset in compute_list_count.items()}

iterate_molecules

iterate_molecules() -> Iterable[Tuple[str, str, Molecule]]

Iterate over all the molecules needed for the computation.

Yields model chemistry, label, and molecule.

Source code in qcmanybody/core.py
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
def iterate_molecules(self) -> Iterable[Tuple[str, str, Molecule]]:
    """Iterate over all the molecules needed for the computation.

    Yields model chemistry, label, and molecule.
    """

    done_molecules = set()

    for mc, compute_dict in self.compute_map.items():
        # TODO - this is a bit of a hack. Lots of duplication when reaching higher nbody
        for compute_list in compute_dict["all"].values():
            for real_atoms, basis_atoms in compute_list:
                label = labeler(mc, real_atoms, basis_atoms)
                if label in done_molecules:
                    continue

                ghost_atoms = list(set(basis_atoms) - set(real_atoms))

                # Shift to zero-indexing
                real_atoms_0 = [x - 1 for x in real_atoms]
                ghost_atoms_0 = [x - 1 for x in ghost_atoms]
                mol = self.molecule.get_fragment(real_atoms_0, ghost_atoms_0, orient=False, group_fragments=False)
                mol = mol.copy(update={"fix_com": True, "fix_orientation": True})

                if self.embedding_charges:
                    embedding_frags = list(set(range(1, self.nfragments + 1)) - set(basis_atoms))
                    charges = []
                    for ifr in embedding_frags:
                        positions = self.molecule.get_fragment(ifr - 1).geometry.tolist()
                        charges.extend([[chg, i] for i, chg in zip(positions, self.embedding_charges[ifr])])
                    mol.extras["embedding_charges"] = charges

                done_molecules.add(label)
                yield mc, label, mol

analyze

analyze(component_results: Dict[str, Dict[str, Union[float, ndarray]]])

Parameters:

Name Type Description Default
component_results Dict[str, Dict[str, Union[float, ndarray]]]

Nested dictionary with results from all individual molecular system calculations, including all subsystem/basis combinations, all model chemistries, and all properties (e.g., e/g/h).

For example, the below is the format for a nocp gradient run on a helium dimer with 1-body at CCSD and 2-body at MP2. The outer string key can be generated with the qcmanybody.utils.labeler function. The inner string key is any property; QCManyBody presently knows how to process energy/gradient/Hessian.

{'["ccsd", [1], [1]]': {'energy': -2.87, 'gradient': array([[0., 0., 0.]])},
 '["ccsd", [2], [2]]': {'energy': -2.87, 'gradient': array([[0., 0., 0.]])},
 '["mp2", [1], [1]]': {'energy': -2.86, 'gradient': array([[0., 0., 0.]])},
 '["mp2", [2], [2]]': {'energy': -2.86, 'gradient': array([[0., 0., 0.]])},
 '["mp2", [1, 2], [1, 2]]': {'energy': -5.73, 'gradient': array([[ 0., 0., 0.0053], [ 0., 0., -0.0053]])},
}

required
Source code in qcmanybody/core.py
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
def analyze(
    self,
    component_results: Dict[str, Dict[str, Union[float, np.ndarray]]],
):
    """

    Parameters
    ----------
    component_results
        Nested dictionary with results from all individual molecular system
        calculations, including all subsystem/basis combinations, all model
        chemistries, and all properties (e.g., e/g/h).

        For example, the below is the format for a nocp gradient run on a
        helium dimer with 1-body at CCSD and 2-body at MP2. The outer string
        key can be generated with the ``qcmanybody.utils.labeler`` function.
        The inner string key is any property; QCManyBody presently knows how
        to process energy/gradient/Hessian.
        ```
        {'["ccsd", [1], [1]]': {'energy': -2.87, 'gradient': array([[0., 0., 0.]])},
         '["ccsd", [2], [2]]': {'energy': -2.87, 'gradient': array([[0., 0., 0.]])},
         '["mp2", [1], [1]]': {'energy': -2.86, 'gradient': array([[0., 0., 0.]])},
         '["mp2", [2], [2]]': {'energy': -2.86, 'gradient': array([[0., 0., 0.]])},
         '["mp2", [1, 2], [1, 2]]': {'energy': -5.73, 'gradient': array([[ 0., 0., 0.0053], [ 0., 0., -0.0053]])},
        }
        ```
    """

    # All properties that were passed to us
    # * seed with "energy" so free/no-op jobs can process
    available_properties: Set[str] = {"energy"}
    for property_data in component_results.values():
        available_properties.update(property_data.keys())

    # reorganize to component_results_inv[property][label] = 1.23
    component_results_inv = {k: {} for k in available_properties}

    for cluster_label, property_data in component_results.items():
        for property_label, property_value in property_data.items():
            component_results_inv[property_label][cluster_label] = property_value

    # Remove any missing data
    component_results_inv = {k: v for k, v in component_results_inv.items() if v}
    if not component_results_inv:
        # Note B: Rarely, "no results" is expected, like for CP-only,
        #   rtd=False, and max_nbody=1. We'll add a dummy entry so
        #   processing can continue.
        component_results_inv["energy"] = {'["dummy", [1000], [1000]]': 0.0}

    # Actually analyze
    is_embedded = bool(self.embedding_charges)
    component_properties = defaultdict(dict)
    all_results = {}
    nbody_dict = {}
    stdout = ""
    #        all_results["energy_body_dict"] = {"cp": {1: 0.0}}

    for property_label, property_results in component_results_inv.items():
        # Expand gradient and hessian
        if property_label == "gradient":
            property_results = {k: self.resize_gradient(v, delabeler(k)[2]) for k, v in property_results.items()}
        if property_label == "hessian":
            property_results = {k: self.resize_hessian(v, delabeler(k)[2]) for k, v in property_results.items()}

        r = self._analyze(property_label, property_results)
        for k, v in property_results.items():
            component_properties[k]["calcinfo_natom"] = len(self.molecule.symbols)
            component_properties[k][f"return_{property_label}"] = v
        all_results.update(r)

    for bt in self.bsse_type:
        stdout += print_nbody_energy(
            all_results["energy_body_dict"][bt],
            f"{bt.formal()} ({bt.abbr()})",
            self.nfragments,
            modelchem_labels(self.nbodies_per_mc_level, presorted=True),
            is_embedded,
            self.supersystem_ie_only,
            self.max_nbody if self.has_supersystem else None,
        )

    for property_label in available_properties:
        for bt in self.bsse_type:
            nbody_dict.update(
                collect_vars(
                    bt,
                    property_label,
                    all_results[f"{property_label}_body_dict"][bt],
                    self.max_nbody,
                    is_embedded,
                    self.supersystem_ie_only,
                    self.has_supersystem,
                )
            )

    all_results["results"] = nbody_dict
    all_results["component_properties"] = component_properties

    # Make dictionary with "1cp", "2cp", etc
    ebd = all_results["energy_body_dict"]
    all_results["energy_body_dict"] = {str(k) + bt: v for bt in ebd for k, v in ebd[bt].items()}
    all_results["stdout"] = stdout

    return all_results

qcmanybody.ManyBodyComputer

Bases: BaseComputerQCNG

from_manybodyinput classmethod

from_manybodyinput(input_model: ManyBodyInput, build_tasks: bool = True)
Source code in qcmanybody/computer.py
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
@classmethod
def from_manybodyinput(cls, input_model: ManyBodyInput, build_tasks: bool = True):

    computer_model = cls(
        molecule=input_model.molecule,
        driver=input_model.specification.driver,
        # v2: **input_model.specification.keywords.model_dump(exclude_unset=True),
        **input_model.specification.keywords.dict(exclude_unset=True),
        input_data=input_model,  # storage, to reconstitute ManyBodyResult
    )
    nb_per_mc = computer_model.nbodies_per_mc_level

    # print("\n<<<  (ZZ 1) QCEngine harness ManyBodyComputerQCNG.from_qcschema_ben  >>>")
    # v2: pprint.pprint(computer_model.model_dump(), width=200)
    # pprint.pprint(computer_model.dict(), width=200)
    # print(f"nbodies_per_mc_level={nb_per_mc}")

    comp_levels = {}
    for mc_level_idx, mtd in enumerate(computer_model.levels.values()):
        for lvl1 in nb_per_mc[mc_level_idx]:
            key = "supersystem" if lvl1 == "supersystem" else int(lvl1)
            comp_levels[key] = mtd

    specifications = {}
    for mtd, spec in computer_model.input_data.specification.specification.items():
        spec = spec.dict()
        specifications[mtd] = {}
        specifications[mtd]["program"] = spec.pop("program")
        specifications[mtd]["specification"] = spec
        specifications[mtd]["specification"][
            "driver"
        ] = computer_model.driver  # overrides atomic driver with mb driver
        specifications[mtd]["specification"].pop("schema_name", None)

    computer_model.qcmb_core = ManyBodyCore(
        computer_model.molecule,
        computer_model.bsse_type,
        comp_levels,
        return_total_data=computer_model.return_total_data,
        supersystem_ie_only=computer_model.supersystem_ie_only,
        embedding_charges=computer_model.embedding_charges,
    )

    # check that core and computer storage are consistent in mc ordering and grouping and nbody levels
    assert (
        list(computer_model.qcmb_core.nbodies_per_mc_level.values()) == computer_model.nbodies_per_mc_level
    ), f"CORE {computer_model.qcmb_core.nbodies_per_mc_level.values()} != COMPUTER {computer_model.nbodies_per_mc_level}"
    assert list(computer_model.qcmb_core.nbodies_per_mc_level.keys()) == list(
        computer_model.levels.values()
    ), f"CORE {computer_model.qcmb_core.nbodies_per_mc_level.keys()} != COMPUTER {computer_model.levels.values()}"

    if not build_tasks:
        return computer_model

    try:
        import qcengine as qcng
    except ModuleNotFoundError:
        raise ModuleNotFoundError(
            "Python module qcengine not found. Solve by installing it: "
            "`conda install qcengine -c conda-forge` or `pip install qcengine`"
        )

    component_properties = {}
    component_results = {}

    for chem, label, imol in computer_model.qcmb_core.iterate_molecules():
        inp = AtomicInput(molecule=imol, **specifications[chem]["specification"])
        # inp = AtomicInput(molecule=imol, **specifications[chem]["specification"], extras={"psiapi": True})  # faster for p4

        if imol.extras.get("embedding_charges"):  # or test on self.embedding_charges ?
            if specifications[chem]["program"] == "psi4":
                charges = imol.extras["embedding_charges"]
                fkw = inp.keywords.get("function_kwargs", {})
                fkw.update({"external_potentials": charges})
                inp.keywords["function_kwargs"] = fkw
            else:
                raise RuntimeError(
                    f"Don't know how to handle external charges in {specifications[chem]['program']}"
                )

        _, real, bas = delabeler(label)
        result = qcng.compute(inp, specifications[chem]["program"])
        component_results[label] = result

        if not result.success:
            # print(result.error.error_message)
            raise RuntimeError("Calculation did not succeed! Error:\n" + result.error.error_message)

        # pull out stuff
        props = {"energy", "gradient", "hessian"}

        component_properties[label] = {}

        for p in props:
            if hasattr(result.properties, f"return_{p}"):
                v = getattr(result.properties, f"return_{p}")
                # print(f"  {label} {p}: {v}")
                if v is not None:
                    component_properties[label][p] = v

    # print("\n<<<  (ZZ 2) QCEngine harness ManyBodyComputerQCNG.from_qcschema_ben component_properties  >>>")
    # with np.printoptions(precision=6, suppress=True):
    #     pprint.pprint(component_properties, width=200)

    analyze_back = computer_model.qcmb_core.analyze(component_properties)
    analyze_back["nbody_number"] = len(component_properties)
    # print("\n<<<  (ZZ 3) QCEngine harness ManyBodyComputerQCNG.from_qcschema_ben analyze_back  >>>")
    # pprint.pprint(analyze_back, width=200)

    return computer_model.get_results(external_results=analyze_back, component_results=component_results)

ManyBodyComputer

key type required description default
input_data True Input schema containing the relevant settings for performing the many body expansion. This is entirely redundant with the piecemeal assembly of this Computer class and is only stored to be available for error handling and exact reconstruction of ManyBodyResult. None
bsse_type False Requested BSSE treatments. First in list determines which interaction or total energy/gradient/Hessian returned. []
molecule True Target molecule for many body expansion (MBE) or interaction energy (IE) analysis. Fragmentation should already be defined in fragments and related fields. None
driver True The computation driver; i.e., energy, gradient, hessian. In case of ambiguity (e.g., MBE gradient through finite difference energies or MBE energy through composite method), this field refers to the target derivative, not any means specification. None
embedding_charges typing.List[float] False Atom-centered point charges to be used to speed up nbody-level convergence. Charges are placed on molecule fragments whose basis sets are not included in the computation. (An implication is that charges aren't invoked for bsse_type=cp.) Keys: 1-based index of fragment. Values: list of atom charges for that fragment. None
return_total_data False When True, returns the total data (energy/gradient/Hessian) of the system, otherwise returns interaction data. Default is False for energies, True for gradients and Hessians. Note that the calculation of counterpoise corrected total energies implies the calculation of the energies of monomers in the monomer basis, hence specifying return_total_data = True may carry out more computations than return_total_data = False. For gradients and Hessians, return_total_data = False is rarely useful. None
levels False Dictionary of different levels of theory for different levels of expansion. Note that the primary method_string is not used when this keyword is given. supersystem computes all higher order n-body effects up to the number of fragments; this higher-order correction uses the nocp basis, regardless of bsse_type. A method fills in for any lower unlisted nbody levels. Note that if both this and max_nbody are provided, they must be consistent. Examples: SUPERSYSTEM definition suspect* {1: 'ccsd(t)', 2: 'mp2', 'supersystem': 'scf'} * {2: 'ccsd(t)/cc-pvdz', 3: 'mp2'} * Now invalid: {1: 2, 2: 'ccsd(t)/cc-pvdz', 3: 'mp2'} Examples above are processed in the ManyBodyComputer, and once processed, only the values should be used. The keys turn into nbodies_per_mc_level, as notated below. * {1: 'ccsd(t)', 2: 'mp2', 'supersystem': 'scf'} -> nbodies_per_mc_level=[[1], [2], ['supersystem']] * {2: 'ccsd(t)/cc-pvdz', 3: 'mp2'} -> nbodies_per_mc_level=[[1, 2], [3]] None
max_nbody False Maximum number of bodies to include in the many-body treatment. Possible: max_nbody <= nfragments. Default: max_nbody = nfragments. None
supersystem_ie_only False Target the supersystem total/interaction energy (IE) data over the many-body expansion (MBE) analysis, thereby omitting intermediate-body calculations. When False (default), compute each n-body level in the MBE up through max_nbody. When True (only allowed for max_nbody = nfragments ), only compute enough for the overall interaction/total energy: max_nbody-body and 1-body. When True, properties INTERACTION {driver} THROUGH {max_nbody}-BODY will always be available; TOTAL {driver} THROUGH {max_nbody}-BODY will be available depending on return_total_data ; and {max_nbody}-BODY CONTRIBUTION TO {driver} won't be available (except for dimers). This keyword produces no savings for a two-fragment molecule. But for the interaction energy of a three-fragment molecule, for example, 2-body subsystems can be skipped with supersystem_ie_only=True. Do not use with vmfc in bsse_type as it cannot produce savings. False
task_list typing.Any False {}
qcmb_core typing.Any False Low-level interface None

qcmanybody.utils

all_same_shape

all_same_shape(it: Iterable[Union[float, ndarray]]) -> bool

Check if all elements of an iterable have the same shape.

Source code in qcmanybody/utils.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def all_same_shape(it: Iterable[Union[float, np.ndarray]]) -> bool:
    """Check if all elements of an iterable have the same shape."""

    it = iter(it)
    try:
        first = next(it)
    except StopIteration:
        return True
    if isinstance(first, float):
        return all(isinstance(x, float) for x in it)
    elif isinstance(first, np.ndarray):
        return all(x.shape == first.shape for x in it)
    else:
        raise TypeError(f"Expected float or np.ndarray, got {type(first)}")

resize_gradient

resize_gradient(grad: ndarray, bas: Tuple[int, ...], fragment_size_dict: Dict[int, int], fragment_slice_dict: Dict[int, slice], *, reverse: bool = False) -> ndarray

Pads or extracts a gradient array between subsystem and full supersystem sizes.

Parameters:

Name Type Description Default
grad ndarray

Gradient matrix of natural size for bas, (3 * <nat in bas>, 3). If reverse=True, gradient matrix of supersystem size, (3 * <nat of all fragments>, 3).

required
bas Tuple[int, ...]

1-indexed fragments active in grad. If reverse=True, 1-indexed fragments to be extracted from grad.

required
fragment_size_dict Dict[int, int]

Dictionary containing the number of atoms of each 1-indexed fragment. For He--HOOH--Me cluster, {1: 1, 2: 4, 3: 5}.

required
fragment_slice_dict Dict[int, slice]

Dictionary containing slices that index the gradient matrix for each of the 1-indexed fragments. For He--HOOH--Me cluster, {1: slice(0, 1), 2: slice(1, 5), 3: slice(5, 10)}.

required
reverse bool

If True, contract grad from supersystem size and shape to that which is natural for bas.

False

Returns:

Type Description
ndarray

Gradient array padded with zeros to supersystem size, (3 * <nat of supersystem>, 3). If reverse=True, gradient array extracted to natural size, (3 * <nat in bas>, 3).

Source code in qcmanybody/utils.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def resize_gradient(
    grad: np.ndarray,
    bas: Tuple[int, ...],
    fragment_size_dict: Dict[int, int],
    fragment_slice_dict: Dict[int, slice],
    *,
    reverse: bool = False,
) -> np.ndarray:
    r"""Pads or extracts a gradient array between subsystem and full supersystem sizes.

    Parameters
    ----------
    grad
        Gradient matrix of natural size for *bas*, (3 * _<nat in bas\>_, 3).
        If `reverse=True`, gradient matrix of supersystem size, (3 * _<nat of all fragments\>_, 3).
    bas
        1-indexed fragments active in *grad*.
        If `reverse=True`, 1-indexed fragments to be extracted from *grad*.
    fragment_size_dict
        Dictionary containing the number of atoms of each 1-indexed fragment.
        For He--HOOH--Me cluster, `{1: 1, 2: 4, 3: 5}`.
    fragment_slice_dict
        Dictionary containing slices that index the gradient matrix for each of the 1-indexed fragments.
        For He--HOOH--Me cluster, `{1: slice(0, 1), 2: slice(1, 5), 3: slice(5, 10)}`.
    reverse
        If True, contract *grad* from supersystem size and shape to that which is natural for *bas*.

    Returns
    -------
    np.ndarray
        Gradient array padded with zeros to supersystem size, (3 * _<nat of supersystem\>_, 3).
        If reverse=True, gradient array extracted to natural size, (3 * _<nat in bas\>_, 3).

    """
    if reverse:
        nat = sum(fragment_size_dict[ifr] for ifr in bas)
    else:
        nat = sum(fragment_size_dict.values())
    ret = np.zeros((nat, 3))

    start = 0
    for ifr in bas:
        end = start + fragment_size_dict[ifr]
        if reverse:
            ret[start:end] = grad[fragment_slice_dict[ifr]]
        else:
            ret[fragment_slice_dict[ifr]] = grad[start:end]
        start += fragment_size_dict[ifr]

    return ret

resize_hessian

resize_hessian(hess: ndarray, bas: Tuple[int, ...], fragment_size_dict: Dict[int, int], fragment_slice_dict: Dict[int, slice], *, reverse: bool = False) -> ndarray

Pads or extracts a Hessian array between subsystem and full supersystem sizes.

Parameters:

Name Type Description Default
hess ndarray

Hessian matrix of natural size for bas, (3 * <nat in bas>, 3 * <nat in bas>). If reverse=True, Hessian matrix of supersystem size, (3 * <nat of all fragments>, 3 * <nat of all fragments>).

required
bas Tuple[int, ...]

1-indexed fragments active in hess. If reverse=True, 1-indexed fragments to be extracted from hess.

required
fragment_size_dict Dict[int, int]

Dictionary containing the number of atoms of each 1-indexed fragment. For He--HOOH--Me cluster, {1: 1, 2: 4, 3: 5}.

required
fragment_slice_dict Dict[int, slice]

Dictionary containing slices that index the gradient matrix for each of the 1-indexed fragments. For He--HOOH--Me cluster, {1: slice(0, 1), 2: slice(1, 5), 3: slice(5, 10)}.

required
reverse bool

If True, contract hess from supersystem size and shape to that which is natural for bas.

False

Returns:

Type Description
ndarray

Hessian array padded with zeros to supersystem size, (3 * <nat of supersystem>, 3 * <nat of supersystem>). If reverse=True, Hessian array extracted to natural size, (3 * <nat in bas>, 3 * <nat in bas>).

Source code in qcmanybody/utils.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def resize_hessian(
    hess: np.ndarray,
    bas: Tuple[int, ...],
    fragment_size_dict: Dict[int, int],
    fragment_slice_dict: Dict[int, slice],
    *,
    reverse: bool = False,
) -> np.ndarray:
    r"""Pads or extracts a Hessian array between subsystem and full supersystem sizes.

    Parameters
    ----------
    hess
        Hessian matrix of natural size for *bas*, (3 * _<nat in bas\>_, 3 * _<nat in bas\>_).
        If `reverse=True`, Hessian matrix of supersystem size, (3 * _<nat of all fragments\>_,
        3 * _<nat of all fragments\>_).
    bas
        1-indexed fragments active in *hess*.
        If `reverse=True`, 1-indexed fragments to be extracted from *hess*.
    fragment_size_dict
        Dictionary containing the number of atoms of each 1-indexed fragment.
        For He--HOOH--Me cluster, `{1: 1, 2: 4, 3: 5}`.
    fragment_slice_dict
        Dictionary containing slices that index the gradient matrix for each of the 1-indexed fragments.
        For He--HOOH--Me cluster, `{1: slice(0, 1), 2: slice(1, 5), 3: slice(5, 10)}`.
    reverse
        If True, contract *hess* from supersystem size and shape to that which is natural for *bas*.

    Returns
    -------
    ndarray
        Hessian array padded with zeros to supersystem size, (3 * _<nat of supersystem\>_,
        3 * _<nat of supersystem\>_). If reverse=True, Hessian array extracted to natural size,
        (3 * _<nat in bas\>_, 3 * _<nat in bas\>_).

    """
    if reverse:
        nat = sum(fragment_size_dict[ifr] for ifr in bas)
    else:
        nat = sum(fragment_size_dict.values())
    ret = np.zeros((nat * 3, nat * 3))

    # Build up start and end slices
    abs_start, rel_start = 0, 0
    abs_slices, rel_slices = [], []
    for ifr in bas:
        rel_end = rel_start + 3 * fragment_size_dict[ifr]
        rel_slices.append(slice(rel_start, rel_end))
        rel_start += 3 * fragment_size_dict[ifr]

        tmp_slice = fragment_slice_dict[ifr]
        abs_slices.append(slice(tmp_slice.start * 3, tmp_slice.stop * 3))

    for abs_sl1, rel_sl1 in zip(abs_slices, rel_slices):
        for abs_sl2, rel_sl2 in zip(abs_slices, rel_slices):
            if reverse:
                ret[rel_sl1, rel_sl2] = hess[abs_sl1, abs_sl2]
            else:
                ret[abs_sl1, abs_sl2] = hess[rel_sl1, rel_sl2]

    return ret

sum_cluster_data

sum_cluster_data(data: Dict[str, Union[float, ndarray]], compute_list: Set[FragBasIndex], mc_level_lbl: str, vmfc: bool = False, nb: int = 0) -> Union[float, ndarray]

Sum (direct or alternate weight by n-body) like data from

Parameters:

Name Type Description Default
data Dict[str, Union[float, ndarray]]

Dictionary containing computed property (e/g/H/etc.) for each subsystem/component computation.

required
compute_list Set[FragBasIndex]

A list of (frag, bas) tuples notating all the required computations for the desired sum.

required
mc_level_lbl str

User label for what modelchem level results should be pulled out of data.

required
vmfc bool

Is this a vmfc calculation, by default False?

False
nb int

1-indexed n-body level only used when vmfc=True, by default 0.

0

Returns:

Type Description
Union[float, ndarray]

Scalar or array containing summed energy, gradient, Hessian, or other result. Usually (nocp or cp; vmfc=False), compute_list defines all fragments of a given number of active fragments and active basis fragments, so the return is the 3b@3b sum, for example. Other times (vmfc=True), compute list defines all fragments of a given number of active basis fragments. Then alternating weighting is applied so if nb=3, the return is the quantity (3b@3b sum - 2b@3b sum + 1b@3b sum), for example.

Raises:

Type Description
ValueError

If the shapes of all the data values aren't the same. No summing energies with gradients.

Source code in qcmanybody/utils.py
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def sum_cluster_data(
    data: Dict[str, Union[float, np.ndarray]],
    compute_list: Set[FragBasIndex],
    mc_level_lbl: str,
    vmfc: bool = False,
    nb: int = 0,
) -> Union[float, np.ndarray]:
    """Sum (direct or alternate weight by n-body) like data from

    Parameters
    ----------
    data
        Dictionary containing computed property (e/g/H/etc.) for each subsystem/component computation.
    compute_list
        A list of (frag, bas) tuples notating all the required computations for the desired sum.
    mc_level_lbl
        User label for what modelchem level results should be pulled out of *data*.
    vmfc
        Is this a vmfc calculation, by default False?
    nb
        1-indexed n-body level only used when `vmfc=True`, by default 0.

    Returns
    -------
    Union[float, np.ndarray]
        Scalar or array containing summed energy, gradient, Hessian, or other result.
        Usually (nocp or cp; `vmfc=False`), compute_list defines all fragments of a given number of
        active fragments and active basis fragments, so the return is the 3b@3b sum, for example.
        Other times (`vmfc=True`), compute list defines all fragments of a given number of active basis
        fragments. Then alternating weighting is applied so if `nb=3`, the return is the quantity
        (3b@3b sum - 2b@3b sum + 1b@3b sum), for example.

    Raises
    ------
    ValueError
        If the shapes of all the `data` values aren't the same. No summing energies with gradients.
    """
    sign = 1

    if not all_same_shape(data.values()):
        raise ValueError("All values in data dictionary must have the same shape.")

    first_key = next(iter(data))
    shape = find_shape(data[first_key])
    ret = shaped_zero(shape)

    precise_sum_func = math.fsum if isinstance(ret, float) else np.sum
    ret = precise_sum_func(
        (((-1) ** (nb - len(frag))) if vmfc else 1) * (data[labeler(mc_level_lbl, frag, bas)])
        for frag, bas in compute_list
    )

    # A more readable format for the above but not ammenable to using specialty summation functions
    # ```
    # for frag, bas in compute_list:
    #     egh = data[labeler(mc_level_lbl, frag, bas)]
    #
    #     if vmfc:
    #         sign = (-1) ** (nb - len(frag))
    #
    #     ret += sign * egh
    # ```

    return ret

labeler

labeler(mc_level_lbl: Optional[Union[str, int]], frag: Tuple[int, ...], bas: Tuple[int, ...], *, opaque: bool = True) -> str

Form label from model chemistry id and fragment and basis indices.

Parameters:

Name Type Description Default
mc_level_lbl Optional[Union[str, int]]

Key identifying the model chemistry. May be "(auto)". Often the ManyBodyInput.specification.specification keys. When opaque=False, result is for pretty printing so instead of a string, mc_level_lbl might be an integer index (apply 1-indexing beforehand) or None (if the model chemistry part is unwanted because single-level).

required
frag Tuple[int, ...]

List of 1-indexed fragments active in the supersystem.

required
bas Tuple[int, ...]

List of 1-indexed fragments with active basis sets in the supersystem. All those in frag plus any ghost.

required
opaque bool

Toggle whether to return JSON-friendly semi-opaque internal str label (True) or eye-friendly label with @ for basis and § for model chemistry (False).

True

Returns:

Type Description
str

JSON string from inputs:

labeler("mp2", 1, (1, 2))
#> '["mp2", [1], [1, 2]]'
labeler("mp2", 1, (1, 2), opaque=False)
#> '§mp2_(1)@(1, 2)'
Source code in qcmanybody/utils.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
def labeler(
    mc_level_lbl: Optional[Union[str, int]], frag: Tuple[int, ...], bas: Tuple[int, ...], *, opaque: bool = True
) -> str:
    """Form label from model chemistry id and fragment and basis indices.

    Parameters
    ----------
    mc_level_lbl
        Key identifying the model chemistry. May be `"(auto)"`. Often the
        ManyBodyInput.specification.specification keys.
        When `opaque=False`, result is for pretty printing so instead of a string,
        `mc_level_lbl` might be an integer index (apply 1-indexing beforehand)
        or None (if the model chemistry part is unwanted because single-level).
    frag
        List of 1-indexed fragments active in the supersystem.
    bas
        List of 1-indexed fragments with active basis sets in the supersystem.
        All those in *frag* plus any ghost.
    opaque
        Toggle whether to return JSON-friendly semi-opaque internal str label (True) or
        eye-friendly label with @ for basis and § for model chemistry (False).

    Returns
    -------
    str
        JSON string from inputs:

        ```python
        labeler("mp2", 1, (1, 2))
        #> '["mp2", [1], [1, 2]]'
        labeler("mp2", 1, (1, 2), opaque=False)
        #> '§mp2_(1)@(1, 2)'
        ```
    """
    if isinstance(frag, int):
        frag = (frag,)
    if isinstance(bas, int):
        bas = (bas,)

    if opaque:
        return json.dumps([str(mc_level_lbl), frag, bas])
    else:
        mc_pre = "" if mc_level_lbl is None else f{mc_level_lbl}_"
        return f"{mc_pre}({', '.join(map(str, frag))})@({', '.join(map(str, bas))})"

delabeler

delabeler(item: str) -> Tuple[str, Tuple[int, ...], Tuple[int, ...]]

Back-form from label into tuple.

Returns:

Type Description
mcfragbas

Tuple of opaque or pretty-print model chemistry (may be None for latter), fragments and bases (1-indexed by convention).

delabeler('["mp2", [1], [1, 2]]')
#> ('mp2', [1], [1, 2])
delabeler("§mp2_(1)@(1, 2)")
#> ('mp2', [1], [1, 2])
Source code in qcmanybody/utils.py
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
def delabeler(item: str) -> Tuple[str, Tuple[int, ...], Tuple[int, ...]]:
    """Back-form from label into tuple.

    Returns
    -------
    mcfragbas
        Tuple of opaque or pretty-print model chemistry (may be None for latter),
        fragments and bases (1-indexed by convention).

        ```python
        delabeler('["mp2", [1], [1, 2]]')
        #> ('mp2', [1], [1, 2])
        delabeler("§mp2_(1)@(1, 2)")
        #> ('mp2', [1], [1, 2])
        ```
    """

    if "@" not in item:
        mc, frag, bas = json.loads(item)
        return str(mc), frag, bas
    else:
        mobj = re.match(r"(?:§(?P<mc>\S*)_)?(?P<frag>.*)@(?P<bas>.*)", item)
        mc, frag, bas = mobj.groups()
        # want lists and avoids (1) non-iterable error
        frag = frag.replace("(", "[").replace(")", "]")
        bas = bas.replace("(", "[").replace(")", "]")
        return mc, ast.literal_eval(frag), ast.literal_eval(bas)

print_nbody_energy

print_nbody_energy(energy_body_dict: Mapping[int, float], header: str, nfragments: int, modelchem_labels, embedding: bool, supersystem_ie_only: bool, supersystem_beyond: Optional[int]) -> str

Format summary string for energies of a single bsse_type. Logs and returns output.

Parameters:

Name Type Description Default
energy_body_dict Mapping[int, float]

Input data.

required
header str

Specialization for table title.

required
nfragments int

Number of lines in table is number of fragments.

required
modelchem_labels

Dictionary mapping active nbody-levels to a tuple with first element the full model chemistry key and second element a short label. A suitable dictionary is modelchem_labels(manybodycore_instance.nbodies_per_mc_level).

required
embedding bool

Whether charge embedding present suppress printing, usually False

required
supersystem_ie_only bool

Whether only 1-body and nfragments-body levels are available, usually False.

required
supersystem_beyond Optional[int]

If not None, the number of nbody-levels computed by MBE explicitly. Beyond this gets supersystem SS label.

required

Returns:

Type Description
str

A text table in Hartrees and kcal/mol

==> N-Body: Counterpoise Corrected (CP) energies <=='

        n-Body     Total Energy            Interaction Energy                          N-body Contribution to Interaction Energy'
                   [Eh]                    [Eh]                  [kcal/mol]            [Eh]                  [kcal/mol]'
             1     -386.455609352609        0.000000000000        0.000000000000        0.000000000000        0.000000000000'
             2     -384.203153844163        2.252455508446     1413.437170812134        2.252455508446     1413.437170812134'
  FULL/RTN   3     -384.128628718676        2.326980633933     1460.202393089624        0.074525125487       46.765222277490'
Source code in qcmanybody/utils.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
def print_nbody_energy(
    energy_body_dict: Mapping[int, float],
    header: str,
    nfragments: int,
    modelchem_labels,
    embedding: bool,
    supersystem_ie_only: bool,
    supersystem_beyond: Optional[int],
) -> str:
    """Format summary string for energies of a single bsse_type. Logs and returns output.

    Parameters
    ----------
    energy_body_dict
        Input data.
    header
        Specialization for table title.
    nfragments
        Number of lines in table is number of fragments.
    modelchem_labels
        Dictionary mapping active nbody-levels to a tuple with first element the
        full model chemistry key and second element a short label. A suitable
        dictionary is `modelchem_labels(manybodycore_instance.nbodies_per_mc_level)`.
    embedding
        Whether charge embedding present suppress printing, usually False
    supersystem_ie_only
        Whether only 1-body and nfragments-body levels are available, usually False.
    supersystem_beyond
        If not None, the number of nbody-levels computed by MBE explicitly. Beyond this gets supersystem SS label.

    Returns
    -------
    str
        A text table in Hartrees and kcal/mol

        ```
        ==> N-Body: Counterpoise Corrected (CP) energies <=='

                n-Body     Total Energy            Interaction Energy                          N-body Contribution to Interaction Energy'
                           [Eh]                    [Eh]                  [kcal/mol]            [Eh]                  [kcal/mol]'
                     1     -386.455609352609        0.000000000000        0.000000000000        0.000000000000        0.000000000000'
                     2     -384.203153844163        2.252455508446     1413.437170812134        2.252455508446     1413.437170812134'
          FULL/RTN   3     -384.128628718676        2.326980633933     1460.202393089624        0.074525125487       46.765222277490'
        ```
    """
    info = f"""\n   ==> N-Body: {header} energies <==\n\n"""
    info += f"""        {"MC n-Body":>15}  Total Energy            Interaction Energy                          N-body Contribution to Interaction Energy\n"""
    info += f"""                         [Eh]                    [Eh]                  [kcal/mol]            [Eh]                  [kcal/mol]\n"""
    previous_e = energy_body_dict[1]
    tot_e = previous_e != 0.0
    nbody_range = list(energy_body_dict)
    if supersystem_ie_only:
        nbody_range = [1, nfragments]
    nbody_range.sort()
    for nb in range(1, nfragments + 1):
        lbl = []
        if supersystem_beyond and nb > supersystem_beyond:
            lbl.append("SS")
        if nb == nfragments:
            lbl.append("FULL")
        if nb == max(nbody_range):
            lbl.append("RTN")
        lbl = "/".join(lbl)

        mclbl = modelchem_labels.get(nb, ("", ""))[1]

        if nb in nbody_range:
            delta_e = energy_body_dict[nb] - previous_e
            delta_e_kcal = delta_e * constants.hartree2kcalmol
            if embedding:
                int_e = np.nan
                int_e_kcal = np.nan
            else:
                int_e = energy_body_dict[nb] - energy_body_dict[1]
                int_e_kcal = int_e * constants.hartree2kcalmol
            if supersystem_ie_only and nb == nfragments:
                if tot_e:
                    info += f"""  {lbl:>11} {mclbl:2} {nb:2}  {energy_body_dict[nb]:20.12f}  {int_e:20.12f}  {int_e_kcal:20.12f}        {"N/A":20}  {"N/A":20}\n"""
                else:
                    info += f"""  {lbl:>11} {mclbl:2} {nb:2}        {"N/A":14}  {int_e:20.12f}  {int_e_kcal:20.12f}        {"N/A":20}  {"N/A":20}\n"""
            else:
                if tot_e:
                    if embedding:
                        info += f"""  {lbl:>11} {mclbl:2} {nb:2}  {energy_body_dict[nb]:20.12f}        {"N/A":20}  {"N/A":14}  {delta_e:20.12f}  {delta_e_kcal:20.12f}\n"""
                    else:
                        info += f"""  {lbl:>11} {mclbl:2} {nb:2}  {energy_body_dict[nb]:20.12f}  {int_e:20.12f}  {int_e_kcal:20.12f}  {delta_e:20.12f}  {delta_e_kcal:20.12f}\n"""
                else:
                    info += f"""  {lbl:>11} {mclbl:2} {nb:2}        {"N/A":14}  {int_e:20.12f}  {int_e_kcal:20.12f}  {delta_e:20.12f}  {delta_e_kcal:20.12f}\n"""
            previous_e = energy_body_dict[nb]
        else:
            info += f"""  {lbl:>11} {"":2} {nb:2}        {"N/A":20}  {"N/A":20}  {"N/A":20}  {"N/A":20}  {"N/A":20}\n"""

    mc_legend = {tup[1]: tup[0] for tup in modelchem_labels.values()}
    mc_legend = [f'{k}: "{v}"' for k, v in mc_legend.items()]
    info += f"\n   MC Legend: {', '.join(mc_legend)}\n\n"
    return info

collect_vars

collect_vars(bsse: str, prop: str, body_dict: Mapping[int, Union[float, ndarray]], max_nbody: int, embedding: bool = False, supersystem_ie_only: bool = False, has_supersystem: bool = False) -> Dict

From body_dict, construct data for ManyBodyResultProperties.

Parameters:

Name Type Description Default
bsse str

Label for a single many-body treatment, generally a value of BsseEnum.

required
prop str

Label for a single property, generally a value of DriverEnum.

required
body_dict Mapping[int, Union[float, ndarray]]

Dictionary of minimal per-body info already specialized for property prop and treatment bsse. May contain either total data or interaction data (cummulative, not additive) from 1-body to max_nbody-body (see also supersystem_ie_only). Interaction data signaled by zero float or array for 1-body. May contain data from multiple model chemistry levels.

required
max_nbody int

description

required
embedding bool

Is charge embedding enabled, by default False?

False
supersystem_ie_only bool

Is data available in body_dict only for 1-body (possibly zero) and nfr-body levels? By default False: data is available for consecutive levels, up to max_nbody-body.

False
has_supersystem bool

Whether contributions higher than max_nbody are a summary correction.

False

Returns:

Type Description
dict

description. Empty return if embedding enabled.

Source code in qcmanybody/utils.py
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
def collect_vars(
    bsse: str,
    prop: str,
    body_dict: Mapping[int, Union[float, np.ndarray]],
    max_nbody: int,
    embedding: bool = False,
    supersystem_ie_only: bool = False,
    has_supersystem: bool = False,
) -> Dict:
    """From *body_dict*, construct data for ManyBodyResultProperties.

    Parameters
    ----------
    bsse
        Label for a single many-body treatment, generally a value of BsseEnum.
    prop
        Label for a single property, generally a value of DriverEnum.
    body_dict
        Dictionary of minimal per-body info already specialized for property *prop* and treatment
        *bsse*. May contain either total data or interaction data (cummulative, not additive) from
        1-body to max_nbody-body (see also *supersystem_ie_only*). Interaction data signaled by zero
        float or array for 1-body. May contain data from multiple model chemistry levels.
    max_nbody
        _description_
    embedding
        Is charge embedding enabled, by default False?
    supersystem_ie_only
        Is data available in *body_dict* only for 1-body (possibly zero) and nfr-body levels?
        By default False: data is available for consecutive levels, up to max_nbody-body.
    has_supersystem
        Whether contributions higher than max_nbody are a summary correction.

    Returns
    -------
    dict
        _description_. Empty return if *embedding* enabled.
    """
    bsse = bsse.lower()
    prop = prop.lower()
    previous_e = body_dict[1]
    property_shape = find_shape(previous_e)
    tot_e = bool(np.count_nonzero(previous_e))
    nbody_range = list(body_dict)
    nbody_range.sort()
    res = {}

    if tot_e:
        res[f"{bsse}_corrected_total_{prop}"] = body_dict[max_nbody]
    res[f"{bsse}_corrected_interaction_{prop}"] = body_dict[max_nbody] - body_dict[1]
    res[f"{bsse}_corrected_interaction_{prop}_through_1_body"] = shaped_zero(property_shape)

    if supersystem_ie_only:
        nfr = nbody_range[-1]
        for nb in [nfr]:
            res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1]
            if nb == 2:
                res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1]
        if tot_e:
            for nb in [1, nfr]:
                res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb]
    elif has_supersystem:
        nfr = nbody_range[-1]
        res[f"{bsse}_corrected_interaction_{prop}"] = body_dict[nfr] - body_dict[1]  # reset
        for nb in range(2, max_nbody + 1):
            res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1]
            res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1]
        for nb in [nfr]:
            res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1]
            res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[max_nbody]
        if tot_e:
            res[f"{bsse}_corrected_total_{prop}"] = body_dict[nfr]  # reset
            for nb in nbody_range:
                res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb]
    else:
        for nb in range(2, max(nbody_range) + 1):
            res[f"{bsse}_corrected_interaction_{prop}_through_{nb}_body"] = body_dict[nb] - body_dict[1]
            res[f"{bsse}_corrected_{nb}_body_contribution_to_{prop}"] = body_dict[nb] - body_dict[nb - 1]
        if tot_e:
            for nb in nbody_range:
                res[f"{bsse}_corrected_total_{prop}_through_{nb}_body"] = body_dict[nb]

    if embedding:
        res = {k: v for k, v in res.items() if "interaction" not in k}

    return res

provenance_stamp

provenance_stamp(routine: str) -> Dict[str, str]

Return dictionary satisfying QCSchema, https://github.com/MolSSI/QCSchema/blob/master/qcschema/dev/definitions.py#L23-L41 with QCManyBody's credentials for creator and version. The generating routine's name is passed in through routine.

qcmb.utils.provenance_stamp(__name__)
#> {'creator': 'QCManyBody', 'version': '0.2.2', 'routine': '__main__'}
Source code in qcmanybody/utils.py
506
507
508
509
510
511
512
513
514
515
516
517
518
519
def provenance_stamp(routine: str) -> Dict[str, str]:
    """Return dictionary satisfying QCSchema,
    https://github.com/MolSSI/QCSchema/blob/master/qcschema/dev/definitions.py#L23-L41
    with QCManyBody's credentials for creator and version. The
    generating routine's name is passed in through `routine`.

    ```python
    qcmb.utils.provenance_stamp(__name__)
    #> {'creator': 'QCManyBody', 'version': '0.2.2', 'routine': '__main__'}
    ```
    """
    import qcmanybody

    return {"creator": "QCManyBody", "version": qcmanybody.__version__, "routine": routine}

translate_qcvariables

translate_qcvariables(varsmap: Mapping[str, Any]) -> Dict[str, Any]

Translate between ManyBody results in Psi4/QCDB terminology (qcvars) and QCSchema terminology (skprops).

Parameters:

Name Type Description Default
varsmap Mapping[str, Any]

Dictionary with keys all members of QCVariables or ManyBodyResultProperties and arbitrary values.

required

Returns:

Type Description
dict

varsmap with keys swapped to other set. Untranslatable keys are omitted.

Source code in qcmanybody/utils.py
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
def translate_qcvariables(varsmap: Mapping[str, Any]) -> Dict[str, Any]:
    """Translate between ManyBody results in Psi4/QCDB terminology (qcvars) and QCSchema terminology (skprops).

    Parameters
    ----------
    varsmap
        Dictionary with keys all members of QCVariables or ManyBodyResultProperties and arbitrary values.

    Returns
    -------
    dict
        varsmap with keys swapped to other set. Untranslatable keys are omitted.

    """
    from qcmanybody.models import ManyBodyResultProperties

    # identify direction of translation
    qcv2skp = any([" " in lbl for lbl in varsmap])
    labelmap = ManyBodyResultProperties.to_qcvariables(reverse=qcv2skp)

    return {labelmap[lbl]: data for lbl, data in varsmap.items() if lbl in labelmap}

modelchem_labels

modelchem_labels(nb_per_mc: Dict[str, List[Union[int, Literal['supersystem']]]], presorted: bool = False) -> Dict[Union[int, Literal['supersystem']], Tuple[str, str, str]]

Form ordinal and letter labels for model chemistries.

Parameters:

Name Type Description Default
nb_per_mc Dict[str, List[Union[int, Literal['supersystem']]]]

Dictionary mapping model chemistries to lists of n-body levels computed. If a model chemistry is supersystem, the value should be ["supersystem"]. Generally, this is the ManyBodyCore.nbodies_per_mc_level data structure.

required
presorted bool

If True, the input dictionary keys and values are already sorted by increasing n-body level. This is the case when the input is ManyBodyCore.nbodies_per_mc_level.

False

Returns:

Type Description
mc_per_nb

Dictionary mapping n-body levels to a tuple of full model chemistry label, single-letter ordinal label, and n-body-levels-covered label.

modelchem_labels({'ccsd': [1], 'mp2': [2, 3], 'hf': [4]})
#> {1: ('ccsd', '§A', '§1'), 2: ('mp2', '§B', '§23'), 3: ('mp2', '§B', '§23'), 4: ('hf', '§C', '§4')}

modelchem_labels({'hi': [1, 2, 3], 'md': [4], 'md2': [5, 6, 7, 8, 9, 10], 'lo': ['supersystem']})
#> {1: ('hi', '§A', '§123'), 2: ('hi', '§A', '§123'), 3: ('hi', '§A', '§123'),
#   4: ('md', '§B', '§4'),
#   5: ('md2', '§C', '§<10'), 6: ('md2', '§C', '§<10'), 7: ('md2', '§C', '§<10'), 8: ('md2', '§C', '§<10'), 9: ('md2', '§C', '§<10'), 10: ('md2', '§C', '§<10'),
#   "supersystem": ('lo', '§D', '§SS')}
Source code in qcmanybody/utils.py
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
def modelchem_labels(
    nb_per_mc: Dict[str, List[Union[int, Literal["supersystem"]]]], presorted: bool = False
) -> Dict[Union[int, Literal["supersystem"]], Tuple[str, str, str]]:
    """Form ordinal and letter labels for model chemistries.

    Parameters
    ----------
    nb_per_mc
        Dictionary mapping model chemistries to lists of n-body levels computed.
        If a model chemistry is supersystem, the value should be ["supersystem"].
        Generally, this is the `ManyBodyCore.nbodies_per_mc_level` data structure.
    presorted
        If True, the input dictionary keys and values are already sorted by increasing n-body level.
        This is the case when the input is `ManyBodyCore.nbodies_per_mc_level`.

    Returns
    -------
    mc_per_nb
        Dictionary mapping n-body levels to a tuple of full model chemistry label,
        single-letter ordinal label, and n-body-levels-covered label.

        ```python
        modelchem_labels({'ccsd': [1], 'mp2': [2, 3], 'hf': [4]})
        #> {1: ('ccsd', '§A', '§1'), 2: ('mp2', '§B', '§23'), 3: ('mp2', '§B', '§23'), 4: ('hf', '§C', '§4')}

        modelchem_labels({'hi': [1, 2, 3], 'md': [4], 'md2': [5, 6, 7, 8, 9, 10], 'lo': ['supersystem']})
        #> {1: ('hi', '§A', '§123'), 2: ('hi', '§A', '§123'), 3: ('hi', '§A', '§123'),
        #   4: ('md', '§B', '§4'),
        #   5: ('md2', '§C', '§<10'), 6: ('md2', '§C', '§<10'), 7: ('md2', '§C', '§<10'), 8: ('md2', '§C', '§<10'), 9: ('md2', '§C', '§<10'), 10: ('md2', '§C', '§<10'),
        #   "supersystem": ('lo', '§D', '§SS')}
        ```
    """
    sorted_nb_per_mc = {
        k: sorted(v)
        for (k, v) in sorted(
            nb_per_mc.items(), key=lambda mc_nbs: sorted([1000] if (mc_nbs[1] == ["supersystem"]) else mc_nbs[1])[0]
        )
    }
    if presorted:
        assert (
            sorted_nb_per_mc == nb_per_mc
        ), f"If presorted, input dictionary should be sorted. {nb_per_mc} != {sorted_nb_per_mc}   "

    lvl_lbl = {}
    for mc, nbs in sorted_nb_per_mc.items():
        if nbs == ["supersystem"]:
            lvl_lbl[mc] = "§SS"
        elif max(nbs) > 9:
            lvl_lbl[mc] = f"§<{max(nbs)}"
        else:
            lvl_lbl[mc] = f{''.join(map(str, sorted(nbs)))}"

    indexed_mc = {k: i for i, k in enumerate(sorted_nb_per_mc.keys())}

    mc_per_nb = {
        nb: (mc, f{string.ascii_uppercase[indexed_mc[mc]]}", lvl_lbl[mc])
        for mc, nbs in sorted_nb_per_mc.items()
        for nb in nbs
    }

    return mc_per_nb

qcmanybody.builder

build_nbody_compute_list

build_nbody_compute_list(bsse_type: Iterable[BsseEnum], nfragments: int, nbodies: Iterable[Union[int, Literal['supersystem']]], return_total_data: bool, supersystem_ie_only: bool, supersystem_max_nbody: Optional[int] = None) -> Dict[str, Dict[int, Set[FragBasIndex]]]

Generates lists of N-Body computations needed for requested BSSE treatments.

Parameters:

Name Type Description Default
bsse_type Iterable[BsseEnum]

Requested BSSE treatments.

required
nfragments int

Number of distinct fragments comprising the full molecular supersystem.

required
nbodies Iterable[Union[int, Literal['supersystem']]]

List of n-body levels (e.g., [2] or [1, 2] or ["supersystem"]) for which to generate tasks. Note the natural 1-indexing, so [1] covers one-body contributions.

required
return_total_data bool

Whether the total data (True; energy/gradient/Hessian) of the molecular system has been requested, as opposed to interaction data (False).

required
supersystem_ie_only bool

Target the supersystem total/interaction energy (IE) data over the many-body expansion (MBE) " analysis, thereby omitting intermediate-body calculations.

required
supersystem_max_nbody Optional[int]

Maximum n-body to use for a supersystem calculation. Must be specified if "supersystem" is in nbodies

None

Returns:

Type Description
compute_dict

Dictionary containing subdicts enumerating compute lists for each possible BSSE treatment. Subdict keys are n-body levels and values are sets of all the mc_(frag, bas) indices needed to compute that n-body level. A given index can appear multiple times within a subdict and among subdicts.

compute_dict["cp"] = {
    1: set(),
    2: {((1,), (1, 2)),
        ((2,), (1, 2)),
        ((1, 2), (1, 2))}
}

Subdicts below are always returned. Any may be empty if not requested through bsse_type.

  • 'all' |w---w| full list of computations required
  • 'cp' |w---w| list of computations required for CP procedure
  • 'nocp' |w---w| list of computations required for non-CP procedure
  • 'vmfc_compute' |w---w| list of computations required for VMFC procedure
  • 'vmfc_levels' |w---w| list of levels required for VMFC procedure
Source code in qcmanybody/builder.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def build_nbody_compute_list(
    bsse_type: Iterable[BsseEnum],
    nfragments: int,
    nbodies: Iterable[Union[int, Literal["supersystem"]]],
    return_total_data: bool,
    supersystem_ie_only: bool,
    supersystem_max_nbody: Optional[int] = None,
) -> Dict[str, Dict[int, Set[FragBasIndex]]]:
    """Generates lists of N-Body computations needed for requested BSSE treatments.

    Parameters
    ----------
    bsse_type
        Requested BSSE treatments.
    nfragments
        Number of distinct fragments comprising the full molecular supersystem.
    nbodies
        List of n-body levels (e.g., `[2]` or `[1, 2]` or `["supersystem"]`) for which to generate tasks.
        Note the natural 1-indexing, so `[1]` covers one-body contributions.
    return_total_data
        Whether the total data (True; energy/gradient/Hessian) of the molecular system has been requested,
        as opposed to interaction data (False).
    supersystem_ie_only
        Target the supersystem total/interaction energy (IE) data over the many-body expansion (MBE) "
        analysis, thereby omitting intermediate-body calculations.
    supersystem_max_nbody
        Maximum n-body to use for a supersystem calculation. Must be specified if "supersystem" is in `nbodies`

    Returns
    -------
    compute_dict
        Dictionary containing subdicts enumerating compute lists for each possible BSSE treatment.
        Subdict keys are n-body levels and values are sets of all the `mc_(frag, bas)` indices
        needed to compute that n-body level. A given index can appear multiple times within a
        subdict and among subdicts.

            compute_dict["cp"] = {
                1: set(),
                2: {((1,), (1, 2)),
                    ((2,), (1, 2)),
                    ((1, 2), (1, 2))}
            }

        Subdicts below are always returned. Any may be empty if not requested through *bsse_type*.

        * ``'all'`` |w---w| full list of computations required
        * ``'cp'`` |w---w| list of computations required for CP procedure
        * ``'nocp'`` |w---w| list of computations required for non-CP procedure
        * ``'vmfc_compute'`` |w---w| list of computations required for VMFC procedure
        * ``'vmfc_levels'`` |w---w| list of levels required for VMFC procedure

    """

    include_supersystem = False
    if "supersystem" in nbodies:
        if supersystem_max_nbody is None:
            raise ValueError("supersystem_max_nbody must be provided if 'supersystem' contains nbodies")

        include_supersystem = True

    nbodies: List[int] = [x for x in nbodies if x != "supersystem"]

    # What levels do we need?
    fragment_range = range(1, nfragments + 1)

    # Need nbodies and all lower-body in full basis
    cp_compute_list = {x: set() for x in nbodies}
    nocp_compute_list = {x: set() for x in nbodies}
    vmfc_compute_list = {x: set() for x in nbodies}
    vmfc_level_list = {x: set() for x in nbodies}  # Need to sum something slightly different

    # Verify proper passing of bsse_type. already validated in Computer
    bsse_type_remainder = set(bsse_type) - {e.value for e in BsseEnum}
    if bsse_type_remainder:
        raise RuntimeError(f"Unrecognized BSSE type(s): {bsse_type_remainder}")

    # Build up compute sets
    if "cp" in bsse_type:
        # Everything is in counterpoise/nfr-mer basis
        basis_tuple = tuple(fragment_range)

        if supersystem_ie_only:
            for sublevel in [1, nfragments]:
                for x in itertools.combinations(fragment_range, sublevel):
                    cp_compute_list[nfragments].add((x, basis_tuple))
        else:
            for nb in nbodies:
                # Note A.1: nb=1 is skipped because the nfr-mer-basis monomer
                #   contributions cancel at 1-body. These skipped tasks will be
                #   ordered anyways if higher bodies are requested. Monomers for
                #   the purpose of total energies use monomer basis, not these
                #   skipped tasks. See coordinating Note A.2 .
                if nb > 1:
                    for sublevel in range(1, nb + 1):
                        for x in itertools.combinations(fragment_range, sublevel):
                            cp_compute_list[nb].add((x, basis_tuple))

    if "nocp" in bsse_type:
        # Everything in natural/n-mer basis
        if supersystem_ie_only:
            for sublevel in [1, nfragments]:
                for x in itertools.combinations(fragment_range, sublevel):
                    nocp_compute_list[nfragments].add((x, x))
        else:
            for nb in nbodies:
                for sublevel in range(1, nb + 1):
                    for x in itertools.combinations(fragment_range, sublevel):
                        nocp_compute_list[nb].add((x, x))

    if "vmfc" in bsse_type:
        # Like a CP for all combinations of pairs or greater
        for nb in nbodies:
            for cp_combos in itertools.combinations(fragment_range, nb):
                basis_tuple = tuple(cp_combos)
                # TODO vmfc_compute_list and vmfc_level_list are identical, so consolidate
                for interior_nbody in range(1, nb + 1):
                    for x in itertools.combinations(cp_combos, interior_nbody):
                        combo_tuple = (x, basis_tuple)
                        vmfc_compute_list[nb].add(combo_tuple)
                        vmfc_level_list[len(basis_tuple)].add(combo_tuple)

    if return_total_data and 1 in nbodies:
        # Monomers in monomer basis
        nocp_compute_list.setdefault(1, set())
        for ifr in fragment_range:
            nocp_compute_list[1].add(((ifr,), (ifr,)))

    if include_supersystem:
        # Add supersystem info to the compute list (nocp only)
        for nb in range(1, supersystem_max_nbody + 1):
            cp_compute_list.setdefault(nb, set())
            nocp_compute_list.setdefault(nb, set())
            vmfc_compute_list.setdefault(nb, set())
            for sublevel in range(1, nb + 1):
                for x in itertools.combinations(fragment_range, sublevel):
                    nocp_compute_list[nb].add((x, x))

        # Add the total supersystem (nfragments@nfragments)
        nocp_compute_list.setdefault(nfragments, set())
        nocp_compute_list[nfragments].add((tuple(fragment_range), tuple(fragment_range)))

    # Build a comprehensive compute range
    # * do not use list length to count number of {nb}-body computations
    compute_list = {x: set() for x in nbodies}
    for nb in nbodies:
        compute_list[nb] |= cp_compute_list[nb]
        compute_list[nb] |= nocp_compute_list[nb]
        compute_list[nb] |= vmfc_compute_list[nb]

    if include_supersystem:
        for nb, lst in nocp_compute_list.items():
            compute_list.setdefault(nb, set())
            compute_list[nb] |= lst

    compute_dict = {
        "all": compute_list,
        "cp": cp_compute_list,
        "nocp": nocp_compute_list,
        "vmfc_compute": vmfc_compute_list,
        "vmfc_levels": vmfc_level_list,
    }
    return compute_dict