# Source code for qcelemental.molparse.to_schema

from copy import deepcopy
from typing import Any, Dict, Union

import numpy as np

from ..exceptions import ValidationError
from ..physical_constants import constants
from ..util import unnp
from .to_string import formula_generator


def _as_array(values: Any, copy: bool) -> np.ndarray:
    """Return `values` as an ndarray, forcing a copy only when `copy` is True."""
    # ``np.array(..., copy=False)`` raises ValueError under NumPy >= 2 whenever a copy
    # cannot be avoided (e.g. list input). ``np.asarray`` keeps the "copy only if
    # needed" behaviour on every NumPy version.
    return np.array(values) if copy else np.asarray(values)


def to_schema(
    molrec: Dict[str, Any], dtype: Union[str, int], units: str = "Bohr", *, np_out: bool = False, copy: bool = True
) -> Dict[str, Any]:
    r"""Translate molparse internal Molecule spec into a dictionary for other schemas.

    Parameters
    ----------
    molrec
        Psi4 json Molecule spec.
    dtype
        {'psi4', 1, 2}
        Molecule schema format.
        ``1`` is https://molssi-qc-schema.readthedocs.io/en/latest/auto_topology.html V1 + #44 + #53
        ``2`` is ``1`` with internal schema_name/version (https://github.com/MolSSI/QCSchema/pull/60)
    units
        {'Bohr', 'Angstrom'}
        Units in which to write string. There is not an option to write in
        intrinsic/input units. Some `dtype` may not allow all units.
    np_out
        When `True`, fields originating from geom, elea, elez, elem, mass, real, elbl will be ndarray.
        Use `False` to get a json-able version.
    copy
        When `True`, array fields taken from `molrec` are copied into the output.
        When `False`, existing ndarrays are reused where possible (a copy is still
        made when the input is not already an ndarray or when the geometry must be
        rescaled). The ``'psi4'`` format always starts from a deep copy of `molrec`.

    Returns
    -------
    qcschema : dict
        Dictionary of the `dtype` repr of `molrec`.

    Raises
    ------
    ValidationError
        If `units` is not permitted for the requested `dtype`, or if `dtype` is
        not one of ``'psi4'``, ``1``, ``2``.

    """
    qcschema: Dict = {}

    geom = _as_array(molrec["geom"], copy)
    if molrec["units"] == "Bohr" and units == "Bohr":
        # Already in the requested units: leave the geometry untouched.
        pass
    elif molrec["units"] == "Angstrom" and units == "Bohr" and "input_units_to_au" in molrec:
        # Prefer the conversion factor stored on the record over the library constant.
        geom = geom * molrec["input_units_to_au"]
    else:
        geom = geom * constants.conversion_factor(molrec["units"], units)
    # The atom count is derived from the first axis, i.e. geom is treated as a flat
    # sequence of 3 * nat coordinates.
    nat = geom.shape[0] // 3

    # Only build a formula-based name when the record does not carry one.
    name = molrec["name"] if "name" in molrec else formula_generator(molrec["elem"])
    # tagline = """auto-generated by qcdb from molecule {}""".format(name)

    if dtype == "psi4":
        if units not in ["Angstrom", "Bohr"]:
            raise ValidationError(
                """Psi4 Schema {} allows only 'Bohr'/'Angstrom' coordinates, not {}.""".format(dtype, units)
            )
        qcschema = deepcopy(molrec)
        qcschema["geom"] = geom
        qcschema["units"] = units
        qcschema["name"] = name

    elif dtype in [1, 2]:
        if units != "Bohr":
            raise ValidationError("""QCSchema {} allows only 'Bohr' coordinates, not {}.""".format(dtype, units))

        molecule: Dict = {}
        molecule["validated"] = True
        molecule["symbols"] = _as_array(molrec["elem"], copy)
        molecule["geometry"] = geom
        molecule["masses"] = _as_array(molrec["mass"], copy)
        molecule["atomic_numbers"] = _as_array(molrec["elez"], copy)
        molecule["mass_numbers"] = _as_array(molrec["elea"], copy)
        molecule["atom_labels"] = _as_array(molrec["elbl"], copy)
        molecule["name"] = name

        if "comment" in molrec:
            molecule["comment"] = molrec["comment"]

        molecule["molecular_charge"] = molrec["molecular_charge"]
        molecule["molecular_multiplicity"] = molrec["molecular_multiplicity"]
        molecule["real"] = _as_array(molrec["real"], copy)

        # Split the atom indices 0..nat-1 at the separators to get per-fragment index lists.
        fidx = np.split(np.arange(nat), molrec["fragment_separators"])
        molecule["fragments"] = [fr.tolist() for fr in fidx]

        molecule["fragment_charges"] = np.array(molrec["fragment_charges"]).tolist()
        molecule["fragment_multiplicities"] = np.array(molrec["fragment_multiplicities"]).tolist()
        molecule["fix_com"] = molrec["fix_com"]
        molecule["fix_orientation"] = molrec["fix_orientation"]
        if "fix_symmetry" in molrec:
            molecule["fix_symmetry"] = molrec["fix_symmetry"]
        molecule["provenance"] = deepcopy(molrec["provenance"])

        if "connectivity" in molrec:
            molecule["connectivity"] = deepcopy(molrec["connectivity"])

        if dtype == 1:
            # Version 1 nests the molecule under a top-level input schema.
            qcschema = {"schema_name": "qcschema_input", "schema_version": 1, "molecule": molecule}
        elif dtype == 2:
            # Version 2 carries schema name/version inside the molecule dictionary itself.
            qcschema = molecule
            qcschema.update({"schema_name": "qcschema_molecule", "schema_version": 2})

    else:
        raise ValidationError(
            "Schema dtype not understood, valid options are {{'psi4', 1, 2}}. Found {}.".format(dtype)
        )

    if not np_out:
        qcschema = unnp(qcschema)

    return qcschema
# if return_type == 'json': # return json.dumps(qcschema) # elif return_type == 'yaml': # import yaml # return yaml.dump(qcschema) # else: # raise ValidationError("""Return type ({}) not recognized.""".format(return_type))