From a742dc7146295789fb33d1211a93ce48917cdd6c Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 4 Sep 2020 00:15:17 -0400
Subject: [PATCH 01/10] qcsk: export models to JSON Schema

Adds a `make qcschema` target that runs:

    mkdir -p qcschema
    python -c "exec(\"import pathlib, qcelemental\nfor md in qcelemental.models.qcschema_models():\n\tmfile = (pathlib.Path('qcschema') / md.__name__).with_suffix('.schema')\n\twith open(mfile, 'w') as fp:\n\t\tfp.write(md.schema_json(indent=None))\")"
    python -c "exec(\"import json, pathlib, pydantic, qcelemental\nwith open((pathlib.Path('qcschema') / 'QCSchema').with_suffix('.schema'), 'w') as fp:\n\tjson.dump(pydantic.schema.schema(qcelemental.models.qcschema_models(), title='QCSchema'), fp, indent=4)\")"

---
 .gitignore                       |  4 ++++
 Makefile                         |  6 ++++++
 qcelemental/models/__init__.py   | 17 +++++++++++++++--
 qcelemental/models/basemodels.py |  3 +++
 qcelemental/models/molecule.py   |  6 +++++-
 qcelemental/models/results.py    |  2 +-
 6 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 666c7467..45f83e57 100644
--- a/.gitignore
+++ b/.gitignore
@@ -119,3 +119,7 @@ runinfo/*
 # VSCode
 .vscode/
 raw_data/**/*_blob.py
+
+# autogen
+qcschema/*.schema
+qcelemental/tests/qcschema_instances/*/*.json
diff --git a/Makefile b/Makefile
index 45c82220..0135c37c 100644
--- a/Makefile
+++ b/Makefile
@@ -43,6 +43,12 @@ data: cpu_data
 cpu_data:
 	(cd raw_data/cpu_data; python build_cpu_data.py; mv cpu_data_blob.py ../../qcelemental/info/data/)
 
+.PHONY: qcschema
+qcschema:
+	mkdir -p qcschema
+	python -c "exec(\"import pathlib, qcelemental\nfor md in qcelemental.models.qcschema_models():\n\tmfile = (pathlib.Path('qcschema') / md.__name__).with_suffix('.schema')\n\twith open(mfile, 'w') as fp:\n\t\tfp.write(md.schema_json(indent=None))\")"
+	python -c "exec(\"import json, pathlib, pydantic, qcelemental\nwith open((pathlib.Path('qcschema') / 'QCSchema').with_suffix('.schema'), 'w') as fp:\n\tjson.dump(pydantic.schema.schema(qcelemental.models.qcschema_models(), title='QCSchema'), fp, indent=4)\")"
+
 .PHONY: clean
 clean:
 	rm -rf `find . -name __pycache__`
diff --git a/qcelemental/models/__init__.py b/qcelemental/models/__init__.py
index f2c6102c..4a69f39a 100644
--- a/qcelemental/models/__init__.py
+++ b/qcelemental/models/__init__.py
@@ -12,5 +12,19 @@
 from .basis import BasisSet
 from .common_models import ComputeError, DriverEnum, FailedOperation, Provenance
 from .molecule import Molecule
-from .procedures import Optimization, OptimizationInput, OptimizationResult
-from .results import AtomicInput, AtomicResult, AtomicResultProperties, Result, ResultInput, ResultProperties
+from .procedures import OptimizationInput, OptimizationResult
+from .procedures import Optimization  # scheduled for removal
+from .results import AtomicInput, AtomicResult, AtomicResultProperties
+from .results import Result, ResultInput, ResultProperties  # scheduled for removal
+
+
+def qcschema_models():
+    """Return the model classes that ``make qcschema`` exports to JSON Schema files."""
+    return [
+        AtomicInput,
+        AtomicResult,
+        AtomicResultProperties,
+        BasisSet,
+        Molecule,
+        Provenance,
+    ]
diff --git a/qcelemental/models/basemodels.py b/qcelemental/models/basemodels.py
index eb99a845..044b79a6 100644
--- a/qcelemental/models/basemodels.py
+++ b/qcelemental/models/basemodels.py
@@ -182,3 +182,6 @@ def compare(self, other: Union["ProtoModel", BaseModel], **kwargs) -> bool:
 class AutodocBaseSettings(BaseSettings):
     def __init_subclass__(cls) -> None:
         cls.__doc__ = AutoPydanticDocGenerator(cls, always_apply=True)
+
+
+qcschema_draft = "http://json-schema.org/draft-04/schema#"
diff --git a/qcelemental/models/molecule.py b/qcelemental/models/molecule.py
index a7ba80ba..dad4a2ac 100644
--- a/qcelemental/models/molecule.py
+++ b/qcelemental/models/molecule.py
@@ -16,7 +16,7 @@
 from ..physical_constants import constants
 from ..testing import compare, compare_values
 from ..util import deserialize, measure_coordinates, msgpackext_loads, provenance_stamp, which_import
-from .basemodels import ProtoModel
+from .basemodels import ProtoModel, qcschema_draft
 from .common_models import Provenance, qcschema_molecule_default
 from .types import Array
 
@@ -259,6 +259,10 @@ class Config(ProtoModel.Config):
             "fragment_multiplicities_": "fragment_multiplicities",
         }
 
+        def schema_extra(schema, model):
+            # Set "$schema" by hand until https://github.com/samuelcolvin/pydantic/issues/1478 is resolved.
+            schema["$schema"] = qcschema_draft
+
     def __init__(self, orient: bool = False, validate: Optional[bool] = None, **kwargs: Any) -> None:
         """Initializes the molecule object from dictionary-like values.
 
diff --git a/qcelemental/models/results.py b/qcelemental/models/results.py
index a1399256..656cad75 100644
--- a/qcelemental/models/results.py
+++ b/qcelemental/models/results.py
@@ -5,7 +5,7 @@
 from pydantic import Field, constr, validator
 
 from ..util import provenance_stamp
-from .basemodels import ProtoModel
+from .basemodels import ProtoModel, qcschema_draft
 from .basis import BasisSet
 from .common_models import ComputeError, DriverEnum, Model, Provenance, qcschema_input_default, qcschema_output_default
 from .molecule import Molecule

From 16e2b7b3a1b4107c23ff9830ea553c15d13c8c35 Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 4 Sep 2020 00:34:37 -0400
Subject: [PATCH 02/10] qcsk: generate example JSON from tests and validate it
 against the exported schema via `pytest --validate`

---
 .github/workflows/CI.yml                      |  7 ++++
 devtools/conda-envs/base.yaml                 |  1 +
 devtools/conda-envs/minimal.yaml              |  1 +
 qcelemental/models/basemodels.py              | 10 ++++++
 qcelemental/molutil/test_molutil.py           | 11 +++---
 qcelemental/tests/addons.py                   | 19 ++++++++++
 .../qcschema_instances/AtomicInput/dummy      |  0
 .../qcschema_instances/AtomicResult/dummy     |  0
 .../AtomicResultProperties/dummy              |  0
 .../tests/qcschema_instances/BasisSet/dummy   |  0
 .../tests/qcschema_instances/Molecule/dummy   |  0
 .../tests/qcschema_instances/Provenance/dummy |  0
 .../tests/qcschema_instances/README.md        |  4 +++
 qcelemental/tests/test_model_general.py       | 11 ++++--
 qcelemental/tests/test_model_results.py       | 35 ++++++++++++------
 .../tests/test_molparse_from_schema.py        |  5 ++-
 qcelemental/tests/test_molparse_to_schema.py  | 27 +++++++++-----
 qcelemental/tests/test_molparse_to_string.py  |  5 ++-
 qcelemental/tests/test_zqcschema.py           | 36 +++++++++++++++++++
 19 files changed, 144 insertions(+), 28 deletions(-)
 create mode 100644 qcelemental/tests/qcschema_instances/AtomicInput/dummy
 create mode 100644 qcelemental/tests/qcschema_instances/AtomicResult/dummy
 create mode 100644 qcelemental/tests/qcschema_instances/AtomicResultProperties/dummy
 create mode 100644 qcelemental/tests/qcschema_instances/BasisSet/dummy
 create mode 100644 qcelemental/tests/qcschema_instances/Molecule/dummy
 create mode 100644 qcelemental/tests/qcschema_instances/Provenance/dummy
 create mode 100644 qcelemental/tests/qcschema_instances/README.md
 create mode 100644 qcelemental/tests/test_zqcschema.py

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index d6f1f0ff..066ba7a4 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -54,5 +54,12 @@ jobs:
         eval "$(conda shell.bash hook)" && conda activate test
         pytest -rws -v --cov=qcelemental --color=yes --cov-report=xml qcelemental/
 
+    - name: PyTest Validate
+      shell: bash
+      if: matrix.cfg.label == 'full'
+      run: |
+        eval "$(conda shell.bash hook)" && conda activate test
+        pytest -rws -v --color=yes --validate qcelemental/
+
     - name: CodeCov  
       uses: codecov/codecov-action@v1
diff --git a/devtools/conda-envs/base.yaml b/devtools/conda-envs/base.yaml
index 0350c9f3..ec43b74a 100644
--- a/devtools/conda-envs/base.yaml
+++ b/devtools/conda-envs/base.yaml
@@ -20,3 +20,4 @@ dependencies:
   - pytest-cov
   - codecov
   - scipy  # tests an aspect of a helper fn not used by qcel functionality
+  - jsonschema
diff --git a/devtools/conda-envs/minimal.yaml b/devtools/conda-envs/minimal.yaml
index bf609d50..6d72a43f 100644
--- a/devtools/conda-envs/minimal.yaml
+++ b/devtools/conda-envs/minimal.yaml
@@ -14,3 +14,4 @@ dependencies:
   - pytest=4.6.4  # technically, qcel works with 4.0.0 but c-f doesn't have py38 builds for it
   - pytest-cov
   - codecov
+  - jsonschema
diff --git a/qcelemental/models/basemodels.py b/qcelemental/models/basemodels.py
index 044b79a6..1cd4328d 100644
--- a/qcelemental/models/basemodels.py
+++ b/qcelemental/models/basemodels.py
@@ -125,6 +125,8 @@ def serialize(
         include: Optional[Set[str]] = None,
         exclude: Optional[Set[str]] = None,
         exclude_unset: Optional[bool] = None,
+        exclude_defaults: Optional[bool] = None,
+        exclude_none: Optional[bool] = None,
     ) -> Union[bytes, str]:
         """Generates a serialized representation of the model
 
@@ -138,6 +140,10 @@ def serialize(
             Fields to be excluded in the serialization.
         exclude_unset : Optional[bool], optional
             If True, skips fields that have default values provided.
+        exclude_defaults : Optional[bool], optional
+            If True, skips fields that have set or defaulted values equal to the default.
+        exclude_none : Optional[bool], optional
+            If True, skips fields that have value ``None``.
 
         Returns
         -------
@@ -152,6 +158,10 @@ def serialize(
             kwargs["exclude"] = exclude
         if exclude_unset:
             kwargs["exclude_unset"] = exclude_unset
+        if exclude_defaults:
+            kwargs["exclude_defaults"] = exclude_defaults
+        if exclude_none:
+            kwargs["exclude_none"] = exclude_none
 
         data = self.dict(**kwargs)
 
diff --git a/qcelemental/molutil/test_molutil.py b/qcelemental/molutil/test_molutil.py
index 4b826f5e..bf8cf012 100644
--- a/qcelemental/molutil/test_molutil.py
+++ b/qcelemental/molutil/test_molutil.py
@@ -8,7 +8,7 @@
 import qcelemental as qcel
 from qcelemental.testing import compare, compare_molrecs, compare_recursive, compare_values
 
-from ..tests.addons import using_networkx
+from ..tests.addons import drop_qcsk, using_networkx
 
 pp = pprint.PrettyPrinter(width=120)
 
@@ -44,8 +44,9 @@ def test_scramble_descrambles_plain():
         s22_12.scramble(do_shift=True, do_rotate=True, do_resort=True, do_plot=False, verbose=0, do_test=True)
 
 
-def test_relative_geoms_align_free():
+def test_relative_geoms_align_free(request):
     s22_12 = qcel.models.Molecule.from_data(ss22_12)
+    drop_qcsk(s22_12, request.node.name)
 
     for trial in range(3):
         cmol, _ = s22_12.scramble(
@@ -57,8 +58,9 @@ def test_relative_geoms_align_free():
         assert compare_molrecs(rmolrec, cmolrec, atol=1.0e-4, relative_geoms="align")
 
 
-def test_relative_geoms_align_fixed():
+def test_relative_geoms_align_fixed(request):
     s22_12 = qcel.models.Molecule.from_data(ss22_12 + "nocom\nnoreorient\n")
+    drop_qcsk(s22_12, request.node.name)
 
     for trial in range(3):
         cmol, _ = s22_12.scramble(
@@ -344,7 +346,7 @@ def test_scramble_specific():
     assert compare(mill_str, mill.pretty_print())
 
 
-def test_hessian_align():
+def test_hessian_align(request):
     # from Psi4 test test_hessian_vs_cfour[HOOH_TS-H_analytic]
 
     # fmt: off
@@ -458,6 +460,7 @@ def test_hessian_align():
 
     p4mol = qcel.models.Molecule.from_data(p4_hooh_xyz)
     c4mol = qcel.models.Molecule.from_data(c4_hooh_xyz)
+    drop_qcsk(c4mol, request.node.name)
     aqmol, data = p4mol.align(c4mol, atoms_map=True, mols_align=True, verbose=4)
     mill = data["mill"]
 
diff --git a/qcelemental/tests/addons.py b/qcelemental/tests/addons.py
index e400c5ea..fe2d9e4e 100644
--- a/qcelemental/tests/addons.py
+++ b/qcelemental/tests/addons.py
@@ -1,5 +1,7 @@
+import json
 import socket
 from contextlib import contextmanager
+from pathlib import Path
 
 import pytest
 
@@ -49,3 +51,43 @@ def xfail_on_pubchem_busy():
             pytest.xfail("Pubchem server busy")
         else:
             raise e
+
+
+_data_path = Path(__file__).parent.resolve() / "qcschema_instances"
+
+
+def drop_qcsk(instance, tnm: str, schema_name: str = None):
+    """Write a QCSchema instance to ``qcschema_instances/<schema_name>/<tnm>.json``.
+
+    Parameters
+    ----------
+    instance
+        A ``ProtoModel`` or a plain ``dict`` to serialize.
+    tnm : str
+        Name of the output file before the ``.json`` suffix is applied.
+    schema_name : str, optional
+        Subdirectory of ``qcschema_instances`` to write into. Defaults to the
+        class name of ``instance`` when it is a ``ProtoModel``.
+
+    Raises
+    ------
+    TypeError
+        If ``instance`` is neither a ``ProtoModel`` nor a ``dict``.
+
+    """
+    is_model = isinstance(instance, qcelemental.models.ProtoModel)
+    if is_model and schema_name is None:
+        schema_name = type(instance).__name__
+
+    # Serialize before opening the file so that an unsupported ``instance`` cannot
+    # leave an empty ``.json`` behind for ``test_qcschema`` to trip over.
+    if is_model:
+        text = instance.json(exclude_unset=True, exclude_none=True)
+    elif isinstance(instance, dict):
+        text = json.dumps(instance, sort_keys=True, indent=2)
+    else:
+        raise TypeError(f"Cannot drop instance of type {type(instance).__name__}; expected ProtoModel or dict")
+
+    drop = (_data_path / schema_name / tnm).with_suffix(".json")
+    with open(drop, "w") as fp:
+        fp.write(text)
diff --git a/qcelemental/tests/qcschema_instances/AtomicInput/dummy b/qcelemental/tests/qcschema_instances/AtomicInput/dummy
new file mode 100644
index 00000000..e69de29b
diff --git a/qcelemental/tests/qcschema_instances/AtomicResult/dummy b/qcelemental/tests/qcschema_instances/AtomicResult/dummy
new file mode 100644
index 00000000..e69de29b
diff --git a/qcelemental/tests/qcschema_instances/AtomicResultProperties/dummy b/qcelemental/tests/qcschema_instances/AtomicResultProperties/dummy
new file mode 100644
index 00000000..e69de29b
diff --git a/qcelemental/tests/qcschema_instances/BasisSet/dummy b/qcelemental/tests/qcschema_instances/BasisSet/dummy
new file mode 100644
index 00000000..e69de29b
diff --git a/qcelemental/tests/qcschema_instances/Molecule/dummy b/qcelemental/tests/qcschema_instances/Molecule/dummy
new file mode 100644
index 00000000..e69de29b
diff --git a/qcelemental/tests/qcschema_instances/Provenance/dummy b/qcelemental/tests/qcschema_instances/Provenance/dummy
new file mode 100644
index 00000000..e69de29b
diff --git a/qcelemental/tests/qcschema_instances/README.md b/qcelemental/tests/qcschema_instances/README.md
new file mode 100644
index 00000000..daac83c8
--- /dev/null
+++ b/qcelemental/tests/qcschema_instances/README.md
@@ -0,0 +1,4 @@
+These subdirectories are populated by running the QCElemental test suite, ``pytest``.
+Files are JSON representations of QCSchema instances stored or created in the course of testing.
+These in turn are checked for compliance against the exported QCSchema models in test case ``test_qcschema``
+by running ``pytest --validate qcelemental/``.
diff --git a/qcelemental/tests/test_model_general.py b/qcelemental/tests/test_model_general.py
index 5adee3fd..f9f3b658 100644
--- a/qcelemental/tests/test_model_general.py
+++ b/qcelemental/tests/test_model_general.py
@@ -13,10 +13,13 @@
     Provenance,
 )
 
+from .addons import drop_qcsk
 
-def test_result_properties_default_skip():
+
+def test_result_properties_default_skip(request):
 
     obj = AtomicResultProperties(scf_one_electron_energy="-5.0")
+    drop_qcsk(obj, request.node.name)
 
     assert pytest.approx(obj.scf_one_electron_energy) == -5.0
 
@@ -31,9 +34,10 @@ def test_result_properties_default_repr():
     assert len(repr(obj)) < 100
 
 
-def test_repr_provenance():
+def test_repr_provenance(request):
 
     prov = Provenance(creator="qcel", version="v0.3.2")
+    drop_qcsk(prov, request.node.name)
 
     assert "qcel" in str(prov)
     assert "qcel" in repr(prov)
@@ -54,11 +58,12 @@ def test_repr_failed_op():
     )
 
 
-def test_repr_result():
+def test_repr_result(request):
 
     result = AtomicInput(
         **{"driver": "gradient", "model": {"method": "UFF"}, "molecule": {"symbols": ["He"], "geometry": [0, 0, 0]}}
     )
+    drop_qcsk(result, request.node.name)
     assert "molecule_hash" in str(result)
     assert "molecule_hash" in repr(result)
     assert "'gradient'" in str(result)
diff --git a/qcelemental/tests/test_model_results.py b/qcelemental/tests/test_model_results.py
index 01db8a0f..c8d9d367 100644
--- a/qcelemental/tests/test_model_results.py
+++ b/qcelemental/tests/test_model_results.py
@@ -4,6 +4,8 @@
 import qcelemental as qcel
 from qcelemental.models import basis
 
+from .addons import drop_qcsk
+
 center_data = {
     "bs_sto3g_h": {
         "electron_shells": [
@@ -154,12 +156,13 @@ def test_basis_shell_centers(center_name):
     assert basis.BasisCenter(**center_data[center_name])
 
 
-def test_basis_set_build():
+def test_basis_set_build(request):
     bas = basis.BasisSet(
         name="custom_basis",
         center_data=center_data,
         atom_map=["bs_sto3g_o", "bs_sto3g_h", "bs_sto3g_h", "bs_def2tzvp_zr"],
     )
+    drop_qcsk(bas, request.node.name)
 
     assert len(bas.center_data) == 3
     assert len(bas.atom_map) == 4
@@ -213,19 +216,23 @@ def test_basis_map_raises():
         assert basis.BasisSet(name="custom_basis", center_data=center_data, atom_map=["something_odd"])
 
 
-def test_result_build(result_data_fixture):
+def test_result_build(result_data_fixture, request):
     ret = qcel.models.AtomicResult(**result_data_fixture)
+    drop_qcsk(ret, request.node.name)
     assert ret.wavefunction is None
 
 
-def test_result_build_wavefunction_delete(wavefunction_data_fixture):
+def test_result_build_wavefunction_delete(wavefunction_data_fixture, request):
     del wavefunction_data_fixture["protocols"]
     ret = qcel.models.AtomicResult(**wavefunction_data_fixture)
+    drop_qcsk(ret, request.node.name)
     assert ret.wavefunction is None
 
 
-def test_wavefunction_build(wavefunction_data_fixture):
-    assert qcel.models.AtomicResult(**wavefunction_data_fixture)
+def test_wavefunction_build(wavefunction_data_fixture, request):
+    ret = qcel.models.AtomicResult(**wavefunction_data_fixture)
+    drop_qcsk(ret, request.node.name)
+    assert ret
 
 
 def test_wavefunction_matrix_size_error(wavefunction_data_fixture):
@@ -268,7 +275,7 @@ def test_wavefunction_return_result_pointer(wavefunction_data_fixture):
         ("return_results", True, ["orbitals_a", "fock_a", "fock_b"], ["orbitals_a", "fock_a"]),
     ],
 )
-def test_wavefunction_protocols(protocol, restricted, provided, expected, wavefunction_data_fixture):
+def test_wavefunction_protocols(protocol, restricted, provided, expected, wavefunction_data_fixture, request):
 
     wfn_data = wavefunction_data_fixture["wavefunction"]
 
@@ -289,6 +296,7 @@ def test_wavefunction_protocols(protocol, restricted, provided, expected, wavefu
             wfn_data[scf_name] = np.random.rand(bas.nbf, bas.nbf)
 
     wfn = qcel.models.AtomicResult(**wavefunction_data_fixture)
+    drop_qcsk(wfn, request.node.name)
 
     if len(expected) == 0:
         assert wfn.wavefunction is None
@@ -316,7 +324,7 @@ def test_optimization_trajectory_protocol(keep, indices, optimization_data_fixtu
     "default, defined, default_result, defined_result",
     [(None, None, True, None), (False, {"a": True}, False, {"a": True})],
 )
-def test_error_correction_protocol(default, defined, default_result, defined_result, result_data_fixture):
+def test_error_correction_protocol(default, defined, default_result, defined_result, result_data_fixture, request):
     policy = {}
     if default is not None:
         policy["default_policy"] = default
@@ -324,6 +332,7 @@ def test_error_correction_protocol(default, defined, default_result, defined_res
         policy["policies"] = defined
     result_data_fixture["protocols"] = {"error_correction": policy}
     res = qcel.models.AtomicResult(**result_data_fixture)
+    drop_qcsk(res, request.node.name)
 
     assert res.protocols.error_correction.default_policy == default_result
     assert res.protocols.error_correction.policies == defined_result
@@ -348,18 +357,20 @@ def test_error_correction_logic():
     assert correction_policy.allows("a")
 
 
-def test_result_build_stdout_delete(result_data_fixture):
+def test_result_build_stdout_delete(result_data_fixture, request):
     result_data_fixture["protocols"] = {"stdout": False}
     ret = qcel.models.AtomicResult(**result_data_fixture)
+    drop_qcsk(ret, request.node.name)
     assert ret.stdout is None
 
 
-def test_result_build_stdout(result_data_fixture):
+def test_result_build_stdout(result_data_fixture, request):
     ret = qcel.models.AtomicResult(**result_data_fixture)
+    drop_qcsk(ret, request.node.name)
     assert ret.stdout == "I ran."
 
 
-def test_failed_operation(result_data_fixture):
+def test_failed_operation(result_data_fixture, request):
     water = qcel.models.Molecule.from_data(
         """
         O 0 0 0
@@ -367,6 +378,7 @@ def test_failed_operation(result_data_fixture):
         H 0 2 0
     """
     )
+    drop_qcsk(water, request.node.name)
 
     failed = qcel.models.FailedOperation(
         extras={"garbage": water},
@@ -380,12 +392,13 @@ def test_failed_operation(result_data_fixture):
     assert "its all good" in failed_json
 
 
-def test_result_properties_array():
+def test_result_properties_array(request):
     lquad = [1, 2, 3, 2, 4, 5, 3, 5, 6]
 
     obj = qcel.models.AtomicResultProperties(
         scf_one_electron_energy="-5.0", scf_dipole_moment=[1, 2, 3], scf_quadrupole_moment=lquad
     )
+    drop_qcsk(obj, request.node.name)
 
     assert pytest.approx(obj.scf_one_electron_energy) == -5.0
     assert obj.scf_dipole_moment.shape == (3,)
diff --git a/qcelemental/tests/test_molparse_from_schema.py b/qcelemental/tests/test_molparse_from_schema.py
index c452509b..5d65ce8b 100644
--- a/qcelemental/tests/test_molparse_from_schema.py
+++ b/qcelemental/tests/test_molparse_from_schema.py
@@ -6,6 +6,8 @@
 import qcelemental as qcel
 from qcelemental.testing import compare_molrecs
 
+from .addons import drop_qcsk
+
 _schema_prov_stamp = {"creator": "QCElemental", "version": "1.0", "routine": "qcelemental.molparse.from_schema"}
 
 
@@ -131,9 +133,10 @@ def test_from_schema_1p5_14e():
     assert compare_molrecs(schema14_psi4_np, ans, 4)
 
 
-def test_from_schema_2_14e():
+def test_from_schema_2_14e(request):
     schema = copy.deepcopy(schema14_1)
     schema.update({"schema_name": "qcschema_molecule", "schema_version": 2})
+    drop_qcsk(schema, request.node.name, "Molecule")
 
     ans = qcel.molparse.from_schema(schema)
     assert compare_molrecs(schema14_psi4_np, ans, 4)
diff --git a/qcelemental/tests/test_molparse_to_schema.py b/qcelemental/tests/test_molparse_to_schema.py
index c64c9653..72cdc39a 100644
--- a/qcelemental/tests/test_molparse_to_schema.py
+++ b/qcelemental/tests/test_molparse_to_schema.py
@@ -7,6 +7,8 @@
 import qcelemental
 from qcelemental.testing import compare_molrecs
 
+from .addons import drop_qcsk
+
 _string_prov_stamp = {"creator": "QCElemental", "version": "1.0", "routine": "qcelemental.molparse.from_string"}
 _schema_prov_stamp = {"creator": "QCElemental", "version": "1.0", "routine": "qcelemental.molparse.from_schema"}
 
@@ -58,12 +60,13 @@
 }
 
 
-def test_1_14a():
+def test_1_14a(request):
     fullans = copy.deepcopy(schema14_1)
     fullans["molecule"]["provenance"] = _string_prov_stamp
 
     final = qcelemental.molparse.from_string(subject14)
     kmol = qcelemental.molparse.to_schema(final["qm"], dtype=1)
+    drop_qcsk(kmol["molecule"], request.node.name, "Molecule")
     assert compare_molrecs(fullans["molecule"], kmol["molecule"])
 
     fullans = copy.deepcopy(schema14_psi4)
@@ -74,12 +77,13 @@ def test_1_14a():
     assert compare_molrecs(fullans, molrec)
 
 
-def test_2_14b():
+def test_2_14b(request):
     fullans = copy.deepcopy(schema14_2)
     fullans["provenance"] = _string_prov_stamp
 
     final = qcelemental.molparse.from_string(subject14)
     kmol = qcelemental.molparse.to_schema(final["qm"], dtype=2)
+    drop_qcsk(kmol, request.node.name, "Molecule")
     assert compare_molrecs(fullans, kmol)
 
     fullans = copy.deepcopy(schema14_psi4)
@@ -109,7 +113,7 @@ def test_dtype_error():
 
 
 @pytest.mark.parametrize("dtype", [1, 2])
-def test_qcschema_ang_error(dtype):
+def test_atomic_units_qcschema_ang_error(dtype):
 
     final = qcelemental.molparse.from_string(subject14)
     with pytest.raises(qcelemental.ValidationError) as e:
@@ -182,13 +186,14 @@ def test_psi4_nm_error():
 }
 
 
-def test_1_15a():
+def test_1_15a(request):
     fullans = copy.deepcopy(schema15_1)
     fullans["molecule"]["provenance"] = _string_prov_stamp
 
     final = qcelemental.molparse.from_string(subject15)
     final["qm"]["comment"] = "I has a comment"
     kmol = qcelemental.molparse.to_schema(final["qm"], dtype=1)
+    drop_qcsk(kmol["molecule"], request.node.name, "Molecule")
     assert compare_molrecs(fullans["molecule"], kmol["molecule"])
 
     fullans = copy.deepcopy(schema15_psi4)
@@ -201,13 +206,14 @@ def test_1_15a():
     assert compare_molrecs(fullans, molrec)
 
 
-def test_2_15b():
+def test_2_15b(request):
     fullans = copy.deepcopy(schema15_2)
     fullans["provenance"] = _string_prov_stamp
 
     final = qcelemental.molparse.from_string(subject15)
     final["qm"]["comment"] = "I has a comment"
     kmol = qcelemental.molparse.to_schema(final["qm"], dtype=2)
+    drop_qcsk(kmol, request.node.name, "Molecule")
     assert compare_molrecs(fullans, kmol)
 
     fullans = copy.deepcopy(schema15_psi4)
@@ -280,7 +286,7 @@ def test_psi4_15c():
 }
 
 
-def test_froto_1_16a():
+def test_froto_1_16a(request):
     basic = {
         "schema_name": "qc_schema_output",
         "schema_version": 1,
@@ -296,10 +302,11 @@ def test_froto_1_16a():
     fullans["molecule"]["provenance"] = _schema_prov_stamp
 
     roundtrip = qcelemental.molparse.to_schema(qcelemental.molparse.from_schema(basic), dtype=1)
+    drop_qcsk(roundtrip["molecule"], request.node.name, "Molecule")
     assert compare_molrecs(fullans["molecule"], roundtrip["molecule"])
 
 
-def test_froto_2_16a():
+def test_froto_2_16a(request):
     basic = {
         "schema_name": "qcschema_molecule",
         "schema_version": 2,
@@ -313,14 +320,18 @@ def test_froto_2_16a():
     fullans["provenance"] = _schema_prov_stamp
 
     roundtrip = qcelemental.molparse.to_schema(qcelemental.molparse.from_schema(basic), dtype=2)
+    drop_qcsk(roundtrip, request.node.name, "Molecule")
     assert compare_molrecs(fullans, roundtrip)
 
 
 @pytest.mark.parametrize("dtype", [1, 2])
-def test_tofro_16b(dtype):
+def test_tofro_16b(dtype, request):
 
     fullans = copy.deepcopy(schema16_psi4)
     fullans["provenance"] = _schema_prov_stamp
 
     roundtrip = qcelemental.molparse.from_schema(qcelemental.molparse.to_schema(schema16_psi4, dtype=dtype))
+    qcsk = qcelemental.molparse.to_schema(schema16_psi4, dtype=dtype)
+    qcsk = qcsk["molecule"] if dtype == 1 else qcsk
+    drop_qcsk(qcsk, request.node.name, "Molecule")
     assert compare_molrecs(fullans, roundtrip)
diff --git a/qcelemental/tests/test_molparse_to_string.py b/qcelemental/tests/test_molparse_to_string.py
index 91b9bd77..a817bf61 100644
--- a/qcelemental/tests/test_molparse_to_string.py
+++ b/qcelemental/tests/test_molparse_to_string.py
@@ -3,6 +3,8 @@
 import qcelemental as qcel
 from qcelemental.testing import compare
 
+from .addons import drop_qcsk
+
 # CODATA2014 = 1.05835442134
 # CODATA2018 = 1.058354421806
 au2 = 2.0 * qcel.constants.bohr2angstroms
@@ -319,9 +321,10 @@ def test_to_string_xyz(inp, expected):
         ("subject2", {"dtype": "nglview-sdf"}, "ans2_ngslviewsdf"),
     ],
 )
-def test_molecule_to_string(inp, kwargs, expected):
+def test_molecule_to_string(inp, kwargs, expected, request):
 
     smol = _molecule_inputs[inp].to_string(**kwargs)
+    drop_qcsk(_molecule_inputs[inp], request.node.name)
     assert compare(_molecule_outputs[expected], smol)
 
 
diff --git a/qcelemental/tests/test_zqcschema.py b/qcelemental/tests/test_zqcschema.py
new file mode 100644
index 00000000..6d314b8d
--- /dev/null
+++ b/qcelemental/tests/test_zqcschema.py
@@ -0,0 +1,36 @@
+"""Validate JSON instances dropped by the test suite against the exported QCSchema models."""
+
+import json
+
+import pytest
+
+import qcelemental as qcel
+
+from .addons import _data_path
+
+
+@pytest.fixture(scope="module")
+def qcschema_models():
+    """Map each exported model's class name to its JSON Schema, parsed into a dict."""
+    return {md.__name__: json.loads(md.schema_json()) for md in qcel.models.qcschema_models()}
+
+
+# Instance files live at ``qcschema_instances/<model name>/<file name>.json``. The list is built
+# when this module is imported, so only files already on disk at that point are validated.
+files = sorted(_data_path.rglob("*.json"))
+# Test ID: parent directory name, then the file stem minus its first five characters
+# (the ``test_`` prefix when the file is named after the test that wrote it).
+ids = [fl.parent.stem + "_" + fl.stem[5:] for fl in files]
+
+
+@pytest.mark.parametrize("fl", files, ids=ids)
+def test_qcschema(fl, qcschema_models):
+    """Validate the instance in ``fl`` against the schema named by its parent directory."""
+    import jsonschema
+
+    model = fl.parent.stem
+    instance = json.loads(fl.read_text())
+
+    # ``jsonschema.validate`` raises on a non-conforming instance; on success it returns None.
+    res = jsonschema.validate(instance, qcschema_models[model])
+    assert res is None

From d3fb4e58867e8c47a71ec1960d15254f3d24acd4 Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 4 Sep 2020 00:46:21 -0400
Subject: [PATCH 03/10] qcsk: mypy fixes incl. dynamic provenance. bump
 pydantic to v1.5

---
 devtools/conda-envs/minimal.yaml         |  2 +-
 qcelemental/models/molecule.py           | 27 ++++++++++++++----------
 qcelemental/models/results.py            | 11 ++++++++--
 qcelemental/molparse/chgmult.py          |  4 ++--
 qcelemental/molparse/to_string.py        |  2 +-
 qcelemental/molutil/molecular_formula.py |  4 ++--
 qcelemental/tests/test_molecule.py       |  8 +++----
 qcelemental/util/importing.py            |  4 ++--
 setup.py                                 |  2 +-
 9 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/devtools/conda-envs/minimal.yaml b/devtools/conda-envs/minimal.yaml
index 6d72a43f..46591ed4 100644
--- a/devtools/conda-envs/minimal.yaml
+++ b/devtools/conda-envs/minimal.yaml
@@ -8,7 +8,7 @@ dependencies:
   - nomkl
   - python
   - pint=0.10.0  # technically, qcel has no lower bound for pint version for py36,37 but needs 0.10 for 38
-  - pydantic=1.2.0  # technically, qcel works with 1.0.0 but c-f doesn't have py38 builds for it
+  - pydantic=1.5.0
 
     # Testing
   - pytest=4.6.4  # technically, qcel works with 4.0.0 but c-f doesn't have py38 builds for it
diff --git a/qcelemental/models/molecule.py b/qcelemental/models/molecule.py
index dad4a2ac..df5dde38 100644
--- a/qcelemental/models/molecule.py
+++ b/qcelemental/models/molecule.py
@@ -6,12 +6,17 @@
 import json
 import warnings
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast
 
 import numpy as np
-from pydantic import Field, constr, validator
-
-from ..molparse import from_arrays, from_schema, from_string, to_schema, to_string
+from pydantic import ConstrainedFloat, ConstrainedInt, Field, constr, validator
+
+# molparse imports separated b/c https://github.com/python/mypy/issues/7203
+from ..molparse.from_arrays import from_arrays
+from ..molparse.from_schema import from_schema
+from ..molparse.from_string import from_string
+from ..molparse.to_schema import to_schema
+from ..molparse.to_string import to_string
 from ..periodic_table import periodictable
 from ..physical_constants import constants
 from ..testing import compare, compare_values
@@ -225,8 +230,8 @@ class Molecule(ProtoModel):
         None, description="Maximal point group symmetry which ``geometry`` should be treated. Lowercase."
     )
     # Extra
-    provenance: Provenance = Field(  # type: ignore
-        provenance_stamp(__name__),
+    provenance: Provenance = Field(
+        default_factory=partial(provenance_stamp, __name__),
         description="The provenance information about how this Molecule (and its attributes) were generated, "
         "provided, and manipulated.",
     )
@@ -1046,7 +1051,7 @@ def nuclear_repulsion_energy(self, ifr: int = None) -> float:
         Nuclear repulsion energy in entire molecule or in fragment.
 
         """
-        Zeff = [z * int(real) for z, real in zip(self.atomic_numbers, self.real)]
+        Zeff = [z * int(real) for z, real in zip(cast(Iterable[int], self.atomic_numbers), self.real)]
         atoms = list(range(self.geometry.shape[0]))
 
         if ifr is not None:
@@ -1072,7 +1077,7 @@ def nelectrons(self, ifr: int = None) -> int:
         Number of electrons in entire molecule or in fragment.
 
         """
-        Zeff = [z * int(real) for z, real in zip(self.atomic_numbers, self.real)]
+        Zeff = [z * int(real) for z, real in zip(cast(Iterable[int], self.atomic_numbers), self.real)]
 
         if ifr is None:
             nel = sum(Zeff) - self.molecular_charge
@@ -1150,7 +1155,7 @@ def align(
         runiq = np.asarray(
             [
                 hashlib.sha1((sym + str(mas)).encode("utf-8")).hexdigest()
-                for sym, mas in zip(ref_mol.symbols, ref_mol.masses)
+                for sym, mas in zip(cast(Iterable[str], ref_mol.symbols), ref_mol.masses)
             ]
         )
         concern_mol = self
@@ -1161,7 +1166,7 @@ def align(
         cuniq = np.asarray(
             [
                 hashlib.sha1((sym + str(mas)).encode("utf-8")).hexdigest()
-                for sym, mas in zip(concern_mol.symbols, concern_mol.masses)
+                for sym, mas in zip(cast(Iterable[str], concern_mol.symbols), concern_mol.masses)
             ]
         )
 
@@ -1297,7 +1302,7 @@ def scramble(
         runiq = np.asarray(
             [
                 hashlib.sha1((sym + str(mas)).encode("utf-8")).hexdigest()
-                for sym, mas in zip(ref_mol.symbols, ref_mol.masses)
+                for sym, mas in zip(cast(Iterable[str], ref_mol.symbols), ref_mol.masses)
             ]
         )
         nat = rgeom.shape[0]
diff --git a/qcelemental/models/results.py b/qcelemental/models/results.py
index 656cad75..88a29178 100644
--- a/qcelemental/models/results.py
+++ b/qcelemental/models/results.py
@@ -1,4 +1,5 @@
 from enum import Enum
+from functools import partial
 from typing import TYPE_CHECKING, Any, Dict, Optional, Set, Union
 
 import numpy as np
@@ -320,7 +321,7 @@ class AtomicResultProtocols(ProtoModel):
     )
     stdout: bool = Field(True, description="Primary output file to keep from a Result computation")
     error_correction: ErrorCorrectionProtocol = Field(
-        ErrorCorrectionProtocol(), description="Policies for error correction"
+        default_factory=ErrorCorrectionProtocol, description="Policies for error correction"
     )
 
     class Config:
@@ -347,7 +348,13 @@ class AtomicInput(ProtoModel):
 
     extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.")
 
-    provenance: Provenance = Field(Provenance(**provenance_stamp(__name__)), description=str(Provenance.__base_doc__))
+    provenance: Provenance = Field(
+        default_factory=partial(provenance_stamp, __name__), description=str(Provenance.__base_doc__)
+    )
+
+    class Config(ProtoModel.Config):
+        def schema_extra(schema, model):
+            schema["$schema"] = qcschema_draft
 
     def __repr_args__(self) -> "ReprArgs":
         return [
diff --git a/qcelemental/molparse/chgmult.py b/qcelemental/molparse/chgmult.py
index 32d53d66..7d1ff470 100644
--- a/qcelemental/molparse/chgmult.py
+++ b/qcelemental/molparse/chgmult.py
@@ -360,8 +360,8 @@ def validate_and_fill_chgmult(
     cgmp_rules.append("4")
     for ifr in range(nfr):
         cgmp_range.append(
-            lambda c, fc, m, fm, ifr=ifr: _sufficient_electrons_for_mult(fzel[ifr], fc[ifr], fm[ifr])
-        )  # type: ignore
+            lambda c, fc, m, fm, ifr=ifr: _sufficient_electrons_for_mult(fzel[ifr], fc[ifr], fm[ifr])  # type: ignore
+        )
         cgmp_rules.append("4-" + str(ifr))
 
     #   * (R5) require total parity consistent among neutral_electrons, chg, and mult
diff --git a/qcelemental/molparse/to_string.py b/qcelemental/molparse/to_string.py
index 0686252f..20c1b9d6 100644
--- a/qcelemental/molparse/to_string.py
+++ b/qcelemental/molparse/to_string.py
@@ -385,7 +385,7 @@ def to_dict(self) -> Dict:
 
         atom_format = "{elem}"
         ghost_format = "@{elem}"
-        umap = {"bohr": True, "angstrom": False}
+        umap = {"bohr": "True", "angstrom": "False"}
 
         atoms = _atoms_formatter(molrec, geom, atom_format, ghost_format, width, prec, 2)
 
diff --git a/qcelemental/molutil/molecular_formula.py b/qcelemental/molutil/molecular_formula.py
index 3fee6ca4..7ff1d8fd 100644
--- a/qcelemental/molutil/molecular_formula.py
+++ b/qcelemental/molutil/molecular_formula.py
@@ -1,6 +1,6 @@
 import collections
 import re
-from typing import List
+from typing import Dict, List
 
 
 def order_molecular_formula(formula: str, order: str = "alphabetical") -> str:
@@ -23,7 +23,7 @@ def order_molecular_formula(formula: str, order: str = "alphabetical") -> str:
     matches = re.findall(r"[A-Z][^A-Z]*", formula)
     if not "".join(matches) == formula:
         raise ValueError(f"{formula} is not a valid molecular formula.")
-    count = collections.defaultdict(int)
+    count: Dict[str, int] = collections.defaultdict(int)
     for match in matches:
         match_n = re.match(r"(\D+)(\d*)", match)
         assert match_n
diff --git a/qcelemental/tests/test_molecule.py b/qcelemental/tests/test_molecule.py
index 41e83360..aa56b0d7 100644
--- a/qcelemental/tests/test_molecule.py
+++ b/qcelemental/tests/test_molecule.py
@@ -651,14 +651,14 @@ def test_show():
 
 def test_molecule_connectivity():
     data = {"geometry": np.random.rand(5, 3), "symbols": ["he"] * 5, "validate": False}
-    mol = Molecule(**data, connectivity=None)
+    Molecule(**data, connectivity=None)
 
     connectivity = [[n, n + 1, 1] for n in range(4)]
-    mol = Molecule(**data, connectivity=connectivity)
+    Molecule(**data, connectivity=connectivity)
 
     connectivity[0][0] = -1
     with pytest.raises(ValueError):
-        mol = Molecule(**data, connectivity=connectivity)
+        Molecule(**data, connectivity=connectivity)
 
 
 def test_orient_nomasses():
@@ -719,7 +719,7 @@ def test_sparse_molecule_connectivity():
 
 
 def test_bad_isotope_spec():
-    with pytest.raises(NotAnElementError) as e:
+    with pytest.raises(NotAnElementError):
         qcel.models.Molecule(symbols=["He3"], geometry=[0, 0, 0])
 
 
diff --git a/qcelemental/util/importing.py b/qcelemental/util/importing.py
index 4bd12098..cacd3b81 100644
--- a/qcelemental/util/importing.py
+++ b/qcelemental/util/importing.py
@@ -1,7 +1,7 @@
 import os
 import shutil
 import sys
-from typing import Union
+from typing import List, Union
 
 
 def which_import(
@@ -12,7 +12,7 @@ def which_import(
     raise_msg: str = None,
     package: str = None,
     namespace_ok: bool = False,
-) -> Union[bool, None, str]:
+) -> Union[bool, None, str, List[str]]:
     """Tests to see if a Python module is available.
 
     Returns
diff --git a/setup.py b/setup.py
index bf818749..e320bbcf 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@
         package_data={'': [os.path.join('qcelemental', 'data', '*.json')]},
         setup_requires=[] + pytest_runner,
         python_requires='>=3.6',
-        install_requires=['numpy >= 1.12.0', 'pint >= 0.10.0', 'pydantic >= 1.0.0'],
+        install_requires=["numpy >= 1.12.0", "pint >= 0.10.0", "pydantic >= 1.5.0"],
         extras_require={
             'docs': [
                 'numpydoc',

From 75758a1c0877d8d95cbe11ee34aa1ddc79ac1a97 Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 4 Sep 2020 01:26:17 -0400
Subject: [PATCH 04/10] qcsk: testing configuration and GHA coordination with
 QCSchema repo

---
 .github/workflows/CI.yml       | 11 ++++-
 .github/workflows/QCSchema.yml | 90 ++++++++++++++++++++++++++++++++++
 qcelemental/conftest.py        | 29 +++++++++++
 3 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/QCSchema.yml
 create mode 100644 qcelemental/conftest.py

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 066ba7a4..36aef371 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -1,6 +1,12 @@
 name: CI
 
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
 
 jobs:
   build:
@@ -11,10 +17,13 @@ jobs:
         cfg:
           - conda-env: minimal
             python-version: 3.6
+            label: mindep
           - conda-env: base
             python-version: 3.6
+            label: minpy
           - conda-env: base
             python-version: 3.8
+            label: full
     env:
       PYVER: ${{ matrix.cfg.python-version }}
       CONDA_ENV: ${{ matrix.cfg.conda-env }}
diff --git a/.github/workflows/QCSchema.yml b/.github/workflows/QCSchema.yml
new file mode 100644
index 00000000..3ffd2ebf
--- /dev/null
+++ b/.github/workflows/QCSchema.yml
@@ -0,0 +1,90 @@
+name: QCSchema
+
+on: [pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        conda-env: [base]
+        python-version: [3.7]
+    env:
+      PYVER: ${{ matrix.python-version }}
+      CONDA_ENV: ${{ matrix.conda-env }}
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+      with:
+        path: qcel
+
+    - name: Checkout schema repo
+      uses: actions/checkout@v2
+      with:
+        repository: MolSSI/QCSchema
+        path: qcsk
+        ref: qcsk_export_2
+        #ref: master
+        persist-credentials: true
+        fetch-depth: 0
+        token: ${{ secrets.qcschema_from_qcelemental }}
+
+    - name: Python Setup
+      uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Create Environment
+      shell: bash
+      working-directory: ./qcel
+      run: |
+        eval "$(conda shell.bash hook)" && conda activate
+        python devtools/scripts/create_conda_env.py -n=test -p=$PYVER devtools/conda-envs/$CONDA_ENV.yaml
+
+    - name: Install
+      shell: bash
+      working-directory: ./qcel
+      run: |
+        eval "$(conda shell.bash hook)" && conda activate test
+        python -m pip install . --no-deps
+
+    - name: Environment Information
+      shell: bash
+      run: |
+        eval "$(conda shell.bash hook)" && conda activate test
+        conda list --show-channel-urls
+
+    - name: QCSchema from QCElemental
+      shell: bash
+      working-directory: ./qcel
+      run: |
+        eval "$(conda shell.bash hook)" && conda activate test
+        make qcschema
+        ls -l qcschema
+        cp -p qcschema/* ../qcsk/qcschema/data/vdev/
+        mv ../qcsk/qcschema/data/vdev/QCSchema.schema ../qcsk/qcschema/dev/
+
+    - name: Compare Schemas (generated vs. community)
+      shell: bash
+      working-directory: ./qcsk
+      run: |
+        git diff --color-words
+        pull_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH")
+        branch=qcel-${pull_number}
+        git checkout -b ${branch}
+        git remote -v
+        git config --local user.email "action@github.com"
+        git config --local user.name "GitHub Action"
+        git add -A
+        git commit -m "auto-generated from QCElemental"
+        echo "prbranch=${branch}" >> "$GITHUB_ENV"  # env file replaces the disabled ::set-env command; read later as env.prbranch
+
+    - name: Propose changes
+      uses: ad-m/github-push-action@master
+      with:
+        directory: ./qcsk
+        repository: MolSSI/QCSchema
+        branch: ${{ env.prbranch }}
+        github_token: ${{ secrets.qcschema_from_qcelemental }}
+        force: true
diff --git a/qcelemental/conftest.py b/qcelemental/conftest.py
new file mode 100644
index 00000000..67fb4a5b
--- /dev/null
+++ b/qcelemental/conftest.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--validate", action="store_true", help="validate JSON from previous test run against exported schema"
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def set_up_overall(request):
+    # in all pytest runs except --validate (which uses the files), clear away the JSON examples and generate fresh
+    if not request.config.getoption("--validate", default=False):
+        _data_path = Path(__file__).parent.resolve() / "tests" / "qcschema_instances"
+        for fl in _data_path.rglob("*.json"):
+            fl.unlink()
+
+
+def pytest_runtest_setup(item):
+    """Skip ``test_qcschema*`` tests unless ``--validate`` is given, and all other tests when it is."""
+    # there's a bug where options can only be set if a path is given in the call, so run ``pytest qcelemental/ --validate``
+    # skip     the validate-generated-instances-against-exported-schema tests on most ``pytest`` runs.
+    # run only the validate-generated-instances-against-exported-schema tests on ``pytest --validate`` runs.
+    if not item.config.getoption("--validate", default=False) and item.name.startswith("test_qcschema"):
+        pytest.skip("need --validate option to run")
+    elif item.config.getoption("--validate", default=False) and not item.name.startswith("test_qcschema"):
+        pytest.skip("can't run with --validate option")

From 1573804589cd75bb8c591c6ac97a2fc238d4d56a Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 4 Sep 2020 01:47:10 -0400
Subject: [PATCH 05/10] qcsk: material changes to models

---
 qcelemental/models/basis.py         |  68 +++++--
 qcelemental/models/common_models.py |  22 ++-
 qcelemental/models/molecule.py      |  26 ++-
 qcelemental/models/results.py       | 274 +++++++++++++++++++++++-----
 4 files changed, 323 insertions(+), 67 deletions(-)

diff --git a/qcelemental/models/basis.py b/qcelemental/models/basis.py
index f2e807e3..72274260 100644
--- a/qcelemental/models/basis.py
+++ b/qcelemental/models/basis.py
@@ -1,10 +1,14 @@
 from enum import Enum
 from typing import Dict, List, Optional
 
-from pydantic import Field, constr, validator
+from pydantic import ConstrainedInt, Field, constr, validator
 
 from ..exceptions import ValidationError
-from .basemodels import ProtoModel
+from .basemodels import ProtoModel, qcschema_draft
+
+
+class NonnegativeInt(ConstrainedInt):
+    ge = 0
 
 
 class HarmonicType(str, Enum):
@@ -21,14 +25,24 @@ class ElectronShell(ProtoModel):
     Information for a single electronic shell
     """
 
-    angular_momentum: List[int] = Field(..., description="Angular momentum for this shell.")
+    angular_momentum: List[NonnegativeInt] = Field(
+        ..., description="Angular momentum for the shell as an array of integers.", min_items=1
+    )
     harmonic_type: HarmonicType = Field(..., description=str(HarmonicType.__doc__))
-    exponents: List[float] = Field(..., description="Exponents for this contracted shell.")
+    exponents: List[float] = Field(..., description="Exponents for the contracted shell.", min_items=1)
     coefficients: List[List[float]] = Field(
         ...,
-        description="General contraction coefficients for this shell, individual list components will be the individual segment contraction coefficients.",
+        description="General contraction coefficients for the shell; individual list components will be the individual segment contraction coefficients.",
+        min_items=1,
     )
 
+    class Config(ProtoModel.Config):
+        def schema_extra(schema, model):
+            # edit to allow string storage of basis sets as BSE uses. alternately, could `Union[float, str]` above but that loses some validation
+            schema["properties"]["exponents"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]}
+            schema["properties"]["coefficients"]["items"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]}
+            schema["properties"]["angular_momentum"].update({"uniqueItems": True})
+
     @validator("coefficients")
     def _check_coefficient_length(cls, v, values):
         len_exp = len(values["exponents"])
@@ -89,14 +103,24 @@ class ECPPotential(ProtoModel):
     """
 
     ecp_type: ECPType = Field(..., description=str(ECPType.__doc__))
-    angular_momentum: List[int] = Field(..., description="Angular momentum for the ECPs.")
-    r_exponents: List[int] = Field(..., description="Exponents of the 'r' term.")
-    gaussian_exponents: List[float] = Field(..., description="Exponents of the 'gaussian' term.")
+    angular_momentum: List[NonnegativeInt] = Field(
+        ..., description="Angular momentum for the potential as an array of integers.", min_items=1
+    )
+    r_exponents: List[int] = Field(..., description="Exponents of the 'r' term.", min_items=1)
+    gaussian_exponents: List[float] = Field(..., description="Exponents of the 'gaussian' term.", min_items=1)
     coefficients: List[List[float]] = Field(
         ...,
-        description="General contraction coefficients for this shell, individual list components will be the individual segment contraction coefficients.",
+        description="General contraction coefficients for the potential; individual list components will be the individual segment contraction coefficients.",
+        min_items=1,
     )
 
+    class Config(ProtoModel.Config):
+        def schema_extra(schema, model):
+            # edit to allow string storage of basis sets as BSE uses. alternately, could `Union[float, str]` above but that loses some validation
+            schema["properties"]["gaussian_exponents"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]}
+            schema["properties"]["coefficients"]["items"]["items"] = {"anyOf": [{"type": "number"}, {"type": "string"}]}
+            schema["properties"]["angular_momentum"].update({"uniqueItems": True})
+
     @validator("gaussian_exponents")
     def _check_gaussian_exponents_length(cls, v, values):
         len_exp = len(values["r_exponents"])
@@ -120,9 +144,16 @@ class BasisCenter(ProtoModel):
     Data for a single atom/center in a basis set.
     """
 
-    electron_shells: List[ElectronShell] = Field(..., description="Electronic shells for this center.")
-    ecp_electrons: int = Field(0, description="Number of electrons replace by ECP potentials.")
-    ecp_potentials: Optional[List[ECPPotential]] = Field(None, description="ECPs for this center.")
+    electron_shells: List[ElectronShell] = Field(..., description="Electronic shells for this center.", min_items=1)
+    ecp_electrons: int = Field(0, description="Number of electrons replaced by ECP, MCP, or other field potentials.")
+    ecp_potentials: Optional[List[ECPPotential]] = Field(
+        None, description="ECPs, MCPs, or other field potentials for this center.", min_items=1
+    )
+
+    class Config(ProtoModel.Config):
+        def schema_extra(schema, model):
+            schema["properties"]["electron_shells"].update({"uniqueItems": True})
+            schema["properties"]["ecp_potentials"].update({"uniqueItems": True})
 
 
 class BasisSet(ProtoModel):
@@ -130,8 +161,13 @@ class BasisSet(ProtoModel):
     A quantum chemistry basis description.
     """
 
-    schema_name: constr(strip_whitespace=True, regex="qcschema_basis") = "qcschema_basis"
-    schema_version: int = 1
+    schema_name: constr(strip_whitespace=True, regex="^(qcschema_basis)$") = Field(  # type: ignore
+        "qcschema_basis",
+        description=(f"The QCSchema specification to which this model conforms. Explicitly fixed as qcschema_basis."),
+    )
+    schema_version: int = Field(  # type: ignore
+        1, description="The version number of ``schema_name`` to which this model conforms."
+    )
 
     name: str = Field(..., description="A standard basis name if available (e.g., 'cc-pVDZ').")
     description: Optional[str] = Field(None, description="A brief description of the basis set.")
@@ -142,6 +178,10 @@ class BasisSet(ProtoModel):
 
     nbf: Optional[int] = Field(None, description="The number of basis functions.")
 
+    class Config(ProtoModel.Config):
+        def schema_extra(schema, model):
+            schema["$schema"] = qcschema_draft
+
     @validator("atom_map")
     def _check_atom_map(cls, v, values):
         sv = set(v)
diff --git a/qcelemental/models/common_models.py b/qcelemental/models/common_models.py
index a14330c4..dd387dc1 100644
--- a/qcelemental/models/common_models.py
+++ b/qcelemental/models/common_models.py
@@ -1,10 +1,11 @@
 from enum import Enum
-from typing import TYPE_CHECKING, Any, Dict, Optional
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
 
 import numpy as np
 from pydantic import Field
 
-from .basemodels import ProtoModel
+from .basemodels import ProtoModel, qcschema_draft
+from .basis import BasisSet
 
 if TYPE_CHECKING:
     from pydantic.typing import ReprArgs
@@ -20,12 +21,19 @@ class Provenance(ProtoModel):
     """
 
     creator: str = Field(..., description="The creator of the object.")
-    version: Optional[str] = Field(None, description="The version of the creator.")
-    routine: Optional[str] = Field(None, description="The routine of the creator.")
+
+    version: str = Field(
+        "",
+        description="The version of the creator, blank otherwise. This should be sortable by the very broad [PEP 440](https://www.python.org/dev/peps/pep-0440/).",
+    )
+    routine: str = Field("", description="The name of the routine or function within the creator, blank otherwise.")
 
     class Config(ProtoModel.Config):
         canonical_repr = True
-        extra = "allow"
+        extra: str = "allow"
+
+        def schema_extra(schema, model):
+            schema["$schema"] = qcschema_draft
 
 
 class Model(ProtoModel):
@@ -36,7 +44,7 @@ class Model(ProtoModel):
     method: str = Field(  # type: ignore
         ..., description="The quantum chemistry method to evaluate (e.g., B3LYP, PBE, ...)."
     )
-    basis: Optional[str] = Field(  # type: ignore
+    basis: Optional[Union[str, BasisSet]] = Field(  # type: ignore
         None,
         description="The quantum chemistry basis set to evaluate (e.g., 6-31g, cc-pVDZ, ...). Can be ``None`` for "
         "methods without basis sets.",
@@ -46,7 +54,7 @@ class Model(ProtoModel):
 
     class Config(ProtoModel.Config):
         canonical_repr = True
-        extra = "allow"
+        extra: str = "allow"
 
 
 class DriverEnum(str, Enum):
diff --git a/qcelemental/models/molecule.py b/qcelemental/models/molecule.py
index df5dde38..b4e04619 100644
--- a/qcelemental/models/molecule.py
+++ b/qcelemental/models/molecule.py
@@ -64,6 +64,15 @@ def float_prep(array, around):
     return array
 
 
+class NonnegativeInt(ConstrainedInt):
+    ge = 0
+
+
+class BondOrderFloat(ConstrainedFloat):
+    ge = 0
+    le = 5
+
+
 class Identifiers(ProtoModel):
     """Canonical chemical identifiers"""
 
@@ -93,7 +102,7 @@ class Molecule(ProtoModel):
     Molecule objects geometry, masses, and charges are truncated to 8, 6, and 4 decimal places respectively to assist with duplicate detection.
     """
 
-    schema_name: constr(strip_whitespace=True, regex=qcschema_molecule_default) = Field(  # type: ignore
+    schema_name: constr(strip_whitespace=True, regex="^(qcschema_molecule)$") = Field(  # type: ignore
         qcschema_molecule_default,
         description=(
             f"The QCSchema specification this model conforms to. Explicitly fixed as " f"{qcschema_molecule_default}."
@@ -118,6 +127,7 @@ class Molecule(ProtoModel):
         "this attribute sets atomic order for all other per-atom setting like ``real`` and the first "
         "dimension of ``geometry``. Ghost/Virtual atoms must have an entry in this array-like and are "
         "indicated by the matching the 0-indexed indices in ``real`` field.",
+        shape=["nat"],
     )
     geometry: Array[float] = Field(  # type: ignore
         ...,
@@ -130,6 +140,8 @@ class Molecule(ProtoModel):
         "Can also accept array-likes which can be mapped to (nat,3) such as a 1-D list of length 3*nat, "
         "or the serialized version of the array in (3*nat,) shape; all forms will be reshaped to "
         "(nat,3) for this attribute.",
+        shape=["nat", 3],
+        units="a0",
     )
 
     # Molecule data
@@ -156,6 +168,8 @@ class Molecule(ProtoModel):
         "this is not provided, the mass of each atom is inferred from their most common isotope. If this "
         "is provided, it must be the same length as ``symbols`` but can accept ``None`` entries for "
         "standard masses to infer from the same index in the ``symbols`` field.",
+        shape=["nat"],
+        units="u",
     )
     real_: Optional[Array[bool]] = Field(  # type: ignore
         None,
@@ -164,32 +178,37 @@ class Molecule(ProtoModel):
         "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and the first "
         "dimension of ``geometry``. If this is not provided, all atoms are assumed to be real (``True``)."
         "If this is provided, the reality or ghostality of every atom must be specified.",
+        shape=["nat"],
     )
     atom_labels_: Optional[Array[str]] = Field(  # type: ignore
         None,
         description="Additional per-atom labels as a 1-D array-like of of strings of shape (nat,). Typical use is in "
         "model conversions, such as Elemental <-> Molpro and not typically something which should be user "
         "assigned. See the ``comments`` field for general human-consumable text to affix to the Molecule.",
+        shape=["nat"],
     )
     atomic_numbers_: Optional[Array[np.int16]] = Field(  # type: ignore
         None,
         description="An optional ordered 1-D array-like object of atomic numbers of shape (nat,). Index "
         "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and ``real``. "
         "Values are inferred from the ``symbols`` list if not explicitly set.",
+        shape=["nat"],
     )
     mass_numbers_: Optional[Array[np.int16]] = Field(  # type: ignore
         None,
         description="An optional ordered 1-D array-like object of atomic *mass* numbers of shape (nat). Index "
         "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and ``real``. "
         "Values are inferred from the most common isotopes of the ``symbols`` list if not explicitly set.",
+        shape=["nat"],
     )
 
     # Fragment and connection data
-    connectivity_: Optional[List[Tuple[int, int, float]]] = Field(  # type: ignore
+    connectivity_: Optional[List[Tuple[NonnegativeInt, NonnegativeInt, BondOrderFloat]]] = Field(  # type: ignore
         None,
         description="The connectivity information between each atom in the ``symbols`` array. Each entry in this "
         "list is a Tuple of ``(atom_index_A, atom_index_B, bond_order)`` where the ``atom_index`` "
         "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and ``real``.",
+        min_items=1,
     )
     fragments_: Optional[List[Array[np.int32]]] = Field(  # type: ignore
         None,
@@ -200,18 +219,21 @@ class Molecule(ProtoModel):
         "``fragment_multiplicities``. The 1-D array-like objects are sets of atom indices indicating the "
         "atoms which compose the fragment. The atom indices match the 0-indexed indices of all other "
         "per-atom settings like ``symbols`` and ``real``.",
+        shape=["nfr", "<varies>"],
     )
     fragment_charges_: Optional[List[float]] = Field(  # type: ignore
         None,
         description="The total charge of each fragment in the ``fragments`` list of shape (nfr,). The index of this "
         "list matches the 0-index indices of ``fragment`` list. Will be filled in based on a set of rules "
         "if not provided (and ``fragments`` are specified).",
+        shape=["nfr"],
     )
     fragment_multiplicities_: Optional[List[int]] = Field(  # type: ignore
         None,
         description="The multiplicity of each fragment in the ``fragments`` list of shape (nfr,). The index of this "
         "list matches the 0-index indices of ``fragment`` list. Will be filled in based on a set of "
         "rules if not provided (and ``fragments`` are specified).",
+        shape=["nfr"],
     )
 
     # Orientation
diff --git a/qcelemental/models/results.py b/qcelemental/models/results.py
index 88a29178..889e7681 100644
--- a/qcelemental/models/results.py
+++ b/qcelemental/models/results.py
@@ -19,6 +19,12 @@
 class AtomicResultProperties(ProtoModel):
     """
     Named properties of quantum chemistry computations following the MolSSI QCSchema.
+
+    Notes
+    -----
+    All arrays are stored flat but must be reshapable into the dimensions in attribute ``shape``, with abbreviations as follows:
+        nao: number of atomic orbitals = calcinfo_nbasis
+        nmo: number of molecular orbitals
     """
 
     # Calcinfo
@@ -29,90 +35,194 @@ class AtomicResultProperties(ProtoModel):
     calcinfo_natom: Optional[int] = Field(None, description="The number of atoms in the computation.")
 
     # Canonical
-    nuclear_repulsion_energy: Optional[float] = Field(None, description="The nuclear repulsion energy energy.")
+    nuclear_repulsion_energy: Optional[float] = Field(None, description="The nuclear repulsion energy.")
     return_energy: Optional[float] = Field(
-        None, description="The energy of the requested method, identical to `return_value` for energy computations."
+        None,
+        description="The energy of the requested method, identical to ``return_result`` for ``driver=energy`` computations.",
     )
 
     # SCF Keywords
     scf_one_electron_energy: Optional[float] = Field(
-        None, description="The one-electron (core Hamiltonian) energy contribution to the total SCF energy."
+        None,
+        description="The one-electron (core Hamiltonian) energy contribution to the total SCF energy.",
+        units="E_h",
     )
     scf_two_electron_energy: Optional[float] = Field(
-        None, description="The two-electron energy contribution to the total SCF energy."
+        None,
+        description="The two-electron energy contribution to the total SCF energy.",
+        units="E_h",
     )
     scf_vv10_energy: Optional[float] = Field(
-        None, description="The VV10 functional energy contribution to the total SCF energy."
+        None,
+        description="The VV10 functional energy contribution to the total SCF energy.",
+        units="E_h",
     )
     scf_xc_energy: Optional[float] = Field(
-        None, description="The functional (XC) energy contribution to the total SCF energy."
+        None,
+        description="The functional (XC) energy contribution to the total SCF energy.",
+        units="E_h",
     )
     scf_dispersion_correction_energy: Optional[float] = Field(
         None,
         description="The dispersion correction appended to an underlying functional when a DFT-D method is requested.",
+        units="E_h",
+    )
+    scf_dipole_moment: Optional[Array[float]] = Field(
+        None,
+        description="The SCF X, Y, and Z dipole components",
+        units="e a0",
     )
-    scf_dipole_moment: Optional[Array[float]] = Field(None, description="The X, Y, and Z dipole components.")
     scf_quadrupole_moment: Optional[Array[float]] = Field(
-        None, description="The (3, 3) quadrupole components (redundant; 6 unique)."
+        None,
+        description="The quadrupole components (redundant; 6 unique).",
+        shape=[3, 3],
+        units="e a0^2",
     )
     scf_total_energy: Optional[float] = Field(
-        None, description="The total electronic energy of the SCF stage of the calculation."
+        None,
+        description="The total electronic energy of the SCF stage of the calculation.",
+        units="E_h",
     )
     scf_iterations: Optional[int] = Field(None, description="The number of SCF iterations taken before convergence.")
 
     # MP2 Keywords
     mp2_same_spin_correlation_energy: Optional[float] = Field(
-        None, description="The portion of MP2 doubles correlation energy from same-spin (i.e. triplet) correlations."
+        None,
+        description="The portion of MP2 doubles correlation energy from same-spin (i.e. triplet) correlations, without any user scaling.",
+        units="E_h",
     )
     mp2_opposite_spin_correlation_energy: Optional[float] = Field(
         None,
-        description="The portion of MP2 doubles correlation energy from opposite-spin (i.e. singlet) correlations.",
+        description="The portion of MP2 doubles correlation energy from opposite-spin (i.e. singlet) correlations, without any user scaling.",
+        units="E_h",
     )
     mp2_singles_energy: Optional[float] = Field(
-        None, description="The singles portion of the MP2 correlation energy. Zero except in ROHF."
+        None,
+        description="The singles portion of the MP2 correlation energy. Zero except in ROHF.",
+        units="E_h",
     )
     mp2_doubles_energy: Optional[float] = Field(
         None,
         description="The doubles portion of the MP2 correlation energy including same-spin and opposite-spin correlations.",
+        units="E_h",
     )
     mp2_total_correlation_energy: Optional[float] = Field(
         None, description="The MP2 correlation energy."
     )  # Old name, to be deprecated
-    mp2_correlation_energy: Optional[float] = Field(None, description="The MP2 correlation energy.")
+    mp2_correlation_energy: Optional[float] = Field(
+        None,
+        description="The MP2 correlation energy.",
+        units="E_h",
+    )
     mp2_total_energy: Optional[float] = Field(
-        None, description="The total MP2 energy (MP2 correlation energy + HF energy)."
+        None,
+        description="The total MP2 energy (MP2 correlation energy + HF energy).",
+        units="E_h",
+    )
+    mp2_dipole_moment: Optional[Array[float]] = Field(
+        None,
+        description="The MP2 X, Y, and Z dipole components.",
+        shape=[3],
+        units="e a0",
     )
-    mp2_dipole_moment: Optional[Array[float]] = Field(None, description="The MP2 X, Y, and Z dipole components.")
 
     # CCSD Keywords
     ccsd_same_spin_correlation_energy: Optional[float] = Field(
-        None, description="The portion of CCSD doubles correlation energy from same-spin (i.e. triplet) correlations."
+        None,
+        description="The portion of CCSD doubles correlation energy from same-spin (i.e. triplet) correlations, without any user scaling.",
+        units="E_h",
     )
     ccsd_opposite_spin_correlation_energy: Optional[float] = Field(
         None,
-        description="The portion of CCSD doubles correlation energy from opposite-spin (i.e. singlet) correlations",
+        description="The portion of CCSD doubles correlation energy from opposite-spin (i.e. singlet) correlations, without any user scaling.",
+        units="E_h",
     )
     ccsd_singles_energy: Optional[float] = Field(
-        None, description="The singles portion of the CCSD correlation energy. Zero except in ROHF."
+        None,
+        description="The singles portion of the CCSD correlation energy. Zero except in ROHF.",
+        units="E_h",
     )
     ccsd_doubles_energy: Optional[float] = Field(
         None,
         description="The doubles portion of the CCSD correlation energy including same-spin and opposite-spin correlations.",
+        units="E_h",
+    )
+    ccsd_correlation_energy: Optional[float] = Field(
+        None,
+        description="The CCSD correlation energy.",
+        units="E_h",
     )
-    ccsd_correlation_energy: Optional[float] = Field(None, description="The CCSD correlation energy.")
     ccsd_total_energy: Optional[float] = Field(
-        None, description="The total CCSD energy (CCSD correlation energy + HF energy)."
+        None,
+        description="The total CCSD energy (CCSD correlation energy + HF energy).",
+        units="E_h",
+    )
+    ccsd_dipole_moment: Optional[Array[float]] = Field(
+        None,
+        description="The CCSD X, Y, and Z dipole components.",
+        shape=[3],
+        units="e a0",
     )
-    ccsd_dipole_moment: Optional[Array[float]] = Field(None, description="The CCSD X, Y, and Z dipole components.")
     ccsd_iterations: Optional[int] = Field(None, description="The number of CCSD iterations taken before convergence.")
 
     # CCSD(T) keywords
-    ccsd_prt_pr_correlation_energy: Optional[float] = Field(None, description="The CCSD(T) correlation energy.")
+    ccsd_prt_pr_correlation_energy: Optional[float] = Field(
+        None,
+        description="The CCSD(T) correlation energy.",
+        units="E_h",
+    )
     ccsd_prt_pr_total_energy: Optional[float] = Field(
-        None, description="The total CCSD(T) energy (CCSD(T) correlation energy + HF energy)."
+        None,
+        description="The total CCSD(T) energy (CCSD(T) correlation energy + HF energy).",
+        units="E_h",
     )
     ccsd_prt_pr_dipole_moment: Optional[Array[float]] = Field(
-        None, description="The CCSD(T) X, Y, and Z dipole components."
+        None,
+        description="The CCSD(T) X, Y, and Z dipole components.",
+        shape=[3],
+        units="e a0",
+    )
+
+    # CCSDT keywords
+    ccsdt_correlation_energy: Optional[float] = Field(
+        None,
+        description="The CCSDT correlation energy.",
+        units="E_h",
+    )
+    ccsdt_total_energy: Optional[float] = Field(
+        None,
+        description="The total CCSDT energy (CCSDT correlation energy + HF energy).",
+        units="E_h",
+    )
+    ccsdt_dipole_moment: Optional[Array[float]] = Field(
+        None,
+        description="The CCSDT X, Y, and Z dipole components.",
+        shape=[3],
+        units="e a0",
+    )
+    ccsdt_iterations: Optional[int] = Field(
+        None, description="The number of CCSDT iterations taken before convergence."
+    )
+
+    # CCSDTQ keywords
+    ccsdtq_correlation_energy: Optional[float] = Field(
+        None,
+        description="The CCSDTQ correlation energy.",
+        units="E_h",
+    )
+    ccsdtq_total_energy: Optional[float] = Field(
+        None,
+        description="The total CCSDTQ energy (CCSDTQ correlation energy + HF energy).",
+        units="E_h",
+    )
+    ccsdtq_dipole_moment: Optional[Array[float]] = Field(
+        None,
+        description="The CCSDTQ X, Y, and Z dipole components.",
+        shape=[3],
+        units="e a0",
+    )
+    ccsdtq_iterations: Optional[int] = Field(
+        None, description="The number of CCSDTQ iterations taken before convergence."
     )
 
     class Config(ProtoModel.Config):
@@ -172,26 +282,87 @@ class WavefunctionProperties(ProtoModel):
     )
 
     # Core Hamiltonian
-    h_core_a: Optional[Array[float]] = Field(None, description="Alpha-spin core (one-electron) Hamiltonian.")
-    h_core_b: Optional[Array[float]] = Field(None, description="Beta-spin core (one-electron) Hamiltonian.")
+    h_core_a: Optional[Array[float]] = Field(
+        None, description="Alpha-spin core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"]
+    )
+    h_core_b: Optional[Array[float]] = Field(
+        None, description="Beta-spin core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"]
+    )
     h_effective_a: Optional[Array[float]] = Field(
-        None, description="Alpha-spin effective core (one-electron) Hamiltonian."
+        None, description="Alpha-spin effective core (one-electron) Hamiltonian in the AO basis.", shape=["nao", "nao"]
     )
     h_effective_b: Optional[Array[float]] = Field(
-        None, description="Beta-spin effective core (one-electron) Hamiltonian "
+        None, description="Beta-spin effective core (one-electron) Hamiltonian in the AO basis", shape=["nao", "nao"]
     )
 
     # SCF Results
-    scf_orbitals_a: Optional[Array[float]] = Field(None, description="SCF alpha-spin orbitals.")
-    scf_orbitals_b: Optional[Array[float]] = Field(None, description="SCF beta-spin orbitals.")
-    scf_density_a: Optional[Array[float]] = Field(None, description="SCF alpha-spin density matrix.")
-    scf_density_b: Optional[Array[float]] = Field(None, description="SCF beta-spin density matrix.")
-    scf_fock_a: Optional[Array[float]] = Field(None, description="SCF alpha-spin Fock matrix.")
-    scf_fock_b: Optional[Array[float]] = Field(None, description="SCF beta-spin Fock matrix.")
-    scf_eigenvalues_a: Optional[Array[float]] = Field(None, description="SCF alpha-spin eigenvalues.")
-    scf_eigenvalues_b: Optional[Array[float]] = Field(None, description="SCF beta-spin eigenvalues.")
-    scf_occupations_a: Optional[Array[float]] = Field(None, description="SCF alpha-spin occupations.")
-    scf_occupations_b: Optional[Array[float]] = Field(None, description="SCF beta-spin occupations.")
+    scf_orbitals_a: Optional[Array[float]] = Field(
+        None, description="SCF alpha-spin orbitals in the AO basis.", shape=["nao", "nmo"]
+    )
+    scf_orbitals_b: Optional[Array[float]] = Field(
+        None, description="SCF beta-spin orbitals in the AO basis.", shape=["nao", "nmo"]
+    )
+    scf_density_a: Optional[Array[float]] = Field(
+        None, description="SCF alpha-spin density matrix in the AO basis.", shape=["nao", "nao"]
+    )
+    scf_density_b: Optional[Array[float]] = Field(
+        None, description="SCF beta-spin density matrix in the AO basis.", shape=["nao", "nao"]
+    )
+    scf_fock_a: Optional[Array[float]] = Field(
+        None, description="SCF alpha-spin Fock matrix in the AO basis.", shape=["nao", "nao"]
+    )
+    scf_fock_b: Optional[Array[float]] = Field(
+        None, description="SCF beta-spin Fock matrix in the AO basis.", shape=["nao", "nao"]
+    )
+    scf_eigenvalues_a: Optional[Array[float]] = Field(
+        None, description="SCF alpha-spin orbital eigenvalues.", shape=["nmo"]
+    )
+    scf_eigenvalues_b: Optional[Array[float]] = Field(
+        None, description="SCF beta-spin orbital eigenvalues.", shape=["nmo"]
+    )
+    scf_occupations_a: Optional[Array[float]] = Field(
+        None, description="SCF alpha-spin orbital occupations.", shape=["nmo"]
+    )
+    scf_occupations_b: Optional[Array[float]] = Field(
+        None, description="SCF beta-spin orbital occupations.", shape=["nmo"]
+    )
+
+    # BELOW from qcsk
+    scf_coulomb_a: Optional[Array[float]] = Field(
+        None, description="SCF alpha-spin Coulomb matrix in the AO basis.", shape=["nao", "nao"]
+    )
+    scf_coulomb_b: Optional[Array[float]] = Field(
+        None, description="SCF beta-spin Coulomb matrix in the AO basis.", shape=["nao", "nao"]
+    )
+    scf_exchange_a: Optional[Array[float]] = Field(
+        None, description="SCF alpha-spin exchange matrix in the AO basis.", shape=["nao", "nao"]
+    )
+    scf_exchange_b: Optional[Array[float]] = Field(
+        None, description="SCF beta-spin exchange matrix in the AO basis.", shape=["nao", "nao"]
+    )
+
+    # Localized-orbital SCF wavefunction quantities
+    localized_orbitals_a: Optional[Array[float]] = Field(
+        None,
+        description="Localized alpha-spin orbitals in the AO basis. All nmo orbitals are included, even if only a subset were localized.",
+        shape=["nao", "nmo"],
+    )
+    localized_orbitals_b: Optional[Array[float]] = Field(
+        None,
+        description="Localized beta-spin orbitals in the AO basis. All nmo orbitals are included, even if only a subset were localized.",
+        shape=["nao", "nmo"],
+    )
+    localized_fock_a: Optional[Array[float]] = Field(
+        None,
+        description="Alpha-spin Fock matrix in the localized molecular orbital basis. All nmo orbitals are included, even if only a subset were localized.",
+        shape=["nmo", "nmo"],
+    )
+    localized_fock_b: Optional[Array[float]] = Field(
+        None,
+        description="Beta-spin Fock matrix in the localized molecular orbital basis. All nmo orbitals are included, even if only a subset were localized.",
+        shape=["nmo", "nmo"],
+    )
+    # ABOVE from qcsk
 
     # Return results, must be defined last
     orbitals_a: Optional[str] = Field(None, description="Index to the alpha-spin orbitals of the primary return.")
@@ -200,14 +371,18 @@ class WavefunctionProperties(ProtoModel):
     density_b: Optional[str] = Field(None, description="Index to the beta-spin density of the primary return.")
     fock_a: Optional[str] = Field(None, description="Index to the alpha-spin Fock matrix of the primary return.")
     fock_b: Optional[str] = Field(None, description="Index to the beta-spin Fock matrix of the primary return.")
-    eigenvalues_a: Optional[str] = Field(None, description="Index to the alpha-spin eigenvalues of the primary return.")
-    eigenvalues_b: Optional[str] = Field(None, description="Index to the beta-spin eigenvalues of the primary return.")
-    occupations_a: Optional[str] = Field(
+    eigenvalues_a: Optional[str] = Field(
         None, description="Index to the alpha-spin orbital eigenvalues of the primary return."
     )
-    occupations_b: Optional[str] = Field(
+    eigenvalues_b: Optional[str] = Field(
         None, description="Index to the beta-spin orbital eigenvalues of the primary return."
     )
+    occupations_a: Optional[str] = Field(
+        None, description="Index to the alpha-spin orbital occupations of the primary return."
+    )
+    occupations_b: Optional[str] = Field(
+        None, description="Index to the beta-spin orbital occupations of the primary return."
+    )
 
     class Config(ProtoModel.Config):
         force_skip_defaults = True
@@ -335,8 +510,13 @@ class AtomicInput(ProtoModel):
     """The MolSSI Quantum Chemistry Schema"""
 
     id: Optional[str] = Field(None, description="An optional ID of the ResultInput object.")
-    schema_name: constr(strip_whitespace=True, regex=qcschema_input_default) = qcschema_input_default  # type: ignore
-    schema_version: int = 1
+    schema_name: constr(strip_whitespace=True, regex="^(qc_?schema_input)$") = Field(  # type: ignore
+        qcschema_input_default,
+        description=(
+            f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_input_default}."
+        ),
+    )
+    schema_version: int = Field(1, description="The version number of ``schema_name`` to which this model conforms.")
 
     molecule: Molecule = Field(..., description="The molecule to use in the computation.")
     driver: DriverEnum = Field(..., description=str(DriverEnum.__doc__))
@@ -367,6 +547,12 @@ def __repr_args__(self) -> "ReprArgs":
 class AtomicResult(AtomicInput):
     schema_name: constr(strip_whitespace=True, regex=qcschema_output_default) = qcschema_output_default  # type: ignore
 
+    schema_name: constr(strip_whitespace=True, regex="^(qc_?schema_output)$") = Field(  # type: ignore
+        qcschema_output_default,
+        description=(
+            f"The QCSchema specification this model conforms to. Explicitly fixed as {qcschema_output_default}."
+        ),
+    )
     properties: AtomicResultProperties = Field(..., description=str(AtomicResultProperties.__base_doc__))
     wavefunction: Optional[WavefunctionProperties] = Field(None, description=str(WavefunctionProperties.__base_doc__))
 

From 1bf66dcb9a14332c105a164ae249684540671b7a Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 4 Sep 2020 01:50:01 -0400
Subject: [PATCH 06/10] qcsk: description string and formatting changes to
 models

---
 qcelemental/models/basis.py         |  32 ++++-----
 qcelemental/models/common_models.py |  35 ++++------
 qcelemental/models/molecule.py      | 105 ++++++++++++++++------------
 qcelemental/models/procedures.py    |   5 +-
 qcelemental/models/results.py       |  39 ++++++-----
 qcelemental/testing.py              |   6 +-
 6 files changed, 114 insertions(+), 108 deletions(-)

diff --git a/qcelemental/models/basis.py b/qcelemental/models/basis.py
index 72274260..6dd498bb 100644
--- a/qcelemental/models/basis.py
+++ b/qcelemental/models/basis.py
@@ -12,18 +12,14 @@ class NonnegativeInt(ConstrainedInt):
 
 
 class HarmonicType(str, Enum):
-    """
-    The angular momentum representation of a shell.
-    """
+    """The angular momentum representation of a shell."""
 
     spherical = "spherical"
     cartesian = "cartesian"
 
 
 class ElectronShell(ProtoModel):
-    """
-    Information for a single electronic shell
-    """
+    """Information for a single electronic shell."""
 
     angular_momentum: List[NonnegativeInt] = Field(
         ..., description="Angular momentum for the shell as an array of integers.", min_items=1
@@ -89,18 +85,14 @@ def is_contracted(self) -> bool:
 
 
 class ECPType(str, Enum):
-    """
-    The type of the ECP potential.
-    """
+    """The type of the ECP potential."""
 
     scalar = "scalar"
     spinorbit = "spinorbit"
 
 
 class ECPPotential(ProtoModel):
-    """
-    Information for a single ECP potential.
-    """
+    """Information for a single ECP potential."""
 
     ecp_type: ECPType = Field(..., description=str(ECPType.__doc__))
     angular_momentum: List[NonnegativeInt] = Field(
@@ -140,9 +132,7 @@ def _check_coefficient_length(cls, v, values):
 
 
 class BasisCenter(ProtoModel):
-    """
-    Data for a single atom/center in a basis set.
-    """
+    """Data for a single atom/center in a basis set."""
 
     electron_shells: List[ElectronShell] = Field(..., description="Electronic shells for this center.", min_items=1)
     ecp_electrons: int = Field(0, description="Number of electrons replaced by ECP, MCP, or other field potentials.")
@@ -169,14 +159,16 @@ class BasisSet(ProtoModel):
         1, description="The version number of ``schema_name`` to which this model conforms."
     )
 
-    name: str = Field(..., description="A standard basis name if available (e.g., 'cc-pVDZ').")
-    description: Optional[str] = Field(None, description="A brief description of the basis set.")
-    center_data: Dict[str, BasisCenter] = Field(..., description="A mapping of all types of centers available.")
+    name: str = Field(..., description="The standard basis name if available (e.g., 'cc-pVDZ').")
+    description: Optional[str] = Field(None, description="Brief description of the basis set.")
+    center_data: Dict[str, BasisCenter] = Field(
+        ..., description="Shared basis data for all atoms/centers in the parent molecule."
+    )
     atom_map: List[str] = Field(
-        ..., description="Mapping of all centers in the parent molecule to centers in `center_data`."
+        ..., description="Mapping of all atoms/centers in the parent molecule to centers in ``center_data``."
     )
 
-    nbf: Optional[int] = Field(None, description="The number of basis functions.")
+    nbf: Optional[int] = Field(None, description="The number of basis functions. Use for convenience or as a checksum.")
 
     class Config(ProtoModel.Config):
         def schema_extra(schema, model):
diff --git a/qcelemental/models/common_models.py b/qcelemental/models/common_models.py
index dd387dc1..0d88bd91 100644
--- a/qcelemental/models/common_models.py
+++ b/qcelemental/models/common_models.py
@@ -16,12 +16,10 @@
 
 
 class Provenance(ProtoModel):
-    """
-    Provenance information.
-    """
+    """Provenance information."""
 
-    creator: str = Field(..., description="The creator of the object.")
 
+    creator: str = Field(..., description="The name of the program, library, or person who created the object.")
     version: str = Field(
         "",
         description="The version of the creator, blank otherwise. This should be sortable by the very broad [PEP 440](https://www.python.org/dev/peps/pep-0440/).",
@@ -37,17 +35,17 @@ def schema_extra(schema, model):
 
 
 class Model(ProtoModel):
-    """
-    The quantum chemistry model specification for a given operation to compute against
-    """
+    """The computational molecular sciences model to run."""
 
     method: str = Field(  # type: ignore
-        ..., description="The quantum chemistry method to evaluate (e.g., B3LYP, PBE, ...)."
+        ...,
+        description="The quantum chemistry method to evaluate (e.g., B3LYP, PBE, ...). "
+        "For MM, name of the force field.",
     )
     basis: Optional[Union[str, BasisSet]] = Field(  # type: ignore
         None,
         description="The quantum chemistry basis set to evaluate (e.g., 6-31g, cc-pVDZ, ...). Can be ``None`` for "
-        "methods without basis sets.",
+        "methods without basis sets. For molecular mechanics, name of the atom-typer.",
     )
 
     # basis_spec: BasisSpec = None  # This should be exclusive with basis, but for now will be omitted
@@ -58,7 +56,7 @@ class Config(ProtoModel.Config):
 
 
 class DriverEnum(str, Enum):
-    """Allowed quantum chemistry driver values."""
+    """Allowed computation driver values."""
 
     energy = "energy"
     gradient = "gradient"
@@ -74,19 +72,20 @@ def derivative_int(self):
 
 
 class ComputeError(ProtoModel):
-    """A complete description of the error."""
+    """Complete description of the error from an unsuccessful program execution."""
 
     error_type: str = Field(  # type: ignore
         ...,  # Error enumeration not yet strict
-        description="The type of error which was thrown. Restrict this field short classifiers e.g. 'input_error'.",
+        description="The type of error which was thrown. Restrict this field to short classifiers e.g. 'input_error'. Suggested classifiers: https://github.com/MolSSI/QCEngine/blob/master/qcengine/exceptions.py",
     )
     error_message: str = Field(  # type: ignore
         ...,
-        description="Text associated with the thrown error, often the backtrace, but can contain additional "
+        description="Text associated with the thrown error. This is often the backtrace, but it can contain additional "
         "information as well.",
     )
     extras: Optional[Dict[str, Any]] = Field(  # type: ignore
-        None, description="Additional data to ship with the ComputeError object."
+        None,
+        description="Additional information to bundle with the error.",
     )
 
     class Config:
@@ -97,11 +96,7 @@ def __repr_args__(self) -> "ReprArgs":
 
 
 class FailedOperation(ProtoModel):
-    """
-    A record indicating that a given operation (compute, procedure, etc.) has failed and contains the reason and
-    input data which generated the failure.
-
-    """
+    """Record indicating that a given operation (program, procedure, etc.) has failed and containing the reason and input data which generated the failure."""
 
     id: str = Field(  # type: ignore
         None,
@@ -127,7 +122,7 @@ class FailedOperation(ProtoModel):
     )
     extras: Optional[Dict[str, Any]] = Field(  # type: ignore
         None,
-        description="Additional information to bundle with this Failed Operation. Details which pertain specifically "
+        description="Additional information to bundle with the failed operation. Details which pertain specifically "
         "to a thrown error should be contained in the `error` field. See :class:`ComputeError` for details.",
     )
 
diff --git a/qcelemental/models/molecule.py b/qcelemental/models/molecule.py
index b4e04619..59a1db5d 100644
--- a/qcelemental/models/molecule.py
+++ b/qcelemental/models/molecule.py
@@ -5,6 +5,7 @@
 import hashlib
 import json
 import warnings
+from functools import partial
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union, cast
 
@@ -28,7 +29,6 @@
 if TYPE_CHECKING:
     from pydantic.typing import ReprArgs
 
-
 # Rounding quantities for hashing
 GEOMETRY_NOISE = 8
 MASS_NOISE = 6
@@ -96,20 +96,30 @@ class Config(ProtoModel.Config):
 
 class Molecule(ProtoModel):
     """
+    The physical Cartesian representation of the molecular system.
+
     A QCSchema representation of a Molecule. This model contains
     data for symbols, geometry, connectivity, charges, fragmentation, etc while also supporting a wide array of I/O and manipulation capabilities.
 
     Molecule objects geometry, masses, and charges are truncated to 8, 6, and 4 decimal places respectively to assist with duplicate detection.
+
+    Notes
+    -----
+    All arrays are stored flat but must be reshapable into the dimensions in attribute ``shape``, with abbreviations as follows:
+        nat: number of atoms = calcinfo_natom
+        nfr: number of fragments
+        <varies>: irregular dimension not systematically reshapable
+
     """
 
     schema_name: constr(strip_whitespace=True, regex="^(qcschema_molecule)$") = Field(  # type: ignore
         qcschema_molecule_default,
         description=(
-            f"The QCSchema specification this model conforms to. Explicitly fixed as " f"{qcschema_molecule_default}."
+            f"The QCSchema specification to which this model conforms. Explicitly fixed as {qcschema_molecule_default}."
         ),
     )
     schema_version: int = Field(  # type: ignore
-        2, description="The version number of ``schema_name`` that this Molecule model conforms to."
+        2, description="The version number of ``schema_name`` to which this model conforms."
     )
     validated: bool = Field(  # type: ignore
         False,
@@ -123,21 +133,22 @@ class Molecule(ProtoModel):
     # Required data
     symbols: Array[str] = Field(  # type: ignore
         ...,
-        description="An ordered (nat,) array-like object of atomic elemental symbols of shape (nat,). The index of "
-        "this attribute sets atomic order for all other per-atom setting like ``real`` and the first "
-        "dimension of ``geometry``. Ghost/Virtual atoms must have an entry in this array-like and are "
-        "indicated by the matching the 0-indexed indices in ``real`` field.",
+        description="The ordered array of atomic elemental symbols in title case. This field's index "
+        "sets atomic order for all other per-atom fields like ``real`` and the first dimension of "
+        "``geometry``. Ghost/virtual atoms must have an entry here in ``symbols``; ghostedness is "
+        "indicated through the ``real`` field.",
         shape=["nat"],
     )
     geometry: Array[float] = Field(  # type: ignore
         ...,
-        description="An ordered (nat,3) array-like for XYZ atomic coordinates [a0]. "
+        description="The ordered array for Cartesian XYZ atomic coordinates [a0]. "
         "Atom ordering is fixed; that is, a consumer who shuffles atoms must not reattach the input "
         "(pre-shuffling) molecule schema instance to any output (post-shuffling) per-atom results "
         "(e.g., gradient). Index of the first dimension matches the 0-indexed indices of all other "
         "per-atom settings like ``symbols`` and ``real``."
         "\n"
-        "Can also accept array-likes which can be mapped to (nat,3) such as a 1-D list of length 3*nat, "
+        "Serialized storage is always flat, (3*nat,), but QCSchema implementations will want to reshape it. "
+        "QCElemental can also accept array-likes which can be mapped to (nat,3) such as a 1-D list of length 3*nat, "
         "or the serialized version of the array in (3*nat,) shape; all forms will be reshaped to "
         "(nat,3) for this attribute.",
         shape=["nat", 3],
@@ -146,26 +157,27 @@ class Molecule(ProtoModel):
 
     # Molecule data
     name: Optional[str] = Field(  # type: ignore
-        None, description="A common or human-readable name to assign to this molecule. Can be arbitrary."
+        None,
+        description="Common or human-readable name to assign to this molecule. This field can be arbitrary; see ``identifiers`` for well-defined labels.",
     )
     identifiers: Optional[Identifiers] = Field(  # type: ignore
         None,
-        description="An optional dictionary of additional identifiers by which this Molecule can be referenced, "
+        description="An optional dictionary of additional identifiers by which this molecule can be referenced, "
         "such as INCHI, canonical SMILES, etc. See the :class:``Identifiers`` model for more details.",
     )
     comment: Optional[str] = Field(  # type: ignore
         None,
-        description="Additional comments for this Molecule. Intended for pure human/user consumption " "and clarity.",
+        description="Additional comments for this molecule. Intended for pure human/user consumption and clarity.",
     )
-    molecular_charge: float = Field(0.0, description="The net electrostatic charge of this Molecule.")  # type: ignore
-    molecular_multiplicity: int = Field(1, description="The total multiplicity of this Molecule.")  # type: ignore
+    molecular_charge: float = Field(0.0, description="The net electrostatic charge of the molecule.")  # type: ignore
+    molecular_multiplicity: int = Field(1, description="The total multiplicity of the molecule.")  # type: ignore
 
     # Atom data
     masses_: Optional[Array[float]] = Field(  # type: ignore
         None,
-        description="An ordered 1-D array-like object of atomic masses [u] of shape (nat,). Index order "
-        "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and ``real``. If "
-        "this is not provided, the mass of each atom is inferred from their most common isotope. If this "
+        description="The ordered array of atomic masses. Index order "
+        "matches the 0-indexed indices of all other per-atom fields like ``symbols`` and ``real``. If "
+        "this is not provided, the mass of each atom is inferred from its most common isotope. If this "
         "is provided, it must be the same length as ``symbols`` but can accept ``None`` entries for "
         "standard masses to infer from the same index in the ``symbols`` field.",
         shape=["nat"],
@@ -173,65 +185,67 @@ class Molecule(ProtoModel):
     )
     real_: Optional[Array[bool]] = Field(  # type: ignore
         None,
-        description="An ordered 1-D array-like object of shape (nat,) indicating if each atom is real (``True``) or "
+        description="The ordered array indicating if each atom is real (``True``) or "
         "ghost/virtual (``False``). Index "
         "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and the first "
         "dimension of ``geometry``. If this is not provided, all atoms are assumed to be real (``True``)."
-        "If this is provided, the reality or ghostality of every atom must be specified.",
+        "If this is provided, the reality or ghostedness of every atom must be specified.",
         shape=["nat"],
     )
     atom_labels_: Optional[Array[str]] = Field(  # type: ignore
         None,
-        description="Additional per-atom labels as a 1-D array-like of of strings of shape (nat,). Typical use is in "
+        description="Additional per-atom labels as an array of strings. Typical use is in "
         "model conversions, such as Elemental <-> Molpro and not typically something which should be user "
-        "assigned. See the ``comments`` field for general human-consumable text to affix to the Molecule.",
+        "assigned. See the ``comment`` field for general human-consumable text to affix to the molecule.",
         shape=["nat"],
     )
     atomic_numbers_: Optional[Array[np.int16]] = Field(  # type: ignore
         None,
         description="An optional ordered 1-D array-like object of atomic numbers of shape (nat,). Index "
         "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and ``real``. "
-        "Values are inferred from the ``symbols`` list if not explicitly set.",
+        "Values are inferred from the ``symbols`` list if not explicitly set. "
+        "Ghostedness should be indicated through ``real`` field, not zeros here.",
         shape=["nat"],
     )
     mass_numbers_: Optional[Array[np.int16]] = Field(  # type: ignore
         None,
         description="An optional ordered 1-D array-like object of atomic *mass* numbers of shape (nat). Index "
         "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and ``real``. "
-        "Values are inferred from the most common isotopes of the ``symbols`` list if not explicitly set.",
+        "Values are inferred from the most common isotopes of the ``symbols`` list if not explicitly set. "
+        "If single isotope not (yet) known for an atom, -1 is placeholder.",
         shape=["nat"],
     )
 
     # Fragment and connection data
     connectivity_: Optional[List[Tuple[NonnegativeInt, NonnegativeInt, BondOrderFloat]]] = Field(  # type: ignore
         None,
-        description="The connectivity information between each atom in the ``symbols`` array. Each entry in this "
-        "list is a Tuple of ``(atom_index_A, atom_index_B, bond_order)`` where the ``atom_index`` "
-        "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and ``real``.",
+        description="A list of bonds within the molecule. Each entry is a tuple "
+        "of ``(atom_index_A, atom_index_B, bond_order)`` where the ``atom_index`` "
+        "matches the 0-indexed indices of all other per-atom settings like ``symbols`` and ``real``. "
+        "Bonds may be freely reordered and inverted.",
         min_items=1,
     )
     fragments_: Optional[List[Array[np.int32]]] = Field(  # type: ignore
         None,
-        description="An indication of which sets of atoms are fragments within the Molecule. This is a list of shape "
-        "(nfr) of 1-D array-like objects of arbitrary length. Each entry in the list indicates a new "
-        "fragment. The index "
-        "of the list matches the 0-indexed indices of ``fragment_charges`` and "
-        "``fragment_multiplicities``. The 1-D array-like objects are sets of atom indices indicating the "
-        "atoms which compose the fragment. The atom indices match the 0-indexed indices of all other "
-        "per-atom settings like ``symbols`` and ``real``.",
+        description="List of indices grouping atoms (0-indexed) into molecular fragments within the molecule. "
+        "Each entry in the outer list is a new fragment; index matches the ordering in ``fragment_charges`` and "
+        "``fragment_multiplicities``. Inner lists are 0-indexed atoms which compose the fragment; every atom must "
+        "be in exactly one inner list. Noncontiguous fragments are allowed, though no QM program is known to support them. "
+        "Fragment ordering is fixed; that is, a consumer who shuffles fragments must not reattach the input "
+        "(pre-shuffling) molecule schema instance to any output (post-shuffling) per-fragment results (e.g., n-body energy arrays).",
         shape=["nfr", "<varies>"],
     )
     fragment_charges_: Optional[List[float]] = Field(  # type: ignore
         None,
-        description="The total charge of each fragment in the ``fragments`` list of shape (nfr,). The index of this "
-        "list matches the 0-index indices of ``fragment`` list. Will be filled in based on a set of rules "
+        description="The total charge of each fragment in the ``fragments`` list. The index of this "
+        "list matches the 0-index indices of ``fragments`` list. Will be filled in based on a set of rules "
         "if not provided (and ``fragments`` are specified).",
         shape=["nfr"],
     )
     fragment_multiplicities_: Optional[List[int]] = Field(  # type: ignore
         None,
-        description="The multiplicity of each fragment in the ``fragments`` list of shape (nfr,). The index of this "
-        "list matches the 0-index indices of ``fragment`` list. Will be filled in based on a set of "
+        description="The multiplicity of each fragment in the ``fragments`` list. The index of this "
+        "list matches the 0-index indices of ``fragments`` list. Will be filled in based on a set of "
         "rules if not provided (and ``fragments`` are specified).",
         shape=["nfr"],
     )
@@ -239,14 +253,18 @@ class Molecule(ProtoModel):
     # Orientation
     fix_com: bool = Field(  # type: ignore
         False,
-        description="An indicator which prevents pre-processing the Molecule object to translate the Center-of-Mass "
-        "to (0,0,0) in euclidean coordinate space. Will result in a different ``geometry`` than the "
-        "one provided if False.",
+        description="Whether translation of geometry is allowed (fix F) or disallowed (fix T). "
+        "When False, QCElemental will pre-process the Molecule object to translate the center of mass "
+        "to (0,0,0) in Euclidean coordinate space, resulting in a different ``geometry`` than the "
+        "one provided. "
+        "guidance: A consumer who translates the geometry must not reattach the input (pre-translation) molecule schema instance to any output (post-translation) origin-sensitive results (e.g., an ordinary energy when EFP present).",
     )
     fix_orientation: bool = Field(  # type: ignore
         False,
-        description="An indicator which prevents pre-processes the Molecule object to orient via the inertia tensor."
-        "Will result in a different ``geometry`` than the one provided if False.",
+        description="Whether rotation of geometry is allowed (fix F) or disallowed (fix T). "
+        "When False, QCElemental will pre-process the Molecule object to orient via the inertia tensor, "
+        "resulting in a different ``geometry`` than the one provided. "
+        "guidance: A consumer who rotates the geometry must not reattach the input (pre-rotation) molecule schema instance to any output (post-rotation) frame-sensitive results (e.g., molecular vibrations).",
     )
     fix_symmetry: Optional[str] = Field(  # type: ignore
         None, description="Maximal point group symmetry which ``geometry`` should be treated. Lowercase."
@@ -264,7 +282,8 @@ class Molecule(ProtoModel):
         "never need to be manually set.",
     )
     extras: Dict[str, Any] = Field(  # type: ignore
-        None, description="Extra information to associate with this Molecule."
+        None,
+        description="Additional information to bundle with the molecule. Use for schema development and scratch space.",
     )
 
     class Config(ProtoModel.Config):
diff --git a/qcelemental/models/procedures.py b/qcelemental/models/procedures.py
index 879660cd..a6ad7989 100644
--- a/qcelemental/models/procedures.py
+++ b/qcelemental/models/procedures.py
@@ -57,7 +57,10 @@ class QCInputSpecification(ProtoModel):
     model: Model = Field(..., description=str(Model.__doc__))
     keywords: Dict[str, Any] = Field({}, description="The program specific keywords to be used.")
 
-    extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.")
+    extras: Dict[str, Any] = Field(
+        {},
+        description="Additional information to bundle with the computation. Use for schema development and scratch space.",
+    )
 
 
 class OptimizationInput(ProtoModel):
diff --git a/qcelemental/models/results.py b/qcelemental/models/results.py
index 889e7681..db1ff358 100644
--- a/qcelemental/models/results.py
+++ b/qcelemental/models/results.py
@@ -17,8 +17,7 @@
 
 
 class AtomicResultProperties(ProtoModel):
-    """
-    Named properties of quantum chemistry computations following the MolSSI QCSchema.
+    """Named properties of quantum chemistry computations following the MolSSI QCSchema.
 
     Notes
     -----
@@ -257,6 +256,7 @@ def dict(self, *args, **kwargs):
 
 
 class WavefunctionProperties(ProtoModel):
+    """Wavefunction properties resulting from a computation. Matrix quantities are stored in column-major order. Presence and contents configurable by protocol."""
 
     # Class properties
     _return_results_names: Set[str] = {
@@ -454,9 +454,7 @@ def _assert_exists(cls, v, values):
 
 
 class WavefunctionProtocolEnum(str, Enum):
-    """
-    Wavefunction to keep from a Result computation.
-    """
+    """Wavefunction to keep from a computation."""
 
     all = "all"
     orbitals_and_eigenvalues = "orbitals_and_eigenvalues"
@@ -487,14 +485,12 @@ def allows(self, policy: str):
 
 
 class AtomicResultProtocols(ProtoModel):
-    """
-    Protocols regarding the manipulation of a Result output data.
-    """
+    """Protocols regarding the manipulation of computational result data."""
 
     wavefunction: WavefunctionProtocolEnum = Field(
         WavefunctionProtocolEnum.none, description=str(WavefunctionProtocolEnum.__doc__)
     )
-    stdout: bool = Field(True, description="Primary output file to keep from a Result computation")
+    stdout: bool = Field(True, description="Primary output file to keep from the computation")
     error_correction: ErrorCorrectionProtocol = Field(
         default_factory=ErrorCorrectionProtocol, description="Policies for error correction"
     )
@@ -509,7 +505,7 @@ class Config:
 class AtomicInput(ProtoModel):
     """The MolSSI Quantum Chemistry Schema"""
 
-    id: Optional[str] = Field(None, description="An optional ID of the ResultInput object.")
+    id: Optional[str] = Field(None, description="The optional ID for the computation.")
     schema_name: constr(strip_whitespace=True, regex="^(qc_?schema_input)$") = Field(  # type: ignore
         qcschema_input_default,
         description=(
@@ -521,12 +517,15 @@ class AtomicInput(ProtoModel):
     molecule: Molecule = Field(..., description="The molecule to use in the computation.")
     driver: DriverEnum = Field(..., description=str(DriverEnum.__doc__))
     model: Model = Field(..., description=str(Model.__base_doc__))
-    keywords: Dict[str, Any] = Field({}, description="The program specific keywords to be used.")
+    keywords: Dict[str, Any] = Field({}, description="The program-specific keywords to be used.")
     protocols: AtomicResultProtocols = Field(
         AtomicResultProtocols(), description=str(AtomicResultProtocols.__base_doc__)
     )
 
-    extras: Dict[str, Any] = Field({}, description="Extra fields that are not part of the schema.")
+    extras: Dict[str, Any] = Field(
+        {},
+        description="Additional information to bundle with the computation. Use for schema development and scratch space.",
+    )
 
     provenance: Provenance = Field(
         default_factory=partial(provenance_stamp, __name__), description=str(Provenance.__base_doc__)
@@ -545,7 +544,7 @@ def __repr_args__(self) -> "ReprArgs":
 
 
 class AtomicResult(AtomicInput):
-    schema_name: constr(strip_whitespace=True, regex=qcschema_output_default) = qcschema_output_default  # type: ignore
+    """Results from a CMS program execution."""
 
     schema_name: constr(strip_whitespace=True, regex="^(qc_?schema_output)$") = Field(  # type: ignore
         qcschema_output_default,
@@ -557,15 +556,17 @@ class AtomicResult(AtomicInput):
     wavefunction: Optional[WavefunctionProperties] = Field(None, description=str(WavefunctionProperties.__base_doc__))
 
     return_result: Union[float, Array[float], Dict[str, Any]] = Field(
-        ..., description="The value requested by the 'driver' attribute."
+        ...,
+        description="The primary return specified by the ``driver`` field. Scalar if energy; array if gradient or hessian; dictionary with property keys if properties.",
     )  # type: ignore
 
-    stdout: Optional[str] = Field(None, description="The standard output of the program.")
-    stderr: Optional[str] = Field(None, description="The standard error of the program.")
-
-    success: bool = Field(
-        ..., description="The success of a given programs execution. If False, other fields may be blank."
+    stdout: Optional[str] = Field(
+        None,
+        description="The primary logging output of the program, whether natively standard output or a file. Presence vs. absence (or null-ness?) configurable by protocol.",
     )
+    stderr: Optional[str] = Field(None, description="The standard error of the program execution.")
+
+    success: bool = Field(..., description="The success of program execution. If False, other fields may be blank.")
     error: Optional[ComputeError] = Field(None, description=str(ComputeError.__base_doc__))
     provenance: Provenance = Field(..., description=str(Provenance.__base_doc__))
 
diff --git a/qcelemental/testing.py b/qcelemental/testing.py
index f43ffe6f..a5555418 100644
--- a/qcelemental/testing.py
+++ b/qcelemental/testing.py
@@ -478,11 +478,7 @@ def compare_molrecs(
     return_message: bool = False,
     return_handler: Callable = None,
 ) -> bool:
-    """Function to compare Molecule dictionaries. Prints
-    #    :py:func:`util.success` when elements of `computed` match elements of
-    #    `expected` to `tol` number of digits (for float arrays).
-
-    """
+    """Function to compare Molecule dictionaries."""
     # Need to manipulate the dictionaries a bit, so hold values
     xptd = copy.deepcopy(expected)
     cptd = copy.deepcopy(computed)

From 10742c819d08e55404b241e57f1725cae1f80728 Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 4 Sep 2020 02:02:35 -0400
Subject: [PATCH 07/10] qcsk: a0 as Bohr abbreviation. test basis sets as str
 or float

---
 qcelemental/models/common_models.py     | 1 -
 qcelemental/physical_constants/ureg.py  | 2 +-
 qcelemental/tests/test_model_results.py | 7 +++++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/qcelemental/models/common_models.py b/qcelemental/models/common_models.py
index 0d88bd91..2a0d9351 100644
--- a/qcelemental/models/common_models.py
+++ b/qcelemental/models/common_models.py
@@ -18,7 +18,6 @@
 class Provenance(ProtoModel):
     """Provenance information."""
 
-
     creator: str = Field(..., description="The name of the program, library, or person who created the object.")
     version: str = Field(
         "",
diff --git a/qcelemental/physical_constants/ureg.py b/qcelemental/physical_constants/ureg.py
index 88cfc4e3..9dead1ed 100644
--- a/qcelemental/physical_constants/ureg.py
+++ b/qcelemental/physical_constants/ureg.py
@@ -50,7 +50,7 @@ def build_units_registry(context):
     ureg.define("debye = 1e-18 * statcoulomb * cm = D")
 
     # Distance
-    ureg.define("bohr = {} * meter = bohr_radius = Bohr = au_length".format(phys_const["bohr radius"]["value"]))
+    ureg.define("bohr = {} * meter = bohr_radius = Bohr = a0 = au_length".format(phys_const["bohr radius"]["value"]))
     ureg.define("wavenumber = 1 / centimeter")
     ureg.define("Angstrom = angstrom")
 
diff --git a/qcelemental/tests/test_model_results.py b/qcelemental/tests/test_model_results.py
index c8d9d367..2093c608 100644
--- a/qcelemental/tests/test_model_results.py
+++ b/qcelemental/tests/test_model_results.py
@@ -22,8 +22,8 @@
             {
                 "harmonic_type": "spherical",
                 "angular_momentum": [0],
-                "exponents": [130.70939, 23.808861, 6.4436089],
-                "coefficients": [[0.15432899, 0.53532814, 0.44463454]],
+                "exponents": [130.70939, "23.808861", 6.4436089],
+                "coefficients": [[0.15432899, "0.53532814", 0.44463454]],
             },
             {
                 "harmonic_type": "cartesian",
@@ -173,6 +173,9 @@ def test_basis_set_build(request):
     assert es[1].is_contracted() is False
     assert es[2].is_contracted()
 
+    assert es[0].exponents == [130.70939, 23.808861, 6.4436089]
+    assert es[0].coefficients == [[0.15432899, 0.53532814, 0.44463454]]
+
 
 def test_basis_electron_center_raises():
     data = center_data["bs_sto3g_h"]["electron_shells"][0].copy()

From 81a2a0c2dee859edf2e001cfe299e7569f00ec1c Mon Sep 17 00:00:00 2001
From: Lori Burns <lori.burns@gmail.com>
Date: Wed, 9 Sep 2020 13:23:06 -0400
Subject: [PATCH 08/10] patch-ups after review

---
 devtools/conda-envs/base.yaml          |  2 +-
 qcelemental/models/results.py          |  3 ---
 qcelemental/physical_constants/ureg.py |  2 +-
 qcelemental/tests/test_datum.py        | 11 +++++++++++
 setup.py                               |  2 +-
 5 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/devtools/conda-envs/base.yaml b/devtools/conda-envs/base.yaml
index ec43b74a..30ae9c97 100644
--- a/devtools/conda-envs/base.yaml
+++ b/devtools/conda-envs/base.yaml
@@ -8,7 +8,7 @@ dependencies:
   - nomkl
   - python
   - pint>=0.10.0
-  - pydantic>=1.0.0
+  - pydantic>=1.5.0,!=1.6.0
 
     # Optional depends
   - msgpack-python
diff --git a/qcelemental/models/results.py b/qcelemental/models/results.py
index db1ff358..18db2a0a 100644
--- a/qcelemental/models/results.py
+++ b/qcelemental/models/results.py
@@ -105,9 +105,6 @@ class AtomicResultProperties(ProtoModel):
         description="The doubles portion of the MP2 correlation energy including same-spin and opposite-spin correlations.",
         units="E_h",
     )
-    mp2_total_correlation_energy: Optional[float] = Field(
-        None, description="The MP2 correlation energy."
-    )  # Old name, to be deprecated
     mp2_correlation_energy: Optional[float] = Field(
         None,
         description="The MP2 correlation energy.",
diff --git a/qcelemental/physical_constants/ureg.py b/qcelemental/physical_constants/ureg.py
index 9dead1ed..88cfc4e3 100644
--- a/qcelemental/physical_constants/ureg.py
+++ b/qcelemental/physical_constants/ureg.py
@@ -50,7 +50,7 @@ def build_units_registry(context):
     ureg.define("debye = 1e-18 * statcoulomb * cm = D")
 
     # Distance
-    ureg.define("bohr = {} * meter = bohr_radius = Bohr = a0 = au_length".format(phys_const["bohr radius"]["value"]))
+    ureg.define("bohr = {} * meter = bohr_radius = Bohr = au_length".format(phys_const["bohr radius"]["value"]))
     ureg.define("wavenumber = 1 / centimeter")
     ureg.define("Angstrom = angstrom")
 
diff --git a/qcelemental/tests/test_datum.py b/qcelemental/tests/test_datum.py
index c9acf24a..a16a5d75 100644
--- a/qcelemental/tests/test_datum.py
+++ b/qcelemental/tests/test_datum.py
@@ -135,3 +135,14 @@ def test_complex_array():
 
     dicary = datum1.dict()
     assert compare_recursive(ans, dicary, 9)
+
+
+def test_qc_units():
+    au2D = 2.541746451895025916414946904
+    au2Q = au2D * 0.52917721067
+
+    onedebye = qcel.Datum("CC dipole", "e a0", np.array([0, 0, 1 / au2D]))
+    onebuckingham = qcel.Datum("CC quadrupole", "e a0^2", np.array([0, 0, 1 / au2Q, 0, 0, 0, 0, 0, 0]).reshape((3, 3)))
+
+    assert compare_values(np.array([0, 0, 1.0]), onedebye.to_units("D"))
+    assert compare_values(np.array([[0, 0, 1.0], [0, 0, 0], [0, 0, 0]]), onebuckingham.to_units("D Å"))
diff --git a/setup.py b/setup.py
index e320bbcf..9926a9f4 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@
         package_data={'': [os.path.join('qcelemental', 'data', '*.json')]},
         setup_requires=[] + pytest_runner,
         python_requires='>=3.6',
-        install_requires=["numpy >= 1.12.0", "pint >= 0.10.0", "pydantic >= 1.5.0"],
+        install_requires=["numpy >= 1.12.0", "pint >= 0.10.0", "pydantic >=1.5.0,!=1.6.0"],
         extras_require={
             'docs': [
                 'numpydoc',

From 4d7df193ec00902eed55afcbd6163fc4675701ed Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 29 Jan 2021 13:51:54 -0500
Subject: [PATCH 09/10] rebase and put off QCSchema.yml GHZ

---
 .github/workflows/{QCSchema.yml => QCSchema.yml_wip} |  0
 qcelemental/conftest.py                              | 11 +++++++++++
 setup.py                                             |  4 +++-
 3 files changed, 14 insertions(+), 1 deletion(-)
 rename .github/workflows/{QCSchema.yml => QCSchema.yml_wip} (100%)

diff --git a/.github/workflows/QCSchema.yml b/.github/workflows/QCSchema.yml_wip
similarity index 100%
rename from .github/workflows/QCSchema.yml
rename to .github/workflows/QCSchema.yml_wip
diff --git a/qcelemental/conftest.py b/qcelemental/conftest.py
index 67fb4a5b..6ed70d8a 100644
--- a/qcelemental/conftest.py
+++ b/qcelemental/conftest.py
@@ -27,3 +27,14 @@ def pytest_runtest_setup(item):
         pytest.skip("can't run with --validate option")
     elif item.config.getoption("--validate", default=False) and not item.name.startswith("test_qcschema"):
         pytest.skip("need --validate option to run")
+
+
+# Uncomment below to probe for tests needing `@using_web`
+
+# import socket
+#
+# class block_network(socket.socket):
+#    def __init__(self, *args, **kwargs):
+#        raise Exception("Network call blocked")
+#
+# socket.socket = block_network
diff --git a/setup.py b/setup.py
index 9926a9f4..e04ba2ae 100644
--- a/setup.py
+++ b/setup.py
@@ -42,6 +42,7 @@
             'tests': [
                 'pytest >= 4.0.0',
                 'pytest-cov',
+                # 'jsonschema',  # needed for speciality `pytest --validate`
             ],
             'align': [
                 'networkx>=2.4.0',
@@ -56,8 +57,9 @@
             ],
         },
         tests_require=[
-            'pytest >= 3.9.1',
+            'pytest >= 4.0.0',
             'pytest-cov',
+            # 'jsonschema',  # needed for speciality `pytest --validate`
         ],
         classifiers=[
             'Development Status :: 4 - Beta',

From b970ecb9d6f76a4c0306c0dea6b99b3519e3a567 Mon Sep 17 00:00:00 2001
From: "Lori A. Burns" <lori.burns@gmail.com>
Date: Fri, 29 Jan 2021 22:20:39 -0500
Subject: [PATCH 10/10] CI on py39 and changelog

---
 .github/workflows/CI.yml  | 4 ++--
 docs/source/changelog.rst | 9 ++++++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 36aef371..61e2c43a 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -11,7 +11,7 @@ on:
 jobs:
   build:
 
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-latest
     strategy:
       matrix:
         cfg:
@@ -22,7 +22,7 @@ jobs:
             python-version: 3.6
             label: minpy
           - conda-env: base
-            python-version: 3.8
+            python-version: 3.9
             label: full
     env:
       PYVER: ${{ matrix.cfg.python-version }}
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index 3107db90..9412a664 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -19,15 +19,22 @@ Changelog
 
 New Features
 ++++++++++++
+- (:pr:`247`) Exports models to JSON Schema with ``make qcschema``.
+- (:pr:`247`) Build bank of JSON examples from Pydantic models defined in tests. Test that bank against exported schema with ``pytest --validate qcelemental/``.
+- (:pr:`247`) Many model descriptions edited, dimensions added to array properties, ``AtomicInput.model.basis`` now takes
+  ``BasisSet`` object not just string, several properties added to match QCSchema, several limitations on number and
+  uniqueness imposed.
 
 Enhancements
 ++++++++++++
+- (:pr:`247`) Improve mypy conformance including dynamic provenance. Necessitates Pydantic >=1.5.
+- (:pr:`247`) ``a0`` without underscore added as computable pint unit.
 - (:pr:`246`) Removes types deprecated in NumPy v1.20.0.
 
 Bug Fixes
 +++++++++
 - (:pr:`244`) Fixes where in code validation throws if ``center_data`` missing from ``BasisSet`` model.
-- (:pr:`1000`) Fixes web tests that weren't marked.
+- (:pr:`249`) Fixes web tests that weren't marked.
 
 
 0.17.0 / 2020-10-01