Skip to content

Commit

Permalink
clarify handling of nan partial charges in to/from_openeye
Browse files Browse the repository at this point in the history
  • Loading branch information
j-wags committed Mar 24, 2020
1 parent 6130887 commit 184394c
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 17 deletions.
18 changes: 9 additions & 9 deletions docs/releasehistory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ Releases follow the ``major.minor.micro`` scheme recommended by `PEP440 <https:/
* ``minor`` increments add features but do not break API compatibility
* ``micro`` increments represent bugfix releases or improvements in documentation

0.6.1 - Bugfixes
----------------
0.7.0 - Current development
---------------------------

Behavior changed
""""""""""""""""
Expand All @@ -26,20 +26,20 @@ Behavior changed
``partial_charges`` attribute is set to ``None`` (the default value), calling ``to_openeye`` will
now produce a OE molecule with partial charges set to ``nan``. This would previously produce an OE
molecule with partial charges of 0.0, which was a loss of information, since it wouldn't be clear
whether the original OFFMol's partial charges were all-zero or ``None``. OpenEye toolkit
whether the original OFFMol's partial charges were REALLY all-zero as opposed to ``None``. OpenEye toolkit
wrapper methods such as ``from_smiles`` and ``from_file`` now produce OFFMols with
``partial_charges = None`` when appropriate (previously these would produce OFFMols with
all-zero charges).
- `PR #281 <https://github.com/openforcefield/openforcefield/pull/281>`_: Per the new SDF
partial charge specification adopted by RDKit, ``Molecule.to_rdkit``
all-zero charges, for the same reasoning as above).
- `PR #281 <https://github.com/openforcefield/openforcefield/pull/281>`_: ``Molecule.to_rdkit``
now sets partial charges on the RDAtom's ``PartialCharges`` property (this was previously set
on the ``partial_charges`` property). If the OFFMol's partial_charges attribute is None, this property
will not be defined.
will not be defined on the RDAtoms.
- `PR #281 <https://github.com/openforcefield/openforcefield/pull/281>`_:
Enforce the behavior during SDF I/O that a SDF may contain multiple MOLECULES, but that the OFF Toolkit
will NEVER assume that it contains multiple CONFORMERS of the same molecule. This is an
DOES NOT assume that it contains multiple CONFORMERS of the same molecule. This is an
important distinction, since otherwise there is ambiguity around whether properties of one
entry in a SDF are shared among several molecule blocks or not (More info
entry in a SDF are shared among several molecule blocks or not, or how to resolve conflicts if properties
are defined differently for several "conformers" of chemically-identical species (More info
`here <https://docs.eyesopen.com/toolkits/python/oechemtk/oemol.html#dude-where-s-my-sd-data>`_.
If the user requests the OFF
Toolkit to write a multi-conformer ``Molecule`` to SDF, only the first conformer will be written.
Expand Down
38 changes: 38 additions & 0 deletions openforcefield/tests/test_toolkits.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def test_smiles(self):
smiles = '[H]C([H])([H])C([H])([H])[H]'
molecule = Molecule.from_smiles(smiles,
toolkit_registry=toolkit_wrapper)
# When creating an OFFMol from SMILES, partial charges should be initialized to None
assert molecule.partial_charges is None
smiles2 = molecule.to_smiles(toolkit_registry=toolkit_wrapper)
assert smiles == smiles2

Expand Down Expand Up @@ -217,6 +219,40 @@ def test_to_from_openeye_core_props_unset(self):

assert molecule2.to_smiles(toolkit_registry=toolkit_wrapper) == expected_output_smiles

@pytest.mark.skipif(not OpenEyeToolkitWrapper.is_available(), reason='OpenEye Toolkit not available')
def test_to_from_openeye_none_partial_charges(self):
"""Test to ensure that to_openeye and from_openeye correctly handle None partial charges"""
import math
# Create ethanol, which has partial charges defined with float values
ethanol = create_ethanol()
assert ethanol.partial_charges is not None
# Convert to OEMol, which should populate the partial charges on
# the OEAtoms with the same partial charges
oemol = ethanol.to_openeye()
for oeatom in oemol.GetAtoms():
assert not math.isnan(oeatom.GetPartialCharge())
# Change the first OEAtom's partial charge to nan, and ensure that it comes
# back to OFFMol with only the first atom as nan
for oeatom in oemol.GetAtoms():
oeatom.SetPartialCharge(float('nan'))
break
eth_from_oe = Molecule.from_openeye(oemol)
assert math.isnan(eth_from_oe.partial_charges[0] / unit.elementary_charge)
for pc in eth_from_oe.partial_charges[1:]:
assert not math.isnan(pc / unit.elementary_charge)
# Then, set all the OEMol's partial charges to nan, and ensure that
# from_openeye produces an OFFMol with partial_charges = None
for oeatom in oemol.GetAtoms():
oeatom.SetPartialCharge(float('nan'))
eth_from_oe = Molecule.from_openeye(oemol)
assert eth_from_oe.partial_charges is None

# Send the OFFMol with partial_charges = None back to OEMol, and
# ensure that all its charges are nan
oemol2 = eth_from_oe.to_openeye()
for oeatom in oemol2.GetAtoms():
assert math.isnan(oeatom.GetPartialCharge())


@pytest.mark.skipif(not OpenEyeToolkitWrapper.is_available(), reason='OpenEye Toolkit not available')
def test_from_openeye_implicit_hydrogen(self):
Expand Down Expand Up @@ -824,6 +860,8 @@ def test_smiles(self):
smiles = '[H][C]([H])([H])[C]([H])([H])[H]'
molecule = Molecule.from_smiles(smiles,
toolkit_registry=toolkit_wrapper)
# When making a molecule from SMILES, partial charges should be initialized to None
assert molecule.partial_charges is None
smiles2 = molecule.to_smiles(toolkit_registry=toolkit_wrapper)
#print(smiles, smiles2)
assert smiles == smiles2
Expand Down
43 changes: 35 additions & 8 deletions openforcefield/utils/toolkits.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,8 @@ def to_file(self, molecule, file_path, file_format):

# OFFTK strictly treats SDF as a single-conformer format.
# We need to override OETK's behavior here if the user is saving a multiconformer molecule.
# Delete all but the first conformer if writing to SDF.

# Remove all but the first conformer when writing to SDF as we only support single conformer format
if (file_format.lower() == "sdf") and oemol.NumConfs() > 1:
conf1 = [conf for conf in oemol.GetConfs()][0]
flat_coords = list()
Expand Down Expand Up @@ -754,6 +755,18 @@ def from_openeye(oemol, allow_undefined_stereo=False):
Create a Molecule from an OpenEye molecule. If the OpenEye molecule has
implicit hydrogens, this function will make them explicit.
``OEAtom`` s have a different set of allowed value for partial charges than
``openforcefield.topology.Molecule`` s. In the OpenEye toolkits, partial charges
are stored on individual ``OEAtom`` s, and their values are initialized to ``0.0``.
In the Open Force Field Toolkit, an ``openforcefield.topology.Molecule``'s
``partial_charges`` attribute is initialized to ``None`` and can be set to a
``simtk.unit.Quantity``-wrapped numpy array with units of
elementary charge. The Open Force
Field Toolkit considers an ``OEMol`` where every ``OEAtom`` has a partial
charge of ``float('nan')`` to be equivalent to an Open Force Field Molecule's
``partial_charges = None``.
This assumption is made in both ``to_openeye`` and ``from_openeye``.
.. warning :: This API is experimental and subject to change.
Parameters
Expand Down Expand Up @@ -928,20 +941,22 @@ def describe_oeatom(oeatom):
np.zeros(molecule.n_atoms, dtype=np.float),
unit=unit.elementary_charge)

any_partial_charge_is_nan = False
# If all OEAtoms have a partial charge of NaN, then the OFFMol should
# have its partial_charges attribute set to None
any_partial_charge_is_not_nan = False
for oe_idx, oe_atom in enumerate(oemol.GetAtoms()):
off_idx = map_atoms[oe_idx]
unitless_charge = oe_atom.GetPartialCharge()
if math.isnan(unitless_charge):
any_partial_charge_is_nan = True
break
if not math.isnan(unitless_charge):
any_partial_charge_is_not_nan = True
#break
charge = unitless_charge * unit.elementary_charge
partial_charges[off_idx] = charge

if any_partial_charge_is_nan:
molecule.partial_charges = None
else:
if any_partial_charge_is_not_nan:
molecule.partial_charges = partial_charges
else:
molecule.partial_charges = None

return molecule

Expand All @@ -950,6 +965,18 @@ def to_openeye(molecule, aromaticity_model=DEFAULT_AROMATICITY_MODEL):
"""
Create an OpenEye molecule using the specified aromaticity model
``OEAtom`` s have a different set of allowed value for partial charges than
``openforcefield.topology.Molecule`` s. In the OpenEye toolkits, partial charges
are stored on individual ``OEAtom`` s, and their values are initialized to ``0.0``.
In the Open Force Field Toolkit, an ``openforcefield.topology.Molecule``'s
``partial_charges`` attribute is initialized to ``None`` and can be set to a
``simtk.unit.Quantity``-wrapped numpy array with units of
elementary charge. The Open Force
Field Toolkit considers an ``OEMol`` where every ``OEAtom`` has a partial
charge of ``float('nan')`` to be equivalent to an Open Force Field Molecule's
``partial_charges = None``.
This assumption is made in both ``to_openeye`` and ``from_openeye``.
.. todo ::
* Should the aromaticity model be specified in some other way?
Expand Down

0 comments on commit 184394c

Please sign in to comment.