diff --git a/doc/source/tutorials/hydration_freenrg.rst b/doc/source/tutorials/hydration_freenrg.rst index 2b9a416d6..f6f139d33 100644 --- a/doc/source/tutorials/hydration_freenrg.rst +++ b/doc/source/tutorials/hydration_freenrg.rst @@ -53,9 +53,8 @@ possible to visualise the mapping: >>> BSS.Align.viewMapping(ethane, methanol, mapping) -The two molecules are displayed side-by-side with numbers used to indicate -which atoms map to each other. For eample, the carbon indexed 0 in the ethane -molecule maps to a hydrogen indexed 4 in the methanol. +Perturbed bonds and elements are shown in blue. Red atoms are those that are unique to +a particular end state. .. image:: images/ethane_methanol_mapping.png :alt: Mapping between atoms in ethane (left) and methanol (right) for an alchemical transformation. diff --git a/doc/source/tutorials/images/ethane_methanol_mapping.png b/doc/source/tutorials/images/ethane_methanol_mapping.png index d3ea2eaa5..b9e185c95 100644 Binary files a/doc/source/tutorials/images/ethane_methanol_mapping.png and b/doc/source/tutorials/images/ethane_methanol_mapping.png differ diff --git a/python/BioSimSpace/Parameters/_Protocol/_amber.py b/python/BioSimSpace/Parameters/_Protocol/_amber.py index bdddd0851..6694cf572 100644 --- a/python/BioSimSpace/Parameters/_Protocol/_amber.py +++ b/python/BioSimSpace/Parameters/_Protocol/_amber.py @@ -310,15 +310,24 @@ def run(self, molecule, work_dir=None, queue=None): if work_dir is None: work_dir = _os.getcwd() - # Flag whether the molecule is a SMILES string. + # Try to create a molecule from the SMILES string. if isinstance(molecule, str): - is_smiles = True - else: - is_smiles = False + try: + smiles = molecule + molecule = _smiles(molecule) + edit_mol = molecule._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + molecule._sire_object = edit_mol.commit() + except Exception as e: + msg = "Unable to convert SMILES to Molecule using RDKit." + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise _ThirdPartyError(msg) from e + else: + raise _ThirdPartyError(msg) from None - if not is_smiles: - # Create a copy of the molecule. - new_mol = molecule.copy() + # Create a copy of the molecule. + new_mol = molecule.copy() # Choose the program to run with depending on the force field compatibility. # If tLEaP and pdb2gmx are supported, default to tLEaP, then use pdb2gmx if @@ -328,8 +337,7 @@ def run(self, molecule, work_dir=None, queue=None): if self._tleap: if _tleap_exe is not None: output = self._run_tleap(molecule, str(work_dir)) - if not is_smiles: - new_mol._ion_water_model = self._water_model + new_mol._ion_water_model = self._water_model # Otherwise, try using pdb2gmx. elif self._pdb2gmx: if _gmx_exe is not None: @@ -371,41 +379,23 @@ def run(self, molecule, work_dir=None, queue=None): # Make the molecule 'mol' compatible with 'par_mol'. This will create # a mapping between atom indices in the two molecules and add all of # the new properties from 'par_mol' to 'mol'. - if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(molecule).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = edit_mol.residue(_SireMol.ResIdx(0)).rename(resname).molecule() - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() - + if self._ensure_compatible: + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, + ) else: - if self._ensure_compatible: + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = self._forcefield @@ -421,33 +411,18 @@ def _run_tleap(self, molecule, work_dir): Parameters ---------- - molecule : :class:`Molecule `, str - The molecule to parameterise, either as a Molecule object or SMILES - string. + molecule : :class:`Molecule ` + The molecule to parameterise. work_dir : str The working directory. """ - # Convert SMILES to a molecule. - if isinstance(molecule, str): - try: - _molecule = _smiles(molecule) - except Exception as e: - msg = "Unable to convert SMILES to Molecule using RDKit." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise _ThirdPartyError(msg) from e - else: - raise _ThirdPartyError(msg) from None - else: - _molecule = molecule - # Write the system to a PDB file. try: # LEaP expects residue numbering to be ascending and continuous. renumbered_molecule = _SireIO.renumberConstituents( - _molecule.toSystem()._sire_object + molecule.toSystem()._sire_object )[0] renumbered_molecule = _Molecule(renumbered_molecule) _IO.saveMolecules( @@ -570,9 +545,8 @@ def _run_pdb2gmx(self, molecule, work_dir): Parameters ---------- - molecule : :class:`Molecule `, str - The molecule to parameterise, either as a Molecule object or SMILES - string. + molecule : :class:`Molecule ` + The molecule to parameterise. work_dir : str The working directory. @@ -587,25 +561,11 @@ def _run_pdb2gmx(self, molecule, work_dir): "'pdb2gmx' does not support the '%s' force field." % self._forcefield ) - # Convert SMILES to a molecule. - if isinstance(molecule, str): - try: - _molecule = _smiles(molecule) - except Exception as e: - msg = "Unable to convert SMILES to Molecule using RDKit." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise _ThirdPartyError(msg) from e - else: - raise _ThirdPartyError(msg) from None - else: - _molecule = molecule - # Write the system to a PDB file. try: _IO.saveMolecules( _os.path.join(str(work_dir), "input"), - _molecule, + molecule, "pdb", property_map=self._property_map, ) @@ -1007,11 +967,14 @@ def run(self, molecule, work_dir=None, queue=None): if work_dir is None: work_dir = _os.getcwd() - # Convert SMILES to a molecule. + # Try to create a molecule from the SMILES string. if isinstance(molecule, str): - is_smiles = True try: - new_mol = _smiles(molecule) + smiles = molecule + molecule = _smiles(molecule) + edit_mol = molecule._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + molecule._sire_object = edit_mol.commit() except Exception as e: msg = "Unable to convert SMILES to Molecule using RDKit." if _isVerbose(): @@ -1019,9 +982,9 @@ def run(self, molecule, work_dir=None, queue=None): raise _ThirdPartyError(msg) from e else: raise _ThirdPartyError(msg) from None - else: - is_smiles = False - new_mol = molecule.copy() + + # Create a copy of the molecule. + new_mol = molecule.copy() # Use the net molecular charge passed as an option. if self._net_charge is not None: @@ -1251,45 +1214,23 @@ def run(self, molecule, work_dir=None, queue=None): # Make the molecule 'mol' compatible with 'par_mol'. This will create # a mapping between atom indices in the two molecules and add all of # the new properties from 'par_mol' to 'mol'. - if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(molecule).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = ( - edit_mol.residue(_SireMol.ResIdx(0)) - .rename(resname) - .molecule() + if self._ensure_compatible: + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, ) - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() - else: - if self._ensure_compatible: + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = ff diff --git a/python/BioSimSpace/Parameters/_Protocol/_openforcefield.py b/python/BioSimSpace/Parameters/_Protocol/_openforcefield.py index 4ac3a6336..23c51cc8e 100644 --- a/python/BioSimSpace/Parameters/_Protocol/_openforcefield.py +++ b/python/BioSimSpace/Parameters/_Protocol/_openforcefield.py @@ -216,56 +216,43 @@ def run(self, molecule, work_dir=None, queue=None): if work_dir is None: work_dir = _os.getcwd() - # Flag whether the molecule is a SMILES string. + # Try to create BioSimSpace molecule from the SMILES string. if isinstance(molecule, str): is_smiles = True - else: - is_smiles = False - - if is_smiles: - # Convert SMILES string to an OpenFF molecule. - try: - off_molecule = _OpenFFMolecule.from_smiles(molecule) - except Exception as e: - msg = "Failed to convert SMILES to Open Force Field Molecule." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise IOError(msg) from e - else: - raise IOError(msg) from None - - # Generate a single conformer. - try: - off_molecule.generate_conformers(n_conformers=1) - except Exception as e: - msg = "Unable to generate conformer from Open Force Field molecule." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise IOError(msg) from e - else: - raise IOError(msg) from None - else: - # Try converting to RDKit format. - try: - rdmol = _Convert.toRDKit(molecule, property_map=self._property_map) - except Exception as e: - msg = "Failed to convert molecule to RDKit format." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise (msg) from e - else: - raise _ConversionError(msg) from None - - # Create the Open Forcefield Molecule from the RDKit molecule. try: - off_molecule = _OpenFFMolecule.from_rdkit(rdmol) + smiles = molecule + molecule = _Convert.smiles(molecule) except Exception as e: - msg = "Unable to create OpenFF Molecule!" + msg = "Unable to convert SMILES to Molecule using RDKit." if _isVerbose(): msg += ": " + getattr(e, "message", repr(e)) raise _ThirdPartyError(msg) from e else: raise _ThirdPartyError(msg) from None + else: + is_smiles = False + + # Try converting to RDKit format. + try: + rdmol = _Convert.toRDKit(molecule, property_map=self._property_map) + except Exception as e: + msg = "Failed to convert molecule to RDKit format." + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise (msg) from e + else: + raise _ConversionError(msg) from None + + # Create the Open Forcefield Molecule from the RDKit molecule. + try: + off_molecule = _OpenFFMolecule.from_rdkit(rdmol) + except Exception as e: + msg = "Unable to create OpenFF Molecule!" + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise _ThirdPartyError(msg) from e + else: + raise _ThirdPartyError(msg) from None # Apply AM1-BCC charges using NAGL. if _has_nagl and self._use_nagl: @@ -353,50 +340,32 @@ def run(self, molecule, work_dir=None, queue=None): else: raise IOError(msg) from None - # Make the parameterised molecule compatible with the original topology. + # Make sure we retain stereochemistry information from the SMILES string. if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - # Since the name is written to topology file formats, we - # need to ensure that it doesn't start with an [ character, - # which would break GROMACS. - name = molecule - if name.startswith("["): - name = f"smiles:{name}" - + new_mol = _Convert.smiles(smiles) edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(name).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = edit_mol.residue(_SireMol.ResIdx(0)).rename(resname).molecule() - - # Commit the changes. + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() new_mol._sire_object = edit_mol.commit() + # Make the parameterised molecule compatible with the original topology. + if self._ensure_compatible: + new_mol = molecule.copy() + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, + ) else: - if self._ensure_compatible: - new_mol = molecule.copy() + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = self._forcefield diff --git a/python/BioSimSpace/Process/_amber.py b/python/BioSimSpace/Process/_amber.py index aec4bc743..5c3a5b602 100644 --- a/python/BioSimSpace/Process/_amber.py +++ b/python/BioSimSpace/Process/_amber.py @@ -262,12 +262,11 @@ def _setup(self, **kwargs): # Create a copy of the system. system = self._system.copy() + reference_system = self._reference_system.copy() # Convert the water model topology so that it matches the AMBER naming convention. system._set_water_topology("AMBER", property_map=self._property_map) - self._reference_system._set_water_topology( - "AMBER", property_map=self._property_map - ) + reference_system._set_water_topology("AMBER", property_map=self._property_map) # Create the squashed system. if isinstance(self._protocol, _FreeEnergyMixin): @@ -323,7 +322,7 @@ def _setup(self, **kwargs): else: # Check for perturbable molecules and convert to the chosen end state. system = self._checkPerturbable(system) - reference_system = self._checkPerturbable(self._reference_system) + reference_system = self._checkPerturbable(reference_system) # RST file (coordinates). try: @@ -340,7 +339,7 @@ def _setup(self, **kwargs): try: file = _os.path.splitext(self._ref_file)[0] _IO.saveMolecules( - file, self._reference_system, "rst7", property_map=self._property_map + file, reference_system, "rst7", property_map=self._property_map ) except Exception as e: msg = "Failed to write reference system to 'RST7' format." diff --git a/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_amber.py b/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_amber.py index bdddd0851..6694cf572 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_amber.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_amber.py @@ -310,15 +310,24 @@ def run(self, molecule, work_dir=None, queue=None): if work_dir is None: work_dir = _os.getcwd() - # Flag whether the molecule is a SMILES string. + # Try to create a molecule from the SMILES string. if isinstance(molecule, str): - is_smiles = True - else: - is_smiles = False + try: + smiles = molecule + molecule = _smiles(molecule) + edit_mol = molecule._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + molecule._sire_object = edit_mol.commit() + except Exception as e: + msg = "Unable to convert SMILES to Molecule using RDKit." + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise _ThirdPartyError(msg) from e + else: + raise _ThirdPartyError(msg) from None - if not is_smiles: - # Create a copy of the molecule. - new_mol = molecule.copy() + # Create a copy of the molecule. + new_mol = molecule.copy() # Choose the program to run with depending on the force field compatibility. # If tLEaP and pdb2gmx are supported, default to tLEaP, then use pdb2gmx if @@ -328,8 +337,7 @@ def run(self, molecule, work_dir=None, queue=None): if self._tleap: if _tleap_exe is not None: output = self._run_tleap(molecule, str(work_dir)) - if not is_smiles: - new_mol._ion_water_model = self._water_model + new_mol._ion_water_model = self._water_model # Otherwise, try using pdb2gmx. elif self._pdb2gmx: if _gmx_exe is not None: @@ -371,41 +379,23 @@ def run(self, molecule, work_dir=None, queue=None): # Make the molecule 'mol' compatible with 'par_mol'. This will create # a mapping between atom indices in the two molecules and add all of # the new properties from 'par_mol' to 'mol'. - if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(molecule).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = edit_mol.residue(_SireMol.ResIdx(0)).rename(resname).molecule() - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() - + if self._ensure_compatible: + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, + ) else: - if self._ensure_compatible: + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = self._forcefield @@ -421,33 +411,18 @@ def _run_tleap(self, molecule, work_dir): Parameters ---------- - molecule : :class:`Molecule `, str - The molecule to parameterise, either as a Molecule object or SMILES - string. + molecule : :class:`Molecule ` + The molecule to parameterise. work_dir : str The working directory. """ - # Convert SMILES to a molecule. - if isinstance(molecule, str): - try: - _molecule = _smiles(molecule) - except Exception as e: - msg = "Unable to convert SMILES to Molecule using RDKit." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise _ThirdPartyError(msg) from e - else: - raise _ThirdPartyError(msg) from None - else: - _molecule = molecule - # Write the system to a PDB file. try: # LEaP expects residue numbering to be ascending and continuous. renumbered_molecule = _SireIO.renumberConstituents( - _molecule.toSystem()._sire_object + molecule.toSystem()._sire_object )[0] renumbered_molecule = _Molecule(renumbered_molecule) _IO.saveMolecules( @@ -570,9 +545,8 @@ def _run_pdb2gmx(self, molecule, work_dir): Parameters ---------- - molecule : :class:`Molecule `, str - The molecule to parameterise, either as a Molecule object or SMILES - string. + molecule : :class:`Molecule ` + The molecule to parameterise. work_dir : str The working directory. @@ -587,25 +561,11 @@ def _run_pdb2gmx(self, molecule, work_dir): "'pdb2gmx' does not support the '%s' force field." % self._forcefield ) - # Convert SMILES to a molecule. - if isinstance(molecule, str): - try: - _molecule = _smiles(molecule) - except Exception as e: - msg = "Unable to convert SMILES to Molecule using RDKit." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise _ThirdPartyError(msg) from e - else: - raise _ThirdPartyError(msg) from None - else: - _molecule = molecule - # Write the system to a PDB file. try: _IO.saveMolecules( _os.path.join(str(work_dir), "input"), - _molecule, + molecule, "pdb", property_map=self._property_map, ) @@ -1007,11 +967,14 @@ def run(self, molecule, work_dir=None, queue=None): if work_dir is None: work_dir = _os.getcwd() - # Convert SMILES to a molecule. + # Try to create a molecule from the SMILES string. if isinstance(molecule, str): - is_smiles = True try: - new_mol = _smiles(molecule) + smiles = molecule + molecule = _smiles(molecule) + edit_mol = molecule._sire_object.edit() + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() + molecule._sire_object = edit_mol.commit() except Exception as e: msg = "Unable to convert SMILES to Molecule using RDKit." if _isVerbose(): @@ -1019,9 +982,9 @@ def run(self, molecule, work_dir=None, queue=None): raise _ThirdPartyError(msg) from e else: raise _ThirdPartyError(msg) from None - else: - is_smiles = False - new_mol = molecule.copy() + + # Create a copy of the molecule. + new_mol = molecule.copy() # Use the net molecular charge passed as an option. if self._net_charge is not None: @@ -1251,45 +1214,23 @@ def run(self, molecule, work_dir=None, queue=None): # Make the molecule 'mol' compatible with 'par_mol'. This will create # a mapping between atom indices in the two molecules and add all of # the new properties from 'par_mol' to 'mol'. - if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(molecule).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = ( - edit_mol.residue(_SireMol.ResIdx(0)) - .rename(resname) - .molecule() + if self._ensure_compatible: + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, ) - - # Commit the changes. - new_mol._sire_object = edit_mol.commit() - else: - if self._ensure_compatible: + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = ff diff --git a/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_openforcefield.py b/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_openforcefield.py index 4ac3a6336..23c51cc8e 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_openforcefield.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Parameters/_Protocol/_openforcefield.py @@ -216,56 +216,43 @@ def run(self, molecule, work_dir=None, queue=None): if work_dir is None: work_dir = _os.getcwd() - # Flag whether the molecule is a SMILES string. + # Try to create BioSimSpace molecule from the SMILES string. if isinstance(molecule, str): is_smiles = True - else: - is_smiles = False - - if is_smiles: - # Convert SMILES string to an OpenFF molecule. - try: - off_molecule = _OpenFFMolecule.from_smiles(molecule) - except Exception as e: - msg = "Failed to convert SMILES to Open Force Field Molecule." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise IOError(msg) from e - else: - raise IOError(msg) from None - - # Generate a single conformer. - try: - off_molecule.generate_conformers(n_conformers=1) - except Exception as e: - msg = "Unable to generate conformer from Open Force Field molecule." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise IOError(msg) from e - else: - raise IOError(msg) from None - else: - # Try converting to RDKit format. - try: - rdmol = _Convert.toRDKit(molecule, property_map=self._property_map) - except Exception as e: - msg = "Failed to convert molecule to RDKit format." - if _isVerbose(): - msg += ": " + getattr(e, "message", repr(e)) - raise (msg) from e - else: - raise _ConversionError(msg) from None - - # Create the Open Forcefield Molecule from the RDKit molecule. try: - off_molecule = _OpenFFMolecule.from_rdkit(rdmol) + smiles = molecule + molecule = _Convert.smiles(molecule) except Exception as e: - msg = "Unable to create OpenFF Molecule!" + msg = "Unable to convert SMILES to Molecule using RDKit." if _isVerbose(): msg += ": " + getattr(e, "message", repr(e)) raise _ThirdPartyError(msg) from e else: raise _ThirdPartyError(msg) from None + else: + is_smiles = False + + # Try converting to RDKit format. + try: + rdmol = _Convert.toRDKit(molecule, property_map=self._property_map) + except Exception as e: + msg = "Failed to convert molecule to RDKit format." + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise (msg) from e + else: + raise _ConversionError(msg) from None + + # Create the Open Forcefield Molecule from the RDKit molecule. + try: + off_molecule = _OpenFFMolecule.from_rdkit(rdmol) + except Exception as e: + msg = "Unable to create OpenFF Molecule!" + if _isVerbose(): + msg += ": " + getattr(e, "message", repr(e)) + raise _ThirdPartyError(msg) from e + else: + raise _ThirdPartyError(msg) from None # Apply AM1-BCC charges using NAGL. if _has_nagl and self._use_nagl: @@ -353,50 +340,32 @@ def run(self, molecule, work_dir=None, queue=None): else: raise IOError(msg) from None - # Make the parameterised molecule compatible with the original topology. + # Make sure we retain stereochemistry information from the SMILES string. if is_smiles: - new_mol = par_mol - - # We'll now add MolName and ResName info to the molecule, since - # this will be missing. - - # Rename the molecule with the original SMILES string. - # Since the name is written to topology file formats, we - # need to ensure that it doesn't start with an [ character, - # which would break GROMACS. - name = molecule - if name.startswith("["): - name = f"smiles:{name}" - + new_mol = _Convert.smiles(smiles) edit_mol = new_mol._sire_object.edit() - edit_mol = edit_mol.rename(name).molecule() - - # Rename the residue LIG. - resname = _SireMol.ResName("LIG") - edit_mol = edit_mol.residue(_SireMol.ResIdx(0)).rename(resname).molecule() - - # Commit the changes. + edit_mol = edit_mol.rename(f"smiles:{smiles}").molecule() new_mol._sire_object = edit_mol.commit() + # Make the parameterised molecule compatible with the original topology. + if self._ensure_compatible: + new_mol = molecule.copy() + new_mol.makeCompatibleWith( + par_mol, + property_map=self._property_map, + overwrite=True, + verbose=False, + ) else: - if self._ensure_compatible: - new_mol = molecule.copy() + try: new_mol.makeCompatibleWith( par_mol, property_map=self._property_map, overwrite=True, verbose=False, ) - else: - try: - new_mol.makeCompatibleWith( - par_mol, - property_map=self._property_map, - overwrite=True, - verbose=False, - ) - except: - new_mol = par_mol + except: + new_mol = par_mol # Record the forcefield used to parameterise the molecule. new_mol._forcefield = self._forcefield diff --git a/tests/Parameters/test_parameters.py b/tests/Parameters/test_parameters.py index 8acd41ff7..e9c07cc13 100644 --- a/tests/Parameters/test_parameters.py +++ b/tests/Parameters/test_parameters.py @@ -137,3 +137,33 @@ def test_leap_commands(molecule0): assert line_pre[-1] < line_post[0] for x in range(len(line_post) - 1): assert line_post[x] < line_post[x + 1] + + +@pytest.mark.skipif( + has_antechamber is False or has_tleap is False, + reason="Requires AmberTools/antechamber and tLEaP to be installed.", +) +def test_smiles_stereo(): + """ + Test that SMILES string stereochemistry is correctly preserved when + parameterising a molecule. + """ + + from rdkit import Chem + + # Define the SMILES string. + smiles = "CC[C@@H](O)C" + + # Create the parameterised molecule. + mol = BSS.Parameters.gaff(smiles).getMolecule() + + # Convert to RDKit format. + rdmol0 = Chem.MolFromSmiles(smiles) + rdmol1 = BSS.Convert.toRDKit(mol) + + # Get the SMILES string. + rdmol0_smiles = Chem.MolToSmiles(rdmol0) + rdmol1_smiles = Chem.MolToSmiles(Chem.RemoveHs(rdmol1)) + + # Make sure the SMILES strings are the same. + assert rdmol0_smiles == rdmol1_smiles diff --git a/tests/Sandpit/Exscientia/Parameters/test_parameters.py b/tests/Sandpit/Exscientia/Parameters/test_parameters.py index f42f2d539..00bb088b2 100644 --- a/tests/Sandpit/Exscientia/Parameters/test_parameters.py +++ b/tests/Sandpit/Exscientia/Parameters/test_parameters.py @@ -142,3 +142,33 @@ def test_leap_commands(molecule0): assert line_pre[-1] < line_post[0] for x in range(len(line_post) - 1): assert line_post[x] < line_post[x + 1] + + +@pytest.mark.skipif( + has_antechamber is False or has_tleap is False, + reason="Requires AmberTools/antechamber and tLEaP to be installed.", +) +def test_smiles_stereo(): + """ + Test that SMILES string stereochemistry is correctly preserved when + parameterising a molecule. + """ + + from rdkit import Chem + + # Define the SMILES string. + smiles = "CC[C@@H](O)C" + + # Create the parameterised molecule. + mol = BSS.Parameters.gaff(smiles).getMolecule() + + # Convert to RDKit format. + rdmol0 = Chem.MolFromSmiles(smiles) + rdmol1 = BSS.Convert.toRDKit(mol) + + # Get the SMILES string. + rdmol0_smiles = Chem.MolToSmiles(rdmol0) + rdmol1_smiles = Chem.MolToSmiles(Chem.RemoveHs(rdmol1)) + + # Make sure the SMILES strings are the same. + assert rdmol0_smiles == rdmol1_smiles