From 6bae2d54162f9f20204655fd1c09a1ba1ee50320 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 8 Aug 2023 09:38:50 +0100 Subject: [PATCH 01/19] Update _restraint.py to handle multiple distance restraints This includes specifying the format of the restraints dictionary and implementing the numerical correction. --- .../Exscientia/FreeEnergy/_restraint.py | 353 +++++++++++++++--- 1 file changed, 295 insertions(+), 58 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py index 53d71a177..15b570379 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py @@ -48,8 +48,13 @@ class Restraint: - """The Restraint class which holds the restraint information for the ABFE - calculations. Currently only Boresch type restraint is supported. + """ + The Restraint class which holds the restraint information for the ABFE + calculations. Currently Boresch restaraints and multiple distance restraints + (between pairs of atoms) are supported. For the multiple distance restraints, + it is assumed that all restraints but one will be released after decoupling, + and an analytical correction will be applied to account for releasing the last + restraint. Boresch restraint is a set of harmonic restraints containing one bond, two angle and three dihedrals, which comes from three atoms in the ligand @@ -62,7 +67,7 @@ class Restraint: Angles: r2-r1-l1 (thetaA0, kthetaA), r1-l1-l2 (thetaB0, kthetaB) Dihedrals: r3-r2-r1-l1 (phiA0, kphiA), r2-r1-l1-l2 (phiB0, kphiB), r1-l1-l2-l3 (phiC0, kphiC) - The restraint_dict has the following compact format. + The Boresch restraint_dict has the following compact format. restraint_dict = { "anchor_points":{"r1": BioSimSpace._SireWrappers.Atom, @@ -83,8 +88,34 @@ class Restraint: "kphiA": BioSimSpace.Types.Energy / (BioSimSpace.Types.Area * BioSimSpace.Types.Area), "kphiB": BioSimSpace.Types.Energy / (BioSimSpace.Types.Area * BioSimSpace.Types.Area), "kphiC": BioSimSpace.Types.Energy / (BioSimSpace.Types.Area * BioSimSpace.Types.Area)}} + + The multiple distance restraints are flat-bottom restraints, which are represented as a dictionaries + of the form: + + distance_restraint_dict = {rl: BioSimSpace._SireWrappers.Atom, + l1: BioSimSpace._SireWrappers.Atom, + r0: BioSimSpace.Types.Length, + kr: BioSimSpace.Types.Energy / BioSimSpace.Types.Area, + r_fb: BioSimSpace.Types.Length} + + where r1 and l1 are the atoms involved in the restraint in the receptor and the ligand respectively, + r0 and kr are the equilibrium distance and the force constant, and r_fb is the flat bottom radius. + + The overall restraint_dict is a dictionary of the form: + + restraint_dict = {"distance_restraints": [distance_restraint_dict, ...], + "permanent_distance_restraint": distance_restraint_dict} + + where distance_restraints is a list of distance restraints which are released after decoupling, + and permanent_distance_restraint is the distance restraint which is not released after decoupling. """ + # Create a dict of supported restraints and compatible engines. + supported_restraints = { + "boresch": ["gromacs", "somd"], + "multiple_distance": ["somd"], + } + def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch"): """ Constructor. @@ -102,7 +133,7 @@ def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch" The temperature of the system restraint_type : str - The type of the restraint. (`Boresch`, ) + The type of the restraint. (`Boresch`, `multiple_distance`) """ if not isinstance(temperature, _Temperature): raise ValueError( @@ -208,10 +239,56 @@ def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch" " values further from 0 or pi radians." ) + elif restraint_type.lower() == "multiple_distance": + self._restraint_type = "multiple_distance" + # Warn the user if there are no distance restraints (although they may be deliberately supplying + # only the permanent distance restraint) + if len(restraint_dict["distance_restraints"]) == 0: + _warnings.warn( + "No distance restraints have been specified other than the permanent distance restraint." + ) + + # Check each distance restraint is of the correct format + all_restraints = restraint_dict["distance_restraints"] + [ + restraint_dict["permanent_distance_restraint"] + ] + for single_restraint_dict in all_restraints: + # Check that the atoms are of type BioSimSpace._SireWrappers.Atom + for key in ["r1", "l1"]: + if not isinstance( + single_restraint_dict["anchor_points"][key], _Atom + ): + raise ValueError( + f"distance_restraint_dict['{key}'] " + "must be of type 'BioSimSpace._SireWrappers.Atom'" + ) + + # Test that all quantities have the correct units + for key in ["r0", "r_fb"]: + if not isinstance(single_restraint_dict[key], _Length): + raise ValueError( + f"distance_restraint_dict['{key}'] must be of type " + "'BioSimSpace.Types.Length'" + ) + if not single_restraint_dict["kr"].dimensions() == ( + 0, + 0, + 0, + 1, + -1, + 0, + -2, + ): + raise ValueError( + "distance_restraint_dict['kr'] must be of type " + "'BioSimSpace.Types.Energy'/'BioSimSpace.Types.Length^2'" + ) + else: raise NotImplementedError( f"Restraint type {type} not implemented " - f"yet. Only Boresch restraint is supported." + f"yet. Only {Restraint.supported_restraints.keys()} " + "are currently supported." ) self._restraint_dict = restraint_dict @@ -236,33 +313,55 @@ def system(self, system): raise TypeError( "'system' must be of type 'BioSimSpace._SireWrappers.System'" ) - else: - if self._restraint_type == "boresch": - # Check if the ligand atoms are decoupled. - # Find the decoupled molecule, assume that only one can be - # decoupled. - (decoupled_mol,) = system.getDecoupledMolecules() - for key in ["l1", "l2", "l3"]: - atom = self._restraint_dict["anchor_points"][key] - # Discussed in https://github.com/michellab/BioSimSpace/pull/337 - if ( - atom._sire_object.molecule().number() - != decoupled_mol._sire_object.number() - ): - raise ValueError( - f"The ligand atom {key} is not from decoupled moleucle." - ) - for key in ["r1", "r2", "r3"]: - atom = self._restraint_dict["anchor_points"][key] - if not atom in system: - raise ValueError( - f"The protein atom {key} is not in the system." - ) - # Store a copy of solvated system. - self._system = system.copy() + if self._restraint_type == "boresch": + # Check if the ligand atoms are decoupled. + # Find the decoupled molecule, assume that only one can be + # decoupled. + (decoupled_mol,) = system.getDecoupledMolecules() + for key in ["l1", "l2", "l3"]: + atom = self._restraint_dict["anchor_points"][key] + # Discussed in https://github.com/michellab/BioSimSpace/pull/337 + if ( + atom._sire_object.molecule().number() + != decoupled_mol._sire_object.number() + ): + raise ValueError( + f"The ligand atom {key} is not from decoupled molecule." + ) + for key in ["r1", "r2", "r3"]: + atom = self._restraint_dict["anchor_points"][key] + if not atom in system: + raise ValueError(f"The receptor atom {key} is not in the system.") + + if self._restraint_type == "multiple_distance": + # Check if the ligand atoms are decoupled. + # Find the decoupled molecule, assume that only one can be + # decoupled. + (decoupled_mol,) = system.getDecoupledMolecules() + + all_restraints = self._restraint_dict["distance_restraints"] + [ + self._restraint_dict["permanent_distance_restraint"] + ] + for single_restraint_dict in all_restraints: + ligand_atom = single_restraint_dict["l1"] + if ( + ligand_atom._sire_object.molecule().number() + != decoupled_mol._sire_object.number() + ): + raise ValueError( + f"The ligand atom {ligand_atom} is not from decoupled molecule." + ) + receptor_atom = single_restraint_dict["r1"] + if not receptor_atom in system: + raise ValueError( + f"The protein atom {receptor_atom} is not in the system." + ) - def _gromacs_boresch(self): + # Store a copy of solvated system. + self._system = system.copy() + + def _gromacs_boresch(self, perturbation_type=None): """Format the Gromacs string for boresch restraint.""" # Format the atoms into index list @@ -368,7 +467,7 @@ def write_dihedral(key_list, equilibrium_values, force_constants): return "\n".join(output) - def _somd_boresch(self): + def _somd_boresch(self, perturbation_type=None): """Format the SOMD string for the Boresch restraints.""" # Indices @@ -420,7 +519,43 @@ def _somd_boresch(self): return restr_string - def toString(self, engine=None): + def _somd_multiple_distance(self, perturbation_type=None): + """Format the SOMD string for the multiple distance restraints.""" + + def _add_restr_to_str(restr, restr_string): + """Apend the information for a single restraint to the string.""" + # Indices + r1 = self._system.getIndex(restr["r1"]) + l1 = self._system.getIndex(restr["l1"]) + # Equilibrium value + r0 = restr["r0"] / _angstrom + # Force constant. Halve these as SOMD defines force constants as E = kx**2 + kr = (restr["kr"] / (_kcal_per_mol / (_angstrom * _angstrom))) / 2 + # Flat bottomed radius + r_fb = restr["r_fb"] / _angstrom + + restr_string += f"({r1}, {l1}): ({r0}, {kr}, {r_fb}), " + + standard_restr_string = "distance restraints dictionary = {" + + for single_restraint in self._restraint_dict["distance_restraints"]: + _add_restr_to_str(single_restraint, standard_restr_string) + + if perturbation_type == "restraint": + # In this case, we want all restraints to be switched on, even "permanent" ones + _add_restr_to_str( + self._restraint_dict["permanent_restraint"], standard_restr_string + ) + return standard_restr_string[:-2] + "}" + else: # Other perturbation types, we want the permanent restraints to be constantly on + standard_restr_string = standard_restr_string[:-2] + "}\n" + permanent_restr_string = "permanent distance restraints dictionary = {" + _add_restr_to_str( + self._restraint_dict["permanent_restraint"], permanent_restr_string + ) + return standard_restr_string + permanent_restr_string[:-2] + "}" + + def toString(self, engine, perturbation_type=None): """ The method for convert the restraint to a format that could be used by MD Engines. @@ -429,43 +564,45 @@ def toString(self, engine=None): ---------- engine : str - The molecular dynamics engine used to generate the restraint. - Available options currently is "GROMACS" and "SOMD". If this argument - is omitted then BioSimSpace will choose an appropriate engine - for you. + The molecular dynamics engine for which to generate the restraint. + Available options are currently "GROMACS" or "SOMD" for Boresch restraints, + or "SOMD" only for multiple distance restraints. + perturbation_type : str, optional, default=None + The type of perturbation to applied during the current stage of the free energy + calculation. This is only used for multiple distance restraints, for which all + restraints are converted to standard distance restraints to allow them to be + turned on when the perturbation type is "restraint", but for which the permanent + distance restraint is always active if the perturbation type is "release_restraint" + (or any other perturbation type). """ - if engine.strip().lower() == "gromacs": - if self._restraint_type == "boresch": - return self._gromacs_boresch() - elif engine.lower() == "somd": - if self._restraint_type == "boresch": - return self._somd_boresch() - else: - raise NotImplementedError( - f"Restraint type {self._restraint_type} not implemented " - f"yet. Only Boresch restraints are supported." - ) - else: + to_str_functions = { + "boresch": {"gromacs": self._gromacs_boresch, "somd": self._somd_boresch}, + "multiple_distance": {"somd": self._somd_multiple_distance}, + } + + engine = engine.strip().lower() + try: + return to_str_functions[self._restraint_type][engine](perturbation_type) + except KeyError: raise NotImplementedError( - f"MD Engine {engine} not implemented " - f"yet. Only Gromacs and SOMD are supported." + f"Restraint type {self._restraint_type} not implemented " + f"yet for {engine}." ) def getCorrection(self, method="analytical"): """ Calculate the free energy of releasing the restraint - to the standard state volume.''' + to the standard state volume. Parameters ---------- - method : str The integration method to use for calculating the correction for releasing the restraint to the standard state concentration. - "numerical" or "analytical". Note that the analytical correction - can introduce errors when the restraints are weak, restrained - angles are close to 0 or pi radians, or the restrained distance - is close to 0. + "numerical" or "analytical". Note that the Boresch analytical + correction can introduce errors when the restraints are weak, + restrained angles are close to 0 or pi radians, or the restrained + distance is close to 0. Returns ---------- @@ -473,6 +610,16 @@ def getCorrection(self, method="analytical"): Free energy of releasing the restraint to the standard state volume, in kcal / mol. """ + # Constants. Take .value() to avoid issues with ** and log of GeneralUnit + v0 = ( + ((_Sire_meter3 / 1000) / _Sire_mole) / _Sire_angstrom3 + ).value() # standard state volume in A^3 + R = ( + _k_boltz.value() * _kcal_per_mol / _kelvin + ).value() # molar gas constant in kcal mol-1 K-1 + + # Parameters + T = self.T / _kelvin # Temperature in Kelvin if self._restraint_type == "boresch": # Constants. Take .value() to avoid issues with ** and log of GeneralUnit @@ -484,7 +631,6 @@ def getCorrection(self, method="analytical"): ).value() # molar gas constant in kcal mol-1 K-1 # Parameters - T = self.T / _kelvin # Temperature in Kelvin prefactor = ( 8 * (_np.pi**2) * v0 ) # In A^3. Divide this to account for force constants of 0 in the @@ -664,7 +810,98 @@ def numerical_dihedral_integrand(phi, phi0, kphi): f"Correction method {method} is not supported. Please choose from 'numerical' or 'analytical'." ) + if self._restraint_type == "multiple_distance": + if method == "analytical": + raise ValueError( + "The analytical correction is not supported for multiple distance restraints." + ) + + else: + + def _numerical_distance_integrand(r, r0, r_fb, kr): + """ + Integrand for harmonic distance restraint. + + Parameters + ---------- + r : float + Distance to be integrated, in Angstrom + r0 : float + Equilibrium distance, in Angstrom + r_fb : float + Flat-bottomed radius, in Angstrom + kr : float + Force constant, in kcal mol-1 A-2 + + Returns + ------- + float + Value of integrand + + Notes + ----- + The domain of the integrand is [0, infinity], but this will + be truncated to [0, 8 RT] for practicality. + """ + r_eff = abs(r - r0) - r_fb + if r_eff < 0: + r_eff = 0 + return (r**2) * _np.exp(-(kr * r_eff**2) / (2 * R * T)) + + def _get_correction(r0, r_fb, kr): + """ + Get the free energy of releasing the harmonic distance restraint. + + Parameters + ---------- + r0 : float + Equilibrium distance, in Angstrom + r_fb : float + Flat-bottomed radius, in Angstrom + kr : float + Force constant, in kcal mol-1 A-2 + + Returns + ------- + float + Free energy of releasing the restraint + + Notes + ----- + The domain of the integrand is [0, infinity], but this will + be truncated to [0, 8 RT] for practicality. + """ + dist_at_8RT = ( + 4 * _np.sqrt((R * T) / kr) + r_fb + ) # Dist. which gives restraint energy = 8 RT + r_min = max(0, r0 - dist_at_8RT) + r_max = r0 + dist_at_8RT + integrand = lambda r: numerical_distance_integrand(r, r0, r_fb, kr) + z_r = _integrate.quad(integrand, r_min, r_max)[0] + dg = -R * T * _np.log(v0 / (4 * _np.pi * z_r)) + + return dg + + # Get the parameters from the permanent distance restraint, which is not released + r0 = ( + self._restraint_dict["permanent_distance_restraint"]["r0"] + / _angstrom + ) + r_fb = ( + self._restraint_dict["permanent_distance_restraint"]["r_fb"] + / _angstrom + ) + kr = self._restraint_dict["permanent_distance_restraint"]["kr"] / ( + _kcal_per_mol / _angstrom2 + ) + _warnings.warn( + "The multiple distance restraint correction is assumes that only " + "the 'permanent_distance_restraint' is active." + ) + return _get_correction(r0, r_fb, kr) + @property def correction(self): """Give the free energy of removing the restraint.""" - return self.getCorrection() + method = "analytical" if self._restraint_type == "boresch" else "numerical" + return self.getCorrection(method=method) From 0c78c6bb7e6218471e9d38555b63680e5174f06e Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 8 Aug 2023 09:48:24 +0100 Subject: [PATCH 02/19] Update names of all Boresch restraint tests --- .../Exscientia/FreeEnergy/test_restraint.py | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py index b72be55fe..7c797f232 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py @@ -13,9 +13,11 @@ # Store the tutorial URL. url = BSS.tutorialUrl() +################### Test Borech Restraint ################ + @pytest.fixture(scope="session") -def restraint_components(): +def boresch_restraint_component(): """Generate a the components required to create a restraint.""" ligand = BSS.IO.readMolecules( [f"{url}/ligand01.prm7.bz2", f"{url}/ligand01.rst7.bz2"] @@ -69,9 +71,9 @@ def restraint_components(): @pytest.fixture(scope="session") -def restraint(restraint_components): +def boresch_restraint(boresch_restraint_component): """Generate the Boresch restraint object.""" - system, restraint_dict = restraint_components + system, restraint_dict = boresch_restraint_component restraint = Restraint( system, restraint_dict, 300 * kelvin, restraint_type="Boresch" @@ -79,20 +81,20 @@ def restraint(restraint_components): return restraint -def test_sanity(restraint): +def test_sanity_boresch(boresch_restraint): """Sanity check.""" - assert isinstance(restraint, Restraint) + assert isinstance(boresch_restraint, Restraint) -def test_numerical_correction(restraint): - dG = restraint.getCorrection(method="numerical") / kcal_per_mol +def test_numerical_correction_boresch(boresch_restraint): + dG = boresch_restraint.getCorrection(method="numerical") / kcal_per_mol assert np.isclose(-7.2, dG, atol=0.1) -def test_analytical_correction(restraint): - dG = restraint.getCorrection(method="analytical") / kcal_per_mol +def test_analytical_correction_boresch(boresch_restraint): + dG = boresch_restraint.getCorrection(method="analytical") / kcal_per_mol assert np.isclose(-7.2, dG, atol=0.1) - assert isinstance(restraint, Restraint) + assert isinstance(boresch_restraint, Restraint) test_force_constants = [ @@ -111,9 +113,11 @@ def test_analytical_correction(restraint): @pytest.mark.parametrize("force_constants, expected", test_force_constants) -def test_input_force_constants(restraint_components, force_constants, expected): +def test_input_force_constants_boresch( + boresch_restraint_component, force_constants, expected +): print(force_constants) - system, restraint_dict = restraint_components + system, restraint_dict = boresch_restraint_component dict_copy = restraint_dict.copy() force_constants_copy = restraint_dict["force_constants"].copy() force_constants_copy.update(force_constants) @@ -125,11 +129,11 @@ def test_input_force_constants(restraint_components, force_constants, expected): Restraint(system, dict_copy, 300 * kelvin, restraint_type="Boresch") -class TestGromacsOutput: +class TestGromacsOutputBoresch: @staticmethod @pytest.fixture(scope="class") - def Topology(restraint): - return restraint.toString(engine="Gromacs").split("\n") + def Topology(boresch_restraint): + return boresch_restraint.toString(engine="Gromacs").split("\n") def test_sanity(self, Topology): """Sanity check.""" @@ -177,11 +181,11 @@ def test_dihedral(self, Topology): assert al == "1498" -class TestSomdOutput: +class TestSomdOutputBoresch: @staticmethod @pytest.fixture(scope="class") - def getRestraintSomd(restraint): - boresch_str = restraint.toString(engine="SOMD").split("=")[1].strip() + def getRestraintSomd(boresch_restraint): + boresch_str = boresch_restraint.toString(engine="SOMD").split("=")[1].strip() boresch_dict = eval(boresch_str) return boresch_dict From 64dd2aa9cc5f6369cff538f4708a7e180e5a0b34 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 8 Aug 2023 09:55:03 +0100 Subject: [PATCH 03/19] Check correct keys in multiple distance restraints dict --- .../Sandpit/Exscientia/FreeEnergy/_restraint.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py index 15b570379..ee8185e2b 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py @@ -241,6 +241,15 @@ def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch" elif restraint_type.lower() == "multiple_distance": self._restraint_type = "multiple_distance" + + if not restraint_dict.keys() == [ + "distance_restraints", + "permanent_distance_restraint", + ]: + raise ValueError( + "restraint_dict must have keys 'distance_restraints' and 'permanent_distance_restraint'" + ) + # Warn the user if there are no distance restraints (although they may be deliberately supplying # only the permanent distance restraint) if len(restraint_dict["distance_restraints"]) == 0: From fd400b857938bd2e14212952fe0907ddad9722c8 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 8 Aug 2023 12:10:37 +0100 Subject: [PATCH 04/19] Add tests for multiple distance restraints and fix bugs this revealed --- .../Exscientia/FreeEnergy/_restraint.py | 46 +++- .../Exscientia/FreeEnergy/test_restraint.py | 254 +++++++++++++++++- 2 files changed, 285 insertions(+), 15 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py index ee8185e2b..8cb1ba4bf 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py @@ -242,10 +242,10 @@ def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch" elif restraint_type.lower() == "multiple_distance": self._restraint_type = "multiple_distance" - if not restraint_dict.keys() == [ + if not set(restraint_dict.keys()) == { "distance_restraints", "permanent_distance_restraint", - ]: + }: raise ValueError( "restraint_dict must have keys 'distance_restraints' and 'permanent_distance_restraint'" ) @@ -262,11 +262,21 @@ def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch" restraint_dict["permanent_distance_restraint"] ] for single_restraint_dict in all_restraints: + if not set(single_restraint_dict.keys()) == { + "r1", + "l1", + "r0", + "r_fb", + "kr", + }: + raise ValueError( + "distance_restraint_dict must have keys 'r1', 'l1', 'r0', 'r_fb', 'kr' " + f"but has keys {list(single_restraint_dict.keys())}" + ) + # Check that the atoms are of type BioSimSpace._SireWrappers.Atom for key in ["r1", "l1"]: - if not isinstance( - single_restraint_dict["anchor_points"][key], _Atom - ): + if not isinstance(single_restraint_dict[key], _Atom): raise ValueError( f"distance_restraint_dict['{key}'] " "must be of type 'BioSimSpace._SireWrappers.Atom'" @@ -544,23 +554,28 @@ def _add_restr_to_str(restr, restr_string): r_fb = restr["r_fb"] / _angstrom restr_string += f"({r1}, {l1}): ({r0}, {kr}, {r_fb}), " + return restr_string standard_restr_string = "distance restraints dictionary = {" for single_restraint in self._restraint_dict["distance_restraints"]: - _add_restr_to_str(single_restraint, standard_restr_string) + standard_restr_string = _add_restr_to_str( + single_restraint, standard_restr_string + ) if perturbation_type == "restraint": # In this case, we want all restraints to be switched on, even "permanent" ones - _add_restr_to_str( - self._restraint_dict["permanent_restraint"], standard_restr_string + standard_restr_string = _add_restr_to_str( + self._restraint_dict["permanent_distance_restraint"], + standard_restr_string, ) return standard_restr_string[:-2] + "}" else: # Other perturbation types, we want the permanent restraints to be constantly on standard_restr_string = standard_restr_string[:-2] + "}\n" permanent_restr_string = "permanent distance restraints dictionary = {" - _add_restr_to_str( - self._restraint_dict["permanent_restraint"], permanent_restr_string + permanent_restr_string = _add_restr_to_str( + self._restraint_dict["permanent_distance_restraint"], + permanent_restr_string, ) return standard_restr_string + permanent_restr_string[:-2] + "}" @@ -591,13 +606,15 @@ def toString(self, engine, perturbation_type=None): engine = engine.strip().lower() try: - return to_str_functions[self._restraint_type][engine](perturbation_type) + str_fn = to_str_functions[self._restraint_type][engine] except KeyError: raise NotImplementedError( f"Restraint type {self._restraint_type} not implemented " f"yet for {engine}." ) + return str_fn(perturbation_type) + def getCorrection(self, method="analytical"): """ Calculate the free energy of releasing the restraint @@ -821,7 +838,7 @@ def numerical_dihedral_integrand(phi, phi0, kphi): if self._restraint_type == "multiple_distance": if method == "analytical": - raise ValueError( + raise NotImplementedError( "The analytical correction is not supported for multiple distance restraints." ) @@ -885,10 +902,13 @@ def _get_correction(r0, r_fb, kr): ) # Dist. which gives restraint energy = 8 RT r_min = max(0, r0 - dist_at_8RT) r_max = r0 + dist_at_8RT - integrand = lambda r: numerical_distance_integrand(r, r0, r_fb, kr) + integrand = lambda r: _numerical_distance_integrand(r, r0, r_fb, kr) z_r = _integrate.quad(integrand, r_min, r_max)[0] dg = -R * T * _np.log(v0 / (4 * _np.pi * z_r)) + # Attatch unit of kcal/mol + dg *= _kcal_per_mol + return dg # Get the parameters from the permanent distance restraint, which is not released diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py index 7c797f232..e442dce37 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py @@ -2,6 +2,11 @@ import numpy as np +from sire.legacy.Units import angstrom3 as _Sire_angstrom3 +from sire.legacy.Units import k_boltz as _k_boltz +from sire.legacy.Units import meter3 as _Sire_meter3 +from sire.legacy.Units import mole as _Sire_mole + import BioSimSpace.Sandpit.Exscientia as BSS from BioSimSpace.Sandpit.Exscientia.Align import decouple from BioSimSpace.Sandpit.Exscientia.FreeEnergy import Restraint @@ -97,7 +102,7 @@ def test_analytical_correction_boresch(boresch_restraint): assert isinstance(boresch_restraint, Restraint) -test_force_constants = [ +test_force_constants_boresch = [ ({"kr": 0}, ValueError), ({"kthetaA": 0}, ValueError), ({"kthetaB": 0}, ValueError), @@ -112,7 +117,7 @@ def test_analytical_correction_boresch(boresch_restraint): ] -@pytest.mark.parametrize("force_constants, expected", test_force_constants) +@pytest.mark.parametrize("force_constants, expected", test_force_constants_boresch) def test_input_force_constants_boresch( boresch_restraint_component, force_constants, expected ): @@ -211,3 +216,248 @@ def test_equil_vals(self, getRestraintSomd): assert equil_vals["phiA0"] == 2.59 assert equil_vals["phiB0"] == -1.20 assert equil_vals["phiC0"] == 2.63 + + +################### Test Multiple Distance Restraint ################### + + +@pytest.fixture(scope="session") +def mdr_restraint_component(): + """ + Generate a the components required to create a + multiple distance restraints restraint object. + """ + ligand = BSS.IO.readMolecules( + [f"{url}/ligand01.prm7.bz2", f"{url}/ligand01.rst7.bz2"] + ).getMolecule(0) + decoupled_ligand = decouple(ligand) + + protein = BSS.IO.readMolecules( + [f"{url}/1jr5.crd.bz2", f"{url}/1jr5.top.bz2"] + ).getMolecule(0) + + system = (protein + decoupled_ligand).toSystem() + + # Create three distance restraints + restraint_dict = { + "distance_restraints": [ + { + "l1": decoupled_ligand.getAtoms()[0], + "r1": protein.getAtoms()[0], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": decoupled_ligand.getAtoms()[1], + "r1": protein.getAtoms()[1], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": decoupled_ligand.getAtoms()[2], + "r1": protein.getAtoms()[2], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + } + + return system, restraint_dict + + +@pytest.fixture(scope="session") +def mdr_restraint(mdr_restraint_component): + """Generate the multiple distance restraints restraint object.""" + system, restraint_dict = mdr_restraint_component + + restraint = Restraint( + system, restraint_dict, 300 * kelvin, restraint_type="multiple_distance" + ) + return restraint + + +@pytest.fixture(scope="session") +def mdr_restraint_component_fb_r0(): + """ + Generate a the components required to create a + multiple distance restraints restraint object with + the values for r0 and r_fb set to 0 for the permanent restraint. + """ + ligand = BSS.IO.readMolecules( + [f"{url}/ligand01.prm7.bz2", f"{url}/ligand01.rst7.bz2"] + ).getMolecule(0) + decoupled_ligand = decouple(ligand) + + protein = BSS.IO.readMolecules( + [f"{url}/1jr5.crd.bz2", f"{url}/1jr5.top.bz2"] + ).getMolecule(0) + + system = (protein + decoupled_ligand).toSystem() + + # Create three distance restraints + restraint_dict = { + "distance_restraints": [ + { + "l1": decoupled_ligand.getAtoms()[0], + "r1": protein.getAtoms()[0], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": decoupled_ligand.getAtoms()[1], + "r1": protein.getAtoms()[1], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": decoupled_ligand.getAtoms()[2], + "r1": protein.getAtoms()[2], + "r0": 0 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 0 * angstrom, + }, + } + + return system, restraint_dict + + +@pytest.fixture(scope="session") +def mdr_restraint_fb_r0(mdr_restraint_component_fb_r0): + """Generate the multiple distance restraints restraint object with + the values for r0 and r_fb set to 0 for the permanent restraint.""" + system, restraint_dict = mdr_restraint_component_fb_r0 + + restraint = Restraint( + system, restraint_dict, 300 * kelvin, restraint_type="multiple_distance" + ) + return restraint + + +def test_sanity_mdr(mdr_restraint): + """Sanity check.""" + assert isinstance(mdr_restraint, Restraint) + + +def test_numerical_correction_mdr(mdr_restraint): + dG = mdr_restraint.getCorrection(method="numerical") / kcal_per_mol + assert np.isclose(-0.991, dG, atol=0.001) + + +def test_numerical_correction_mdr_fb0(mdr_restraint_fb_r0): + """ + Check that the numerical multiple distance restraints correction is + consistent with the analytical result when the flat-bottom radius is 0 + and r0 = 0. + """ + # Get the numerical correction + dG_numerical = mdr_restraint_fb_r0.getCorrection(method="numerical") / kcal_per_mol + + # Calculate the analytical correction + # Constants + v0 = ( + ((_Sire_meter3 / 1000) / _Sire_mole) / _Sire_angstrom3 + ).value() # standard state volume in A^3 + R = ( + _k_boltz.value() * kcal_per_mol / kelvin + ).value() # molar gas constant in kcal mol-1 K-1 + + T = mdr_restraint_fb_r0.T / kelvin # Temperature in Kelvin + + kr = mdr_restraint_fb_r0._restraint_dict["permanent_distance_restraint"]["kr"] / ( + kcal_per_mol / angstrom**2 + ) + dG_analytical = ( + R * T * np.log((2 * np.pi * R * T / kr) ** (3 / 2) / v0) + ) # kcal mol-1 + + assert np.isclose(dG_analytical, dG_numerical, atol=0.001) + + +def test_analytical_correction_mdr(mdr_restraint): + with pytest.raises(NotImplementedError): + dG = mdr_restraint.getCorrection(method="analytical") / kcal_per_mol + + +def test_gromacs_output_mdr(mdr_restraint): + """Test the Gromacs output.""" + with pytest.raises(NotImplementedError): + gromacs_string = mdr_restraint.toString(engine="Gromacs") + + +class TestSomdOutputMDR: + @staticmethod + @pytest.fixture(scope="class") + def getRestraintSomd(mdr_restraint): + """The standard form of the restraints for any perturbation type other than `restraint`.""" + mdr_str = ( + mdr_restraint.toString(engine="SOMD", perturbation_type="release_restraint") + .split("\n")[0] + .split("=")[1] + .strip() + ) + permanent_mdr_str = ( + mdr_restraint.toString(engine="SOMD", perturbation_type="release_restraint") + .split("\n")[1] + .split("=")[1] + .strip() + ) + + mdr_dict = eval(mdr_str) + permanent_mdr_str = eval(permanent_mdr_str) + return mdr_dict, permanent_mdr_str + + @staticmethod + @pytest.fixture(scope="class") + def getRestraintSomdRestraintPert(mdr_restraint): + mdr_str = ( + mdr_restraint.toString(engine="SOMD", perturbation_type="restraint") + .split("=")[1] + .strip() + ) + mdr_dict = eval(mdr_str) + return mdr_dict + + def test_sanity(self, getRestraintSomd): + "Sanity check" + mdr_dict_std = getRestraintSomd[0] + mdr_dict_permanent = getRestraintSomd[1] + assert isinstance(mdr_dict_std, dict) + assert isinstance(mdr_dict_permanent, dict) + + def test_dict_vals(self, getRestraintSomd): + std_mdr_dict = getRestraintSomd[0] + expected_anchors_std = [{0, 1495}, {1, 1496}] + permanent_mdr_dict = getRestraintSomd[1] + expected_anchors_permanent = [{2, 1497}] + + # Test std restraints + for i, restraint in enumerate(std_mdr_dict): + assert set(restraint) == expected_anchors_std[i] + assert std_mdr_dict[restraint][0] == 3.0 # r0 + # kr is halved because of the definition of the force constant in SOMD + assert std_mdr_dict[restraint][1] == 5.0 # kr + assert std_mdr_dict[restraint][2] == 1.0 # r_fb + + # Test permanent restraint + for i, restraint in enumerate(permanent_mdr_dict): + assert set(restraint) == expected_anchors_permanent[i] + assert permanent_mdr_dict[restraint][0] == 3.0 # r0 + # kr is halved because of the definition of the force constant in SOMD + assert permanent_mdr_dict[restraint][1] == 5.0 # kr + assert permanent_mdr_dict[restraint][2] == 1.0 # r_fb + + def test_dict_vals_restraint_pert(self, getRestraintSomdRestraintPert): + expected_anchors = [{0, 1495}, {1, 1496}, {2, 1497}] + + for i, restraint in enumerate(getRestraintSomdRestraintPert): + assert set(restraint) == expected_anchors[i] + assert getRestraintSomdRestraintPert[restraint][0] == 3.0 # r0 + # kr is halved because of the definition of the force constant in SOMD + assert getRestraintSomdRestraintPert[restraint][1] == 5.0 # kr + assert getRestraintSomdRestraintPert[restraint][2] == 1.0 # r_fb From 6d9b6298484988586ca92a73531f4501c86a2ee5 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 8 Aug 2023 16:01:53 +0100 Subject: [PATCH 05/19] Write input files for multiple distance restraints Allow correct SOMD config and pert files to be written for multiple distance restraints. Accompanying tests have also been introduced. This involved introducing an additional perturbation type "release_restraint", used exclusively with multiple distance restraints to remove all distance restraints but the "permanent" distance restraint. --- .../Sandpit/Exscientia/Process/_somd.py | 74 +++++++++ .../Sandpit/Exscientia/Protocol/_config.py | 36 ++++- .../Exscientia/Protocol/_free_energy.py | 5 + .../Protocol/_free_energy_equilibration.py | 5 + .../Protocol/_free_energy_minimisation.py | 5 + .../Exscientia/Protocol/_free_energy_mixin.py | 12 ++ tests/Sandpit/Exscientia/Process/test_somd.py | 79 +++++++++- .../Exscientia/Protocol/test_config.py | 149 ++++++++++++++++-- 8 files changed, 348 insertions(+), 17 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py index 97882c198..33a0182ef 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py @@ -934,6 +934,11 @@ def _to_pert_file( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Returns ------- @@ -983,6 +988,7 @@ def _to_pert_file( "grow_soft", "charge_soft", "restraint", + "release_restraint", ] if perturbation_type not in allowed_perturbation_types: @@ -1300,6 +1306,74 @@ def atom_sorting_criteria(atom): # End atom record. file.write(" endatom\n") + elif perturbation_type == "release_restraint": + if print_all_atoms: + for atom in sorted( + mol.atoms(), key=lambda atom: atom_sorting_criteria(atom) + ): + # Start atom record. + file.write(" atom\n") + + # Only require the final Lennard-Jones and charge properties. + LJ1 = atom.property("LJ1") + charge1_value = atom.property("charge1").value() + + # Atom data. Create dummy pert file with identical initial and final properties. + file.write(" name %s\n" % atom.name().value()) + file.write( + " initial_type %s\n" % atom.property("ambertype1") + ) + file.write( + " final_type %s\n" % atom.property("ambertype1") + ) + file.write( + " initial_LJ %.5f %.5f\n" + % (LJ1.sigma().value(), LJ1.epsilon().value()) + ) + file.write( + " final_LJ %.5f %.5f\n" + % (LJ1.sigma().value(), LJ1.epsilon().value()) + ) + file.write(" initial_charge %.5f\n" % charge1_value) + file.write(" final_charge %.5f\n" % charge1_value) + + # End atom record. + file.write(" endatom\n") + + # Only print records for the atoms that are perturbed. + else: + for idx in sorted( + pert_idxs, key=lambda idx: atom_sorting_criteria(mol.atom(idx)) + ): + # Get the perturbed atom. + atom = mol.atom(idx) + + # Only require the final Lennard-Jones and charge properties. + LJ1 = atom.property("LJ1") + charge1_value = atom.property("charge1").value() + + # Atom data. Create dummy pert file with identical initial and final properties. + file.write(" name %s\n" % atom.name().value()) + file.write( + " initial_type %s\n" % atom.property("ambertype1") + ) + file.write( + " final_type %s\n" % atom.property("ambertype1") + ) + file.write( + " initial_LJ %.5f %.5f\n" + % (LJ1.sigma().value(), LJ1.epsilon().value()) + ) + file.write( + " final_LJ %.5f %.5f\n" + % (LJ1.sigma().value(), LJ1.epsilon().value()) + ) + file.write(" initial_charge %.5f\n" % charge1_value) + file.write(" final_charge %.5f\n" % charge1_value) + + # End atom record. + file.write(" endatom\n") + else: # Given multistep protocol: if print_all_atoms: diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py index e71e1369d..dc924c8f9 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py @@ -626,6 +626,11 @@ def generateSomdConfig( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Returns ------- @@ -812,10 +817,33 @@ def generateSomdConfig( # Restraint if restraint: - total_lines.append("use boresch restraints = True") - total_lines.append(restraint.toString(engine="SOMD")) - # If we are turning on the restraint, need to specify this in the config file - if perturbation_type == "restraint": + # Handle Boresch restraints. + if restraint._restraint_type == "boresch": + if perturbation_type == "release_restraint": + raise _IncompatibleError( + "SOMD does not support releasing Boresch restraints, only " + "multiple distance restraints." + ) + total_lines.append("use boresch restraints = True") + total_lines.append(restraint.toString(engine="SOMD")) + + # Handle multiple distance restraints. + elif restraint._restraint_type == "multiple_distance": + total_lines.append("use distance restraints = True") + if perturbation_type != "restraint": + # In this case, we want to ensure that the "permanent" distance restraint is active + total_lines.append("use permanent distance restraints = True") + total_lines.append( + restraint.toString( + engine="SOMD", perturbation_type=perturbation_type + ) + ) + + # If we are turning on the restraint, need to specify this in the config file. + if ( + perturbation_type == "restraint" + or perturbation_type == "release_restraint" + ): total_lines.append("turn on receptor-ligand restraints mode = True") return total_lines diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy.py index 6f03f4f69..8b581f5d2 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy.py @@ -112,6 +112,11 @@ def __init__( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Currently perturubation_type != "full" is only supported by BioSimSpace.Process.Somd. diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_equilibration.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_equilibration.py index b879bf4e7..e9549d5e5 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_equilibration.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_equilibration.py @@ -115,6 +115,11 @@ def __init__( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Currently perturubation_type != "full" is only supported by BioSimSpace.Process.Somd. diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_minimisation.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_minimisation.py index 378b5c5db..7a67d5634 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_minimisation.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_minimisation.py @@ -53,6 +53,11 @@ def __init__( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Currently perturubation_type != "full" is only supported by BioSimSpace.Process.Somd. diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py index edbbc218e..2e45c712f 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py @@ -50,6 +50,11 @@ def __init__( "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). Currently perturubation_type != "full" is only supported by BioSimSpace.Process.Somd. @@ -114,6 +119,12 @@ def setPerturbationType(self, perturbation_type): "charge_soft" : Perturb all charging soft atom LJ terms (i.e. 0.0->value). "restraint" : Perturb the receptor-ligand restraint strength by linearly scaling the force constants (0.0->value). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). + """ if type(perturbation_type) is not str: raise TypeError("'perturbation_type' must be of type 'str'") @@ -129,6 +140,7 @@ def setPerturbationType(self, perturbation_type): "grow_soft", "charge_soft", "restraint", + "release_restraint", ] if perturbation_type not in allowed_perturbation_types: diff --git a/tests/Sandpit/Exscientia/Process/test_somd.py b/tests/Sandpit/Exscientia/Process/test_somd.py index 11ecc6ba0..45a71983a 100644 --- a/tests/Sandpit/Exscientia/Process/test_somd.py +++ b/tests/Sandpit/Exscientia/Process/test_somd.py @@ -139,9 +139,11 @@ def test_pert_res_num(perturbable_system): assert unique1[0] == "perturbed residue number = 2" -def test_restraint(system, tmp_path): - """Test if the restraint has been written in a way that can be processed - correctly.""" +def test_restraint_boresch(system, tmp_path): + """ + Test if the Boresch restraint has been written in a way that can be processed + correctly. + """ ligand = BSS.IO.readMolecules( [f"{url}/ligand01.rst7", f"{url}/ligand01.prm7"] ).getMolecule(0) @@ -193,6 +195,77 @@ def test_restraint(system, tmp_path): assert "boresch restraints dictionary" in lines[-1] +@pytest.fixture(scope="module") +def restraint_mdr(system): + """A restraint object for testing.""" + ligand = BSS.IO.readMolecules( + [f"{url}/ligand01.rst7", f"{url}/ligand01.prm7"] + ).getMolecule(0) + decoupled_ligand = decouple(ligand) + + fake_protein = BSS.IO.readMolecules( + [f"{url}/ligand04.rst7", f"{url}/ligand04.prm7"] + ).getMolecule(0) + + system = (decoupled_ligand + fake_protein).toSystem() + + # Create three distance restraints + restraint_dict = { + "distance_restraints": [ + { + "l1": decoupled_ligand.getAtoms()[0], + "r1": fake_protein.getAtoms()[0], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": decoupled_ligand.getAtoms()[1], + "r1": fake_protein.getAtoms()[1], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": decoupled_ligand.getAtoms()[2], + "r1": fake_protein.getAtoms()[2], + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + } + restraint = Restraint( + system, restraint_dict, 300 * kelvin, restraint_type="multiple_distance" + ) + + return restraint + + +def test_restraint_mdr(tmp_path, restraint_mdr): + """ + Test if the multiple distance restraints restraint has been + written in a way that can be processed correctly. + """ + # Create a short free energy protocol. + protocol = BSS.Protocol.Production( + runtime=BSS.Types.Time(0.001, "nanoseconds"), + ) + + # Run the process and check that it finishes without error. + run_process( + restraint_mdr._system, protocol, restraint=restraint_mdr, work_dir=str(tmp_path) + ) + + # Check for restraint options in config file. + with open(tmp_path / "test.cfg", "r") as f: + lines = f.readlines() + assert "use distance restraints = True" in lines[-4] + assert "use permanent distance restraints = True" in lines[-3] + assert "distance restraints dictionary" in lines[-2] + assert "permanent distance restraints dictionary" in lines[-1] + + def run_process(system, protocol, **kwargs): """Helper function to run various simulation protocols.""" diff --git a/tests/Sandpit/Exscientia/Protocol/test_config.py b/tests/Sandpit/Exscientia/Protocol/test_config.py index 794ef2e44..9f7e2d1a1 100644 --- a/tests/Sandpit/Exscientia/Protocol/test_config.py +++ b/tests/Sandpit/Exscientia/Protocol/test_config.py @@ -294,7 +294,7 @@ def test_sc_parameters(self, system): class TestSomdABFE: @staticmethod @pytest.fixture(scope="class") - def system_and_restraint(): + def system_and_boresch_restraint(): # Benzene. m = BSS.Parameters.openff_unconstrained_2_0_0("c1ccccc1").getMolecule() @@ -343,9 +343,59 @@ def system_and_restraint(): return system, restraint - def test_turn_on_restraint(self, system_and_restraint): - """Test for turning on the restraint""" - system, restraint = system_and_restraint + @staticmethod + @pytest.fixture(scope="class") + def system_and_mdr_restraint(): + # Benzene. + m = BSS.Parameters.openff_unconstrained_2_0_0("c1ccccc1").getMolecule() + + # Assign atoms for restraint + atom_1 = m.getAtoms()[0] + atom_2 = m.getAtoms()[1] + atom_3 = m.getAtoms()[2] + atom_4 = m.getAtoms()[3] + atom_5 = m.getAtoms()[4] + atom_6 = m.getAtoms()[5] + + mol = decouple(m) + system = mol.toSystem() + + # Create random restraint dictionary + restraint_dict = { + "distance_restraints": [ + { + "l1": atom_1, + "r1": atom_2, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": atom_3, + "r1": atom_4, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": atom_5, + "r1": atom_6, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + } + + restraint = Restraint( + system, restraint_dict, 298 * kelvin, restraint_type="multiple_distance" + ) + + return system, restraint + + def test_turn_on_restraint_boresch(self, system_and_boresch_restraint): + """Test for turning on multiple distance restraints""" + system, restraint = system_and_boresch_restraint protocol = FreeEnergy(perturbation_type="restraint") freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( system, protocol, engine="SOMD", restraint=restraint @@ -383,9 +433,88 @@ def test_turn_on_restraint(self, system_and_restraint): for line in lines: assert line in pert_text - def test_discharge(self, system_and_restraint): + def test_turn_on_restraint_mdr(self, system_and_mdr_restraint): + """Test for turning on the mdr restraint""" + system, restraint = system_and_mdr_restraint + protocol = FreeEnergy(perturbation_type="restraint") + freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( + system, protocol, engine="SOMD", restraint=restraint + ) + + # Test .cfg file + with open(f"{freenrg._work_dir}/lambda_0.0000/somd.cfg", "r") as f: + cfg_text = f.read() + assert "use distance restraints = True" in cfg_text + assert ( + "distance restraints dictionary = {(1, 0): (3.0, 5.0, 1.0), (3, 2): " + "(3.0, 5.0, 1.0), (5, 4): (3.0, 5.0, 1.0)}" + ) in cfg_text + assert "turn on receptor-ligand restraints mode = True" in cfg_text + + # Test .pert file + with open(f"{freenrg._work_dir}/lambda_0.0000/somd.pert", "r") as f: + pert_text = f.read() + # Check all atoms present + carbons = [f"C{i}" for i in range(1, 7)] + hydrogens = [f"H{i}" for i in range(1, 7)] + atoms = carbons + hydrogens + for atom in atoms: + assert atom in pert_text + # Check perturbations are correct + lines = [ + "initial_type C1", + "final_type C1", + "initial_LJ 3.48065 0.08688", + "final_LJ 3.48065 0.08688", + "initial_charge -0.13000", + "final_charge -0.13000", + ] + for line in lines: + assert line in pert_text + + def test_release_restraint_mdr(self, system_and_mdr_restraint): + """Test for releasing the non-permanent mdr restraints""" + system, restraint = system_and_mdr_restraint + protocol = FreeEnergy(perturbation_type="release_restraint") + freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( + system, protocol, engine="SOMD", restraint=restraint + ) + + # Test .cfg file + with open(f"{freenrg._work_dir}/lambda_0.0000/somd.cfg", "r") as f: + cfg_text = f.read() + assert "use distance restraints = True" in cfg_text + assert "use permanent distance restraints = True" in cfg_text + assert "turn on receptor-ligand restraints mode = True" in cfg_text + assert ( + "distance restraints dictionary = {(1, 0): (3.0, 5.0, 1.0), " + "(3, 2): (3.0, 5.0, 1.0)}" + ) in cfg_text + assert ( + "permanent distance restraints dictionary = {(5, 4): (3.0, 5.0, 1.0)}" + in cfg_text + ) + + # Test .pert file + with open(f"{freenrg._work_dir}/lambda_0.0000/somd.pert", "r") as f: + pert_text = f.read() + # Don't check atoms present as they are given randomised names + # and the atom types are all du + # Check perturbations are correct + lines = [ + "initial_type du", + "final_type du", + "initial_LJ 0.00000 0.00000", + "final_LJ 0.00000 0.00000", + "initial_charge 0.00000", + "final_charge 0.00000", + ] + for line in lines: + assert line in pert_text + + def test_discharge(self, system_and_boresch_restraint): """Test for discharging the ligand""" - system, restraint = system_and_restraint + system, restraint = system_and_boresch_restraint protocol = FreeEnergy(perturbation_type="discharge_soft") freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( system, protocol, engine="SOMD", restraint=restraint @@ -423,9 +552,9 @@ def test_discharge(self, system_and_restraint): for line in lines: assert line in pert_text - def test_vanish(self, system_and_restraint): + def test_vanish(self, system_and_boresch_restraint): """Test for vanishing the ligand""" - system, restraint = system_and_restraint + system, restraint = system_and_boresch_restraint protocol = FreeEnergy(perturbation_type="vanish_soft") freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( system, protocol, engine="SOMD", restraint=restraint @@ -463,9 +592,9 @@ def test_vanish(self, system_and_restraint): for line in lines: assert line in pert_text - def test_discharge_and_vanish(self, system_and_restraint): + def test_discharge_and_vanish(self, system_and_boresch_restraint): """Test for simultaneously discharging and vanishing the ligand""" - system, restraint = system_and_restraint + system, restraint = system_and_boresch_restraint protocol = FreeEnergy(perturbation_type="full") freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( system, protocol, engine="SOMD", restraint=restraint From 5dc9585b9283290b5cf2608a8e94ec7f52dc1938 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Wed, 9 Aug 2023 21:26:16 +0100 Subject: [PATCH 06/19] Change access of pandas series to avoid deprecation warning --- .../Sandpit/Exscientia/Protocol/_free_energy_mixin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py index 2e45c712f..8e120dc08 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_free_energy_mixin.py @@ -197,7 +197,7 @@ def getLambda(self, type="float"): """ if type.lower() == "float": if len(self._lambda) == 1: - return float(self._lambda) + return float(self._lambda.iloc[0]) else: warnings.warn( f"The {self._lambda} has more than one value, return as pd.Series." From 67b4382faf5a5863bb81e19fe1ffae86aded8ed5 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Thu, 10 Aug 2023 13:23:30 +0100 Subject: [PATCH 07/19] Add first version of multiple distance restraints derivation algorithm --- .github/workflows/Sandpit_exs.yml | 2 +- .../FreeEnergy/_restraint_search.py | 693 +++++++++++++++--- .../FreeEnergy/test_restraint_search.py | 74 +- 3 files changed, 636 insertions(+), 133 deletions(-) diff --git a/.github/workflows/Sandpit_exs.yml b/.github/workflows/Sandpit_exs.yml index 99616ce0e..fa90be7ec 100644 --- a/.github/workflows/Sandpit_exs.yml +++ b/.github/workflows/Sandpit_exs.yml @@ -35,7 +35,7 @@ jobs: - name: Install dependency run: | - mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.8 ambertools gromacs "sire=2023.2.2" alchemlyb pytest pyarrow "typing-extensions!=4.6.0" openff-interchange pint=0.21 "rdkit<2023" "jaxlib>0.3.7" tqdm + mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.8 ambertools gromacs "sire=2023.2.2" alchemlyb pytest pyarrow "typing-extensions!=4.6.0" openff-interchange pint=0.21 "rdkit<2023" "jaxlib>0.3.7" tqdm scikit-learn python -m pip install git+https://github.com/Exscientia/MDRestraintsGenerator.git # For the testing of BSS.FreeEnergy.AlchemicalFreeEnergy.analysis python -m pip install https://github.com/alchemistry/alchemtest/archive/master.zip diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py index 1eb666d1a..249bb9380 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py @@ -33,6 +33,7 @@ import numpy as _np import os as _os from scipy.stats import circmean as _circmean +from sklearn.cluster import KMeans as _KMeans import warnings as _warnings from sire.legacy.Base import getBinDir as _getBinDir @@ -108,6 +109,9 @@ class RestraintSearch: # Create a list of supported molecular dynamics engines. _engines = ["GROMACS", "SOMD"] + # Create a list of supported restraint types. + _restraint_types = ["boresch", "multiple_distance"] + def __init__( self, system, @@ -334,7 +338,7 @@ def _analyse( ---------- restraint_type: str - The type of restraints to select (currently only Boresch is available). + The type of restraints to select, from "Boresch" or "multiple_distance". Default is 'Boresch'. method: str @@ -513,8 +517,8 @@ def analyse( The temperature of the system restraint_type : str - The type of restraints to select (currently only Boresch is available). - Default is ``Boresch``. + The type of restraints to select, from 'Boresch' or 'multiple_distance'. + Default is "Boresch". method : str The method to use to derive the restraints. 'BSS' or 'MDRestraintsGenerator'. @@ -558,6 +562,11 @@ def analyse( The restraints of `restraint_type` which best mimic the strongest receptor-ligand interactions. """ + _supported_methods = { + "boresch": ["BSS", "MDRestraintsGenerator"], + "multiple_distance": ["BSS"], + } + # Check all inputs if not isinstance(work_dir, str): @@ -587,9 +596,11 @@ def analyse( raise TypeError( f"restraint_type {type(restraint_type)} must be of type 'str'." ) - if not restraint_type.lower() == "boresch": + + if not restraint_type.lower() in RestraintSearch._restraint_types: raise NotImplementedError( - "Only Boresch restraints are currently implemented" + f"Restraint type {restraint_type} is not implemented. " + f"Please choose from {RestraintSearch._restraint_types}" ) if not isinstance(method, str): @@ -599,6 +610,12 @@ def analyse( "Deriving restraints using 'MDRestraintsGenerator'" "or 'BSS' are the only options implemented." ) + # Check that the selected method is supported for the selected restraint type. + if not method in _supported_methods[restraint_type.lower()]: + raise NotImplementedError( + f"Method {method} is not supported for restraint type {restraint_type}. " + f"Please choose from {_supported_methods[restraint_type.lower()]}" + ) if method.lower() == "mdrestraintsgenerator": if not is_MDRestraintsGenerator: @@ -666,6 +683,18 @@ def analyse( restraint_idx=restraint_idx, ) + if restraint_type.lower() == "multiple_distance": + return RestraintSearch._multiple_distance_restraint( + u, + system, + temperature, + ligand_selection_str, + receptor_selection_str, + method, + work_dir, + cutoff, + ) + @staticmethod def _boresch_restraint( u, @@ -943,6 +972,116 @@ def _boresch_restraint_MDRestraintsGenerator( ) return restraint + @staticmethod + def _findOrderedPairs(u, ligand_selection_str, receptor_selection_str, cutoff): + """ + Return a list of receptor-ligand anchor atoms pairs in the form + (lig atom index, receptor atom index), where the pairs are ordered + from low to high variance of distance over the trajectory. + + Parameters + ---------- + + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + + ligand_selection_str: str + The selection string for the atoms in the ligand to consider + as potential anchor points. + + receptor_selection_str: str + The selection string for the protein in the ligand to consider + as potential anchor points. + + cutoff: BioSimSpace.Types.Length + The greatest distance between ligand and receptor anchor atoms. + Only affects behaviour when method == "BSS" Receptor anchors + further than cutoff Angstroms from the closest ligand anchors will not + be included in the search for potential anchor points. + + Returns + ------- + + pairs_ordered_sd : list of tuples + List of receptor-ligand atom pairs ordered by increasing variance of distance over + the trajectory. + """ + + lig_selection = u.select_atoms(ligand_selection_str) + pair_variance_dict = {} + + # Get all receptor atoms within specified distance of cutoff + for lig_atom in lig_selection: + for prot_atom in u.select_atoms( + f"{receptor_selection_str} and (around {cutoff / _angstrom} index {lig_atom.index})" + ): + pair_variance_dict[(lig_atom.index, prot_atom.index)] = {} + pair_variance_dict[(lig_atom.index, prot_atom.index)]["dists"] = [] + + # Compute Average Distance and SD + for frame in _tqdm( + u.trajectory, desc="Searching for low variance pairs. Frame no: " + ): + for lig_atom_index, prot_atom_index in pair_variance_dict.keys(): + distance = _dist( + _mda.AtomGroup([u.atoms[lig_atom_index]]), + _mda.AtomGroup([u.atoms[prot_atom_index]]), + box=frame.dimensions, + )[2][0] + pair_variance_dict[(lig_atom_index, prot_atom_index)]["dists"].append( + distance + ) + + # change lists to numpy arrays + for pair in pair_variance_dict.keys(): + pair_variance_dict[pair]["dists"] = _np.array( + pair_variance_dict[pair]["dists"] + ) + + # calculate SD + for pair in pair_variance_dict.keys(): + pair_variance_dict[pair]["sd"] = pair_variance_dict[pair]["dists"].std() + + # get n pairs with lowest SD + pairs_ordered_sd = [] + for item in sorted(pair_variance_dict.items(), key=lambda item: item[1]["sd"]): + pairs_ordered_sd.append(item[0]) + + if len(pairs_ordered_sd) == 0: + raise _AnalysisError( + "No receptor-ligand atom pairs found within specified cutoff." + ) + + return pairs_ordered_sd + + @staticmethod + def _getDistance(idx1, idx2, u): + """ + Get the distance between two atoms in a universe. + + Parameters + ---------- + idx1 : int + Index of the first atom + idx2 : int + Index of the second atom + u : MDAnalysis.Universe + The MDA universe containing the atoms and + trajectory. + + Returns + ------- + distance : float + The distance between the two atoms in Angstroms. + """ + distance = _dist( + _mda.AtomGroup([u.atoms[idx1]]), + _mda.AtomGroup([u.atoms[idx2]]), + box=u.dimensions, + )[2][0] + return distance + @staticmethod def _boresch_restraint_BSS( u, @@ -1021,85 +1160,6 @@ def _boresch_restraint_BSS( interactions. """ - def _findOrderedPairs(u, ligand_selection_str, receptor_selection_str, cutoff): - """ - Return a list of receptor-ligand anchor atoms pairs in the form - (lig atom index, receptor atom index), where the pairs are ordered - from low to high variance of distance over the trajectory. - - Parameters - ---------- - - u : MDAnalysis.Universe - The trajectory for the ABFE restraint calculation as a - MDAnalysis.Universe object. - - ligand_selection_str: str - The selection string for the atoms in the ligand to consider - as potential anchor points. - - receptor_selection_str: str - The selection string for the protein in the ligand to consider - as potential anchor points. - - cutoff: BioSimSpace.Types.Length - The greatest distance between ligand and receptor anchor atoms. - Only affects behaviour when method == "BSS" Receptor anchors - further than cutoff Angstroms from the closest ligand anchors will not - be included in the search for potential anchor points. - - Returns - ------- - - pairs_ordered_sd : list of tuples - List of receptor-ligand atom pairs ordered by increasing variance of distance over - the trajectory. - """ - - lig_selection = u.select_atoms(ligand_selection_str) - pair_variance_dict = {} - - # Get all receptor atoms within specified distance of cutoff - for lig_atom in lig_selection: - for prot_atom in u.select_atoms( - f"{receptor_selection_str} and (around {cutoff / _angstrom} index {lig_atom.index})" - ): - pair_variance_dict[(lig_atom.index, prot_atom.index)] = {} - pair_variance_dict[(lig_atom.index, prot_atom.index)]["dists"] = [] - - # Compute Average Distance and SD - for frame in _tqdm( - u.trajectory, desc="Searching for low variance pairs. Frame no: " - ): - for lig_atom_index, prot_atom_index in pair_variance_dict.keys(): - distance = _dist( - _mda.AtomGroup([u.atoms[lig_atom_index]]), - _mda.AtomGroup([u.atoms[prot_atom_index]]), - box=frame.dimensions, - )[2][0] - pair_variance_dict[(lig_atom_index, prot_atom_index)][ - "dists" - ].append(distance) - - # change lists to numpy arrays - for pair in pair_variance_dict.keys(): - pair_variance_dict[pair]["dists"] = _np.array( - pair_variance_dict[pair]["dists"] - ) - - # calculate SD - for pair in pair_variance_dict.keys(): - pair_variance_dict[pair]["sd"] = pair_variance_dict[pair]["dists"].std() - - # get n pairs with lowest SD - pairs_ordered_sd = [] - for item in sorted( - pair_variance_dict.items(), key=lambda item: item[1]["sd"] - ): - pairs_ordered_sd.append(item[0]) - - return pairs_ordered_sd - def _getAnchorAts(a1_idx, selection_str, u): """ Takes in index of anchor atom 1 (in either the receptor or ligand) @@ -1158,32 +1218,6 @@ def _getAnchorAts(a1_idx, selection_str, u): return a1_idx, a2_idx, a3_idx - def _getDistance(idx1, idx2, u): - """ - Get the distance between two atoms in a universe. - - Parameters - ---------- - idx1 : int - Index of the first atom - idx2 : int - Index of the second atom - u : MDAnalysis.Universe - The MDA universe containing the atoms and - trajectory. - - Returns - ------- - distance : float - The distance between the two atoms in Angstroms. - """ - distance = _dist( - _mda.AtomGroup([u.atoms[idx1]]), - _mda.AtomGroup([u.atoms[idx2]]), - box=u.dimensions, - )[2][0] - return distance - def _getAngle(idx1, idx2, idx3, u): """ Get the angle between three atoms in a universe. @@ -1243,7 +1277,7 @@ def _getDihedral(idx1, idx2, idx3, idx4, u): def _getBoreschDOF(l1, l2, l3, r1, r2, r3, u): """Calculate Boresch degrees of freedom from indices of anchor atoms""" # Ordering of connection of anchors is r3,r2,r1,l1,l2,l3 - r = _getDistance(r1, l1, u) + r = RestraintSearch._getDistance(r1, l1, u) thetaA = _getAngle(r2, r1, l1, u) thetaB = _getAngle(r1, l1, l2, u) phiA = _getDihedral(r3, r2, r1, l1, u) @@ -1696,7 +1730,7 @@ def _getBoreschRestraint(pair, boresch_dof_data): return restraint # Find pairs with lowest SD - pairs_ordered_sd = _findOrderedPairs( + pairs_ordered_sd = RestraintSearch._findOrderedPairs( u, ligand_selection_str, receptor_selection_str, cutoff ) @@ -1719,3 +1753,438 @@ def _getBoreschRestraint(pair, boresch_dof_data): ) return restraint + + @staticmethod + def _multiple_distance_restraint( + u, + system, + temperature, + ligand_selection_str, + receptor_selection_str, + method, + work_dir, + cutoff, + ): + """ + Generate the multiple distance restraints Restraint using the native + BioSimSpace implementation. This involves: + + 1. Finding the pairs of receptor-ligand atoms that have the lowest standard + deviation of distances + 2. Selecting pairs based on lowest standard deviation of distances and diversity + of direction. One pair is selected for each selected ligand atom. + 3. Deriving restraint parameters from the selected pairs so that the flat-bottomed + region encompasses 95 % of the sampled distances. The pair with the lowest + variance of the distance is selected to be the "permanent" distance restraint. + + Parameters + ---------- + + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + + system : :class:`System ` + The molecular system for the ABFE restraint calculation. This + must contain a single decoupled molecule and is assumed to have + already been equilibrated. + + temperature : :class:`System ` + The temperature of the system + + ligand_selection_str : str + The selection string for the atoms in the ligand to consider + as potential anchor points. + + receptor_selection_str : str + The selection string for the atoms in the receptor to consider + as potential anchor points. Uses the mdanalysis atom selection + language. + + method : str + The method to use to derive the restraints. Currently only 'BSS' + is implemented, which uses the native BioSimSpace derivation. + + work_dir : str + The working directory for the simulation. + + cutoff: BioSimSpace.Types.Length + The greatest distance between ligand and receptor anchor atoms. + Only affects behaviour when method == "BSS" Receptor anchors + further than cutoff Angstroms from the closest ligand anchors will not + be included in the search for potential anchor points. + + Returns + ------- + + restraint : :class:`Restraint ` + The restraints of `restraint_type` which best mimic the strongest receptor-ligand + interactions. + """ + if method == "MDRestraintsGenerator": + raise NotImplementedError( + "MDRestraintsGenerator is not implemented for multiple distance restraints." + ) + + elif method == "BSS": + return RestraintSearch._multiple_distance_restraint_BSS( + u, + system, + temperature, + ligand_selection_str, + receptor_selection_str, + work_dir, + cutoff, + ) + + else: + raise ValueError("method must be 'MDRestraintsGenerator' or 'BSS'.") + + @staticmethod + def _multiple_distance_restraint_BSS( + u, + system, + temperature, + ligand_selection_str, + receptor_selection_str, + work_dir, + cutoff, + ): + """ + Generate the multiple distance restraints Restraint using the native + BioSimSpace implementation. This involves: + + 1. Finding the pairs of receptor-ligand atoms that have the lowest standard + deviation of distances + 2. Selecting pairs based on lowest standard deviation of distances and diversity + of direction. One pair is selected for each selected ligand atom. + 3. Deriving restraint parameters from the selected pairs so that the flat-bottomed + region encompasses 95 % of the sampled distances. The pair with the lowest + variance of the distance is selected to be the "permanent" distance restraint. + + Parameters + ---------- + + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + + system : :class:`System ` + The molecular system for the ABFE restraint calculation. This + must contain a single decoupled molecule and is assumed to have + already been equilibrated. + + temperature : :class:`System ` + The temperature of the system + + ligand_selection_str: str + The selection string for the atoms in the ligand to consider + as potential anchor points. + + receptor_selection_str: str + The selection string for the protein in the ligand to consider + as potential anchor points. + + work_dir : str + The working directory for the simulation. + + cutoff: BioSimSpace.Types.Length + The greatest distance between ligand and receptor anchor atoms. + Only affects behaviour when method == "BSS" Receptor anchors + further than cutoff Angstroms from the closest ligand anchors will not + be included in the search for potential anchor points. + + Returns + ------- + + restraint : :class:`Restraint ` + The restraints of `restraint_type` which best mimic the strongest receptor-ligand + interactions. + """ + + def _get_norm_vector(frame, pair): + """ + Get the normalised interatomic vector between two atoms. + + Parameters + ---------- + frame : MDAnalysis.Timestep + The frame of the trajectory to use. + pair : tuple + The pair of atoms to consider. + + Returns + ------- + vector : np.ndarray + The normalised interatomic vector between the two atoms. + """ + inter_vec = frame.positions[pair[1]] - frame.positions[pair[0]] + # Check the length of this is not excessive (e.g. due to PBCs) + norm = _np.linalg.norm(inter_vec) + if norm > 15: + raise ValueError( + f"The interatomic vector between atoms {pair[0]} and {pair[1]} is " + f"excessive ({norm:.2f} A). This is likely due to PBCs. " + "Please check your input trajectory." + ) + else: + return inter_vec / norm + + def _clusterPairsByDirection(u, pairs_ordered_sd, n_clusters): + """ + Cluster the pairs of atoms by direction (based on the final structure). + + Parameters + ---------- + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + pairs_ordered_sd : list + The pairs of atoms ordered by SD. + n_clusters : int + The number of clusters to use. + + Returns + ------- + pairs_ordered_clustered : list[tuple] + The pairs of atoms ordered by SD and clustered by direction. + The third element of each tuple is the cluster number. + """ + # Get the normalised interatomic vectors for each pair of atoms, + # using the final frame of the trajectory. + vectors_dict = { + pair: _get_norm_vector(u.trajectory[-1], pair) + for pair in pairs_ordered_sd + } + # Cluster the vectors by direction, using k-means clustering. Predict the cluster + # indices at the same time. + cluster_indices = _KMeans(n_clusters=n_clusters, n_init=10).fit_predict( + list(vectors_dict.values()) + ) + + # Create a new list of pairs of atoms, ordered by SD and including the cluster number. + pairs_ordered_clustered = [ + (pair[0], pair[1], cluster_indices[i]) + for i, pair in enumerate(pairs_ordered_sd) + ] + + return pairs_ordered_clustered + + def _filterPairsByCluster(pairs_ordered_clustered, selected_ligand_atoms): + """ + Filter the pairs of atoms by cluster. This is done by checking each pair + in order of decreasing variance. If the ligand heavy atom has not already + been used in a restraint and no other restraints from the given directional + cluster have been selected, the restraint is accepted. + + Parameters + ---------- + pairs_ordered_clustered : list[tuple] + The pairs of atoms ordered by SD and clustered by direction. + The first and second elements of each tuple are the atom indices. + The third element of each tuple is the cluster number. + + selected_ligand_atoms : list[int] + The indices of the selected ligand atoms. + + Returns + ------- + pairs_ordered_clustered_filtered : list[tuple] + The pairs of atoms ordered by SD and clustered by direction, filtered + by cluster. The tuples contain the atom indices only. + """ + filtered_pairs = [] + selected_clusters = [] + available_ligand_atoms = [at.index for at in selected_ligand_atoms] + + # Loop over the pairs of atoms in order of decreasing variance. + for pair in pairs_ordered_clustered: + # If the ligand atom has not already been used in a restraint and no + # other restraints from the given directional cluster have been selected, + # accept the restraint. + if ( + pair[0] in available_ligand_atoms + and pair[2] not in selected_clusters + ): + filtered_pairs.append((pair[0], pair[1])) + selected_clusters.append(pair[2]) + available_ligand_atoms.remove(pair[0]) + + if len(available_ligand_atoms) == 0: + return filtered_pairs + + # If not all ligand atoms have been used in a restraint, raise an error. + raise _AnalysisError( + "Not all ligand atoms have been used in a restraint. Try increasing the number of " + "candidate pairs." + ) + + def _plotDistanceRestraints(distance_dict): + """ + Plot the variation of the the distances for each pair + over the trajectory. + + Parameters + ---------- + distance_dict : dict + A dictionary containing the distances for each pair, where + the keys are the pair indices and the values are lists of + distances in Angstroms. + """ + n_pairs = len(distance_dict) + n_columns = 6 + n_rows = int(_np.ceil(n_pairs / n_columns)) + + # Plot histograms + fig, axs = _plt.subplots( + n_rows, n_columns, figsize=(2.7 * n_columns, 4 * n_rows), dpi=500 + ) + axs = axs.flatten() + for i, pair in enumerate(distance_dict): + axs[i].hist(distance_dict[pair], bins=10, density=True) + axs[i].axvline( + x=_np.mean(distance_dict[pair]), + color="r", + linestyle="dashed", + linewidth=2, + label="mean", + ) + axs[i].set_xlabel(f"({pair[0]}, {pair[1]}) Distance / $\mathrm{{\AA}}$") + axs[i].set_ylabel("Probability") + if i == n_pairs - 1: # Only add legend to last plot + axs[i].legend() + fig.tight_layout() + fig.savefig( + f"{work_dir}/distance_restraints_hist.png", + facecolor="white", + ) + _plt.close(fig) + + # Plot variation with time to see if there are slow DOF + fig, axs = _plt.subplots( + n_rows, n_columns, figsize=(2.7 * n_columns, 4 * n_rows), dpi=500 + ) + axs = axs.flatten() + for i, pair in enumerate(distance_dict): + axs[i].plot( + list(range(len(distance_dict[pair]))), + distance_dict[pair], + ) + axs[i].axhline( + y=_np.mean(distance_dict[pair]), + color="r", + linestyle="dashed", + linewidth=2, + label="mean", + ) # No need to add legend as has been + # added to histograms + axs[i].set_ylabel(f"({pair[0]}, {pair[1]}) Distance / $\mathrm{{\AA}}$") + axs[i].set_xlabel("Frame Index") + fig.tight_layout() + fig.savefig( + f"{work_dir}/distance_restraints_time.png", + facecolor="white", + ) + _plt.close(fig) + + def _getRestraintDict(u, pairs_ordered): + """ + Generate the multiple distance restraints dictionary given + a list of pairs of atoms to restrain, ordered by decreasing + variance of intramolecular distance. The first pair will be + selected to be the "permanent" distance restraint. The restraints + are selected so that the flat-bottomed region covers 95 % of + the distances sampled during the trajectory. + + Parameters + ---------- + u : MDAnalysis.Universe + The trajectory for the ABFE restraint calculation as a + MDAnalysis.Universe object. + + pairs_ordered : list[tuple] + The pairs of atoms ordered by SD. The first and second + elements of each tuple are the atom indices. + + Returns + ------- + restraint_dict : dict + The multiple distance restraints dictionary. + """ + # For each pair, get a list of the distances over the trajectory. + distances = {pair: [] for pair in pairs_ordered} + for _ in u.trajectory: + for pair in pairs_ordered: + distances[pair].append( + RestraintSearch._getDistance(pair[0], pair[1], u) + ) + + # Plot the distances over the trajectory. + _plotDistanceRestraints(distances) + + # For each pair, get the restraint parameters and add them to the + # restraint dictionary. + restraint_dict = {"distance_restraints": []} + for i, pair in enumerate(pairs_ordered): + r1 = system.getAtom(int(pair[1])) + l1 = system.getAtom(int(pair[0])) + r0 = _np.mean(distances[pair]) * _angstrom # Equilibrium distance + kr = 40 * _kcal_per_mol / (_angstrom**2) # Default of 40 kcal/mol/A^2 + r_fb = ( + _np.percentile(distances[pair], 97.5) + - _np.percentile(distances[pair], 2.5) + ) * _angstrom # Flat-bottomed region + individual_restraint_dict = { + "r1": r1, + "l1": l1, + "r0": r0, + "kr": kr, + "r_fb": r_fb, + } + # If this is the first pair, add it as the permanent distance restraint. + if i == 0: + restraint_dict[ + "permanent_distance_restraint" + ] = individual_restraint_dict + else: + restraint_dict["distance_restraints"].append( + individual_restraint_dict + ) + + return restraint_dict + + # Find pairs with lowest SD, ordered by SD. + pairs_ordered_sd = RestraintSearch._findOrderedPairs( + u, ligand_selection_str, receptor_selection_str, cutoff + ) + + # Cluster pairs according to direction (based on initial structure). Choose + # the number of clusters to be equal to the number of selected heavy atoms in the ligand. + selected_ligand_atoms = u.select_atoms(ligand_selection_str).select_atoms( + "not name H*" + ) + n_lig_heavy_atoms = len(selected_ligand_atoms) + pairs_ordered_clustered = _clusterPairsByDirection( + u, pairs_ordered_sd, n_clusters=n_lig_heavy_atoms + ) + + # Check each pair in order of reverse variance. If the ligand heavy atom has not + # already been used in a restraint and no other restraints from the given cluster + # have been selected, accept the restraint. + pairs_ordered_clustered_filtered = _filterPairsByCluster( + pairs_ordered_clustered, selected_ligand_atoms + ) + + # Get restraint parameters for each pair by selecting the flat-bottomed region + # to encompass 95 % of sampled configurations. This also plots the data. + restraint_dict = _getRestraintDict(u, pairs_ordered_clustered_filtered) + + # Create the restraint object and return it. + restraint = _Restraint( + system, + restraint_dict, + temperature=temperature, + restraint_type="multiple_distance", + ) + + return restraint diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py index 4bd2a5762..9bec1085e 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py @@ -37,7 +37,7 @@ def setup_system(): return ligand, decouple_system, protocol -# Make sure GROMSCS is installed. +# Make sure GROMACS is installed. has_gromacs = BSS._gmx_exe is not None @@ -169,7 +169,7 @@ def _restraint_search(tmp_path_factory): @staticmethod @pytest.fixture(scope="class") - def restraint(_restraint_search): + def boresch_restraint(_restraint_search): restraint_search, outdir = _restraint_search restraint = restraint_search.analyse( method="BSS", restraint_type="Boresch", block=False @@ -178,7 +178,7 @@ def restraint(_restraint_search): @staticmethod @pytest.fixture(scope="class") - def restraint_k20(_restraint_search): + def boresch_restraint_k20(_restraint_search): restraint_search, outdir = _restraint_search restraint = restraint_search.analyse( method="BSS", @@ -188,30 +188,39 @@ def restraint_k20(_restraint_search): ) return restraint, outdir - def test_sanity(self, restraint): - restraint, _ = restraint + @staticmethod + @pytest.fixture(scope="class") + def multiple_distance_restraint(_restraint_search): + restraint_search, outdir = _restraint_search + restraint = restraint_search.analyse( + method="BSS", restraint_type="multiple_distance", block=False + ) + return restraint, outdir + + def test_sanity(self, boresch_restraint): + restraint, _ = boresch_restraint assert isinstance(restraint, Restraint) - def test_plots(self, restraint): + def test_plots_boresch(self, boresch_restraint): """Test if all the plots have been generated correctly""" - restraint, outdir = restraint + restraint, outdir = boresch_restraint assert (outdir / "restraint_idx0_dof_time.png").is_file() assert (outdir / "restraint_idx0_dof_hist.png").is_file() - def test_dG_off(self, restraint): + def test_dG_off_boresch(self, boresch_restraint): """Test if the restraint generated has the same energy""" - restraint, _ = restraint + restraint, _ = boresch_restraint assert np.isclose(-9.8955, restraint.correction.value(), atol=0.01) - def test_bond(self, restraint): - restraint, _ = restraint + def test_bond_boresch(self, boresch_restraint): + restraint, _ = boresch_restraint equilibrium_values_r0 = ( restraint._restraint_dict["equilibrium_values"]["r0"] / nanometer ) assert np.isclose(0.6057, equilibrium_values_r0, atol=0.001) - def test_angles(self, restraint): - restraint, _ = restraint + def test_angles_boresch(self, boresch_restraint): + restraint, _ = boresch_restraint equilibrium_values_thetaA0 = ( restraint._restraint_dict["equilibrium_values"]["thetaA0"] / degree ) @@ -221,8 +230,8 @@ def test_angles(self, restraint): ) assert np.isclose(56.4496, equilibrium_values_thetaB0, atol=0.001) - def test_dihedrals(self, restraint): - restraint, _ = restraint + def test_dihedrals_boresch(self, boresch_restraint): + restraint, _ = boresch_restraint equilibrium_values_phiA0 = ( restraint._restraint_dict["equilibrium_values"]["phiA0"] / degree ) @@ -236,20 +245,20 @@ def test_dihedrals(self, restraint): ) assert np.isclose(71.3148, equilibrium_values_phiC0, atol=0.001) - def test_index(self, restraint): - restraint, _ = restraint + def test_index_boresch(self, boresch_restraint): + restraint, _ = boresch_restraint idxs = { k: restraint._restraint_dict["anchor_points"][k].index() for k in restraint._restraint_dict["anchor_points"] } assert idxs == {"r1": 1560, "r2": 1558, "r3": 1562, "l1": 10, "l2": 9, "l3": 11} - def test_force_constant(self, restraint_k20): - restraint, _ = restraint_k20 + def test_force_constant_boresch(self, boresch_restraint_k20): + restraint, _ = boresch_restraint_k20 for force_constant in restraint._restraint_dict["force_constants"].values(): assert np.isclose(20, force_constant.value(), atol=0.01) - def test_analysis_failure(self, _restraint_search): + def test_analysis_failure_boresch(self, _restraint_search): restraint_search, _ = _restraint_search with pytest.raises(AnalysisError): restraint = restraint_search.analyse( @@ -258,3 +267,28 @@ def test_analysis_failure(self, _restraint_search): block=False, cutoff=0.1 * angstrom, ) + + def test_plots_mdr(self, multiple_distance_restraint): + """Test if all the plots have been generated correctly""" + restraint, outdir = multiple_distance_restraint + assert (outdir / "distance_restraints_time.png").is_file() + assert (outdir / "distance_restraints_hist.png").is_file() + + def test_dG_off_mdr(self, multiple_distance_restraint): + """Test if the restraint generated has the same energy""" + restraint, _ = multiple_distance_restraint + assert np.isclose(-0.0790, restraint.correction.value(), atol=0.01) + + # TODO: Check that the atoms picked are always the same (once the resraint search algorithm is stable) + + # TODO: Test the dict parameters are the same (once the resraint search algorithm is stable) + + def test_analysis_failure_mdr(self, _restraint_search): + restraint_search, _ = _restraint_search + with pytest.raises(AnalysisError): + restraint = restraint_search.analyse( + method="BSS", + restraint_type="multiple_distance", + block=False, + cutoff=0.1 * angstrom, + ) From 8cd398624eacaf256bfc22a20a1ddc597b26739b Mon Sep 17 00:00:00 2001 From: Finlay Clark Date: Tue, 15 Aug 2023 15:51:06 +0000 Subject: [PATCH 08/19] Ensure that atom record is always written --- python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py index 33a0182ef..66b1412b1 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py @@ -1275,6 +1275,9 @@ def atom_sorting_criteria(atom): # Get the perturbed atom. atom = mol.atom(idx) + # Start atom record. + file.write(" atom\n") + # Only require the initial Lennard-Jones properties. LJ0 = atom.property("LJ0") @@ -1348,6 +1351,9 @@ def atom_sorting_criteria(atom): # Get the perturbed atom. atom = mol.atom(idx) + # Start atom record. + file.write(" atom\n") + # Only require the final Lennard-Jones and charge properties. LJ1 = atom.property("LJ1") charge1_value = atom.property("charge1").value() From eb41a907ddd5d47b986645244bc64e0eac211f1e Mon Sep 17 00:00:00 2001 From: Finlay Clark Date: Fri, 1 Sep 2023 10:04:46 +0000 Subject: [PATCH 09/19] Correct SOMD pert files Ensure that "endmolecule" is written for all SOMD pert files, and make the tests on the SOMD pert files more detailed (added checks for start and end atom and molecule lines). --- .../Sandpit/Exscientia/Process/_somd.py | 4 ++-- .../Exscientia/Protocol/test_config.py | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py index 66b1412b1..cbf663afc 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_somd.py @@ -3082,8 +3082,8 @@ def sort_dihedrals(dihedrals, idx): # End improper record. file.write(" endimproper\n") - # End molecule record. - file.write("endmolecule\n") + # End molecule record. + file.write("endmolecule\n") # Finally, convert the molecule to the lambda = 0 state. diff --git a/tests/Sandpit/Exscientia/Protocol/test_config.py b/tests/Sandpit/Exscientia/Protocol/test_config.py index 9f7e2d1a1..471976d22 100644 --- a/tests/Sandpit/Exscientia/Protocol/test_config.py +++ b/tests/Sandpit/Exscientia/Protocol/test_config.py @@ -423,12 +423,16 @@ def test_turn_on_restraint_boresch(self, system_and_boresch_restraint): assert atom in pert_text # Check perturbations are correct lines = [ + "molecule LIG", + "atom", "initial_type C1", "final_type C1", "initial_LJ 3.48065 0.08688", "final_LJ 3.48065 0.08688", "initial_charge -0.13000", "final_charge -0.13000", + "endatom", + "endmolecule", ] for line in lines: assert line in pert_text @@ -462,12 +466,16 @@ def test_turn_on_restraint_mdr(self, system_and_mdr_restraint): assert atom in pert_text # Check perturbations are correct lines = [ + "molecule LIG", + "atom", "initial_type C1", "final_type C1", "initial_LJ 3.48065 0.08688", "final_LJ 3.48065 0.08688", "initial_charge -0.13000", "final_charge -0.13000", + "endatom", + "endmolecule", ] for line in lines: assert line in pert_text @@ -502,12 +510,16 @@ def test_release_restraint_mdr(self, system_and_mdr_restraint): # and the atom types are all du # Check perturbations are correct lines = [ + "molecule LIG", + "atom", "initial_type du", "final_type du", "initial_LJ 0.00000 0.00000", "final_LJ 0.00000 0.00000", "initial_charge 0.00000", "final_charge 0.00000", + "endatom", + "endmolecule", ] for line in lines: assert line in pert_text @@ -542,12 +554,16 @@ def test_discharge(self, system_and_boresch_restraint): assert atom in pert_text # Check perturbations are correct lines = [ + "molecule LIG", + "atom", "initial_type C1", "final_type C1", "initial_LJ 3.48065 0.08688", "final_LJ 3.48065 0.08688", "initial_charge -0.13000", "final_charge -0.00000", + "endatom", + "endmolecule", ] for line in lines: assert line in pert_text @@ -582,12 +598,16 @@ def test_vanish(self, system_and_boresch_restraint): assert atom in pert_text # Check perturbations are correct lines = [ + "molecule LIG", + "atom", "initial_type C1", "final_type du", "initial_LJ 3.48065 0.08688", "final_LJ 0.00000 0.00000", "initial_charge -0.00000", "final_charge -0.00000", + "endatom", + "endmolecule", ] for line in lines: assert line in pert_text @@ -622,12 +642,16 @@ def test_discharge_and_vanish(self, system_and_boresch_restraint): assert atom in pert_text # Check perturbations are correct lines = [ + "molecule LIG", + "atom", "initial_type C1", "final_type du", "initial_LJ 3.48065 0.08688", "final_LJ 0.00000 0.00000", "initial_charge -0.13000", "final_charge 0.00000", + "endatom", + "endmolecule", ] for line in lines: assert line in pert_text From 0fe2fe4a841abfee79a488829862a7226cae9690 Mon Sep 17 00:00:00 2001 From: Finlay Clark Date: Fri, 1 Sep 2023 12:47:05 +0000 Subject: [PATCH 10/19] Fix sandpit CI --- .github/workflows/Sandpit_exs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Sandpit_exs.yml b/.github/workflows/Sandpit_exs.yml index fa90be7ec..a3207315d 100644 --- a/.github/workflows/Sandpit_exs.yml +++ b/.github/workflows/Sandpit_exs.yml @@ -35,7 +35,7 @@ jobs: - name: Install dependency run: | - mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.8 ambertools gromacs "sire=2023.2.2" alchemlyb pytest pyarrow "typing-extensions!=4.6.0" openff-interchange pint=0.21 "rdkit<2023" "jaxlib>0.3.7" tqdm scikit-learn + mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.8 ambertools gromacs "sire>=2023.2" alchemlyb pytest pyarrow "typing-extensions!=4.6.0" openff-interchange pint=0.21 "rdkit<2023" "jaxlib>0.3.7" tqdm python -m pip install git+https://github.com/Exscientia/MDRestraintsGenerator.git # For the testing of BSS.FreeEnergy.AlchemicalFreeEnergy.analysis python -m pip install https://github.com/alchemistry/alchemtest/archive/master.zip From 8e722eb7ba1f9769d7e83e6637fa2c79599916d4 Mon Sep 17 00:00:00 2001 From: Finlay Clark Date: Fri, 1 Sep 2023 13:30:22 +0000 Subject: [PATCH 11/19] Fix issue with checking alchemlyb version number --- tests/Sandpit/Exscientia/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Sandpit/Exscientia/conftest.py b/tests/Sandpit/Exscientia/conftest.py index eb88cf301..7facc4362 100644 --- a/tests/Sandpit/Exscientia/conftest.py +++ b/tests/Sandpit/Exscientia/conftest.py @@ -63,7 +63,7 @@ has_alchemlyb = True # Check for parquet support. - major, minor, _ = alchemlyb.__version__.split(".") + major, minor, *_ = alchemlyb.__version__.split(".") major = int(major) minor = int(minor) if major < 2 or (major < 3 and minor < 1): From 514370eae924633bc3ebc296255171babe9ac49a Mon Sep 17 00:00:00 2001 From: Finlay Clark Date: Thu, 21 Sep 2023 14:44:26 +0000 Subject: [PATCH 12/19] Make restraint search more robust for multiple distance restraints Increase the number of directional clusters until the restraint search succeeds or hits the maximum. --- .../FreeEnergy/_restraint_search.py | 50 +++++++++++++------ 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py index 249bb9380..79ebcc3f9 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint_search.py @@ -2013,11 +2013,8 @@ def _filterPairsByCluster(pairs_ordered_clustered, selected_ligand_atoms): if len(available_ligand_atoms) == 0: return filtered_pairs - # If not all ligand atoms have been used in a restraint, raise an error. - raise _AnalysisError( - "Not all ligand atoms have been used in a restraint. Try increasing the number of " - "candidate pairs." - ) + # If not all ligand atoms have been used in a restraint, return nothing. + return None def _plotDistanceRestraints(distance_dict): """ @@ -2053,6 +2050,10 @@ def _plotDistanceRestraints(distance_dict): axs[i].set_ylabel("Probability") if i == n_pairs - 1: # Only add legend to last plot axs[i].legend() + # Hide unused axes + if i >= n_pairs: + axs[i].axis("off") + fig.tight_layout() fig.savefig( f"{work_dir}/distance_restraints_hist.png", @@ -2163,17 +2164,36 @@ def _getRestraintDict(u, pairs_ordered): selected_ligand_atoms = u.select_atoms(ligand_selection_str).select_atoms( "not name H*" ) - n_lig_heavy_atoms = len(selected_ligand_atoms) - pairs_ordered_clustered = _clusterPairsByDirection( - u, pairs_ordered_sd, n_clusters=n_lig_heavy_atoms - ) + # Choose initial number of clusters to to the number of selected heavy atoms in the ligand. + n_clusters_initial = len(selected_ligand_atoms) + n_clusters_max = 3 * n_clusters_initial + n_clusters_to_try = [ + round(n) for n in _np.linspace(n_clusters_initial, n_clusters_max, 5) + ] + + for n_clusters in n_clusters_to_try: + pairs_ordered_clustered = _clusterPairsByDirection( + u, pairs_ordered_sd, n_clusters=n_clusters + ) - # Check each pair in order of reverse variance. If the ligand heavy atom has not - # already been used in a restraint and no other restraints from the given cluster - # have been selected, accept the restraint. - pairs_ordered_clustered_filtered = _filterPairsByCluster( - pairs_ordered_clustered, selected_ligand_atoms - ) + # Check each pair in order of reverse variance. If the ligand heavy atom has not + # already been used in a restraint and no other restraints from the given cluster + # have been selected, accept the restraint. + pairs_ordered_clustered_filtered = _filterPairsByCluster( + pairs_ordered_clustered, selected_ligand_atoms + ) + + if pairs_ordered_clustered_filtered is not None: + break + + if ( + n_clusters_to_try[-1] == n_clusters + and pairs_ordered_clustered_filtered is None + ): + raise _AnalysisError( + "Could not find suitable multiple distance restraints. " + "Please try increasing the number of candidate pairs." + ) # Get restraint parameters for each pair by selecting the flat-bottomed region # to encompass 95 % of sampled configurations. This also plots the data. From f5d1743185652b18c8c711f90977705b9041d61e Mon Sep 17 00:00:00 2001 From: Finlay Clark Date: Mon, 2 Oct 2023 16:26:26 +0000 Subject: [PATCH 13/19] Add more tests of mdr restraint search algorithm --- .../Exscientia/FreeEnergy/test_restraint.py | 6 ++- .../FreeEnergy/test_restraint_search.py | 45 +++++++++++++++++-- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py index e442dce37..1a74d23c5 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py @@ -329,8 +329,10 @@ def mdr_restraint_component_fb_r0(): @pytest.fixture(scope="session") def mdr_restraint_fb_r0(mdr_restraint_component_fb_r0): - """Generate the multiple distance restraints restraint object with - the values for r0 and r_fb set to 0 for the permanent restraint.""" + """ + Generate the multiple distance restraints restraint object with + the values for r0 and r_fb set to 0 for the permanent restraint. + """ system, restraint_dict = mdr_restraint_component_fb_r0 restraint = Restraint( diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py index 9bec1085e..5103a8263 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py @@ -277,11 +277,50 @@ def test_plots_mdr(self, multiple_distance_restraint): def test_dG_off_mdr(self, multiple_distance_restraint): """Test if the restraint generated has the same energy""" restraint, _ = multiple_distance_restraint - assert np.isclose(-0.0790, restraint.correction.value(), atol=0.01) + assert np.isclose(-0.0790, restraint.correction.value(), atol=0.001) - # TODO: Check that the atoms picked are always the same (once the resraint search algorithm is stable) + def test_index_mdr(self, multiple_distance_restraint): + """Check that the same atoms are selected for the distance restraint.""" + restraint, _ = multiple_distance_restraint + idxs_receptor = [ + pair["r1"].index() + for pair in restraint._restraint_dict["distance_restraints"] + ] + idxs_ligand = [ + pair["l1"].index() + for pair in restraint._restraint_dict["distance_restraints"] + ] + # The search algorithm is not deterministic, so the receptor anchor points + # are not always the same. The ligand anchor points are always the same as + # all heavy atoms are used. + expected_idxs_ligand = [13, 9, 16, 12, 11, 14, 15, 8, 17].sort() + assert len(idxs_receptor) == len(idxs_ligand) + assert idxs_ligand.sort() == expected_idxs_ligand + + # Check the permanent distance restraint - this is always the same + assert ( + restraint._restraint_dict["permanent_distance_restraint"]["r1"].index() + == 1539 + ) + assert ( + restraint._restraint_dict["permanent_distance_restraint"]["l1"].index() + == 10 + ) - # TODO: Test the dict parameters are the same (once the resraint search algorithm is stable) + def test_parameters_mdr(self, multiple_distance_restraint): + """ + Check the parameters selected for the permanent distance restraint. + Other restraints are not tested as the multiple distance restraint + selection algorithm is not deterministic. + """ + restraint, _ = multiple_distance_restraint + permanent_restraint = restraint._restraint_dict["permanent_distance_restraint"] + assert np.isclose(8.9019, permanent_restraint["r0"] / angstrom, atol=0.001) + assert np.isclose( + 40, + permanent_restraint["kr"] / (kcal_per_mol / angstrom**2), + ) + assert np.isclose(0.5756, permanent_restraint["r_fb"] / angstrom, atol=0.001) def test_analysis_failure_mdr(self, _restraint_search): restraint_search, _ = _restraint_search From 9971a7b049804850c119c63e5229548093dbb8c4 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 14 Nov 2023 09:50:08 +0000 Subject: [PATCH 14/19] Add tests for MDR restraints dict --- .../FreeEnergy/test_restraint_search.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py index 9bec1085e..d6a168d7b 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint_search.py @@ -279,7 +279,27 @@ def test_dG_off_mdr(self, multiple_distance_restraint): restraint, _ = multiple_distance_restraint assert np.isclose(-0.0790, restraint.correction.value(), atol=0.01) - # TODO: Check that the atoms picked are always the same (once the resraint search algorithm is stable) + def test_dict_mdr(self, multiple_distance_restraint): + """Check that the MDR restraint dictionary is correct""" + restraint, _ = multiple_distance_restraint + restr_dict = restraint._restraint_dict + # Check the permanent distance restraint - this should always be the same. + assert restr_dict["permanent_distance_restraint"]["r1"].index() == 1539 + assert restr_dict["permanent_distance_restraint"]["l1"].index() == 10 + assert restr_dict["permanent_distance_restraint"]["r0"].unit() == "ANGSTROM" + # Check close + assert restr_dict["permanent_distance_restraint"][ + "r0" + ].value() == pytest.approx(8.9019, abs=1e-4) + assert restr_dict["permanent_distance_restraint"]["kr"].unit() == "M Q-1 T-2" + assert restr_dict["permanent_distance_restraint"]["kr"].value() == 40.0 + assert restr_dict["permanent_distance_restraint"]["r_fb"].unit() == "ANGSTROM" + assert restr_dict["permanent_distance_restraint"][ + "r_fb" + ].value() == pytest.approx(0.5756, abs=1e-4) + # Check the distance restraints - these will vary slightly due to random initialisation + # of the clustering algorithm during restraint searching, so we just check the number. + assert len(restr_dict["distance_restraints"]) == 9 # TODO: Test the dict parameters are the same (once the resraint search algorithm is stable) From edf8294d09d589f8c58756f1411200810cce39cb Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 14 Nov 2023 12:11:05 +0000 Subject: [PATCH 15/19] Implement multiple distance restraints for GROMACS This is done by using restraint potentials (function type 10) for the flat-bottomed restraints which are affected by lambda- restraint, and a distance restraint for the permanent restraint which is not affected by any lambdas. See https://manual.gromacs.org/documentation/2019-rc1/reference-manual/topologies/topology-file-formats.html#id31 for details. Note that the distance restraint requires the force constant to be written to the mdp file. --- .github/workflows/Sandpit_exs.yml | 2 +- .../FreeEnergy/_alchemical_free_energy.py | 13 +- .../Exscientia/FreeEnergy/_restraint.py | 132 +++++++++++++++++- .../Sandpit/Exscientia/Process/_gromacs.py | 34 ++++- .../Sandpit/Exscientia/Protocol/_config.py | 42 +++++- .../Exscientia/FreeEnergy/test_restraint.py | 99 ++++++++++++- .../Exscientia/Protocol/test_config.py | 129 ++++++++++------- 7 files changed, 388 insertions(+), 63 deletions(-) diff --git a/.github/workflows/Sandpit_exs.yml b/.github/workflows/Sandpit_exs.yml index 8c8558cf1..aa4856674 100644 --- a/.github/workflows/Sandpit_exs.yml +++ b/.github/workflows/Sandpit_exs.yml @@ -35,7 +35,7 @@ jobs: - name: Install dependency run: | - mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.3" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm scikit-learn + mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.3.0" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm scikit-learn python -m pip install git+https://github.com/Exscientia/MDRestraintsGenerator.git # For the testing of BSS.FreeEnergy.AlchemicalFreeEnergy.analysis python -m pip install https://github.com/alchemistry/alchemtest/archive/master.zip diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py index 0be2de619..731e269da 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py @@ -258,12 +258,23 @@ def __init__( "Use the 'BioSimSpace.Align' package to map and merge molecules." ) - if self._protocol.getPerturbationType() != "full": + pert_type = self._protocol.getPerturbationType() + if pert_type not in ["full", "release_restraint"]: raise NotImplementedError( "GROMACS currently only supports the 'full' perturbation " "type. Please use engine='SOMD' when running multistep " "perturbation types." ) + if pert_type == "release_restraint": + restraint_err = ValueError( + "The 'release_restraint' perturbation type requires a multiple " + "distance restraint restraint type." + ) + if not restraint: + raise restraint_err + if restraint._restraint_type != "multiple_distance": + raise restraint_err + self._exe = _gmx_exe elif engine == "AMBER": # Find a molecular dynamics engine and executable. diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py index 8cb1ba4bf..499846255 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_restraint.py @@ -92,7 +92,7 @@ class Restraint: The multiple distance restraints are flat-bottom restraints, which are represented as a dictionaries of the form: - distance_restraint_dict = {rl: BioSimSpace._SireWrappers.Atom, + distance_restraint_dict = {r1: BioSimSpace._SireWrappers.Atom, l1: BioSimSpace._SireWrappers.Atom, r0: BioSimSpace.Types.Length, kr: BioSimSpace.Types.Energy / BioSimSpace.Types.Area, @@ -113,7 +113,7 @@ class Restraint: # Create a dict of supported restraints and compatible engines. supported_restraints = { "boresch": ["gromacs", "somd"], - "multiple_distance": ["somd"], + "multiple_distance": ["gromacs", "somd"], } def __init__(self, system, restraint_dict, temperature, restraint_type="Boresch"): @@ -486,6 +486,129 @@ def write_dihedral(key_list, equilibrium_values, force_constants): return "\n".join(output) + def _gromacs_multiple_distance(self, perturbation_type=None): + """ + Format the Gromacs string for multiple distance restraints. + + Parameters + ---------- + perturbation_type : str, optional, default=None + The type of perturbation to applied during the current stage of the free energy + calculation. If the perturbation type is "release_restraint", the permanent distance + restraint will be written as a distance restraint (topology file directive [ distance + restraints ], not affected by any lambda values), while all other restraints will be + written as restraint potentials (topology file directive [ bonds ], affected by bonded-lambda. + This allows the bond restraints to be released while retaining the permanent distance restraint. + For all other perturbation types, all restraints will be written as restraint potential bonds. + + Returns + ------- + str + The Gromacs string for the multiple distance restraints. + """ + + def _get_distance_restraint_restraint_str(r1, l1, r0, r_fb, kr): + """ + Get the text line specifying a distance restraint restraint + (unaffected by any lambdas). + """ + # Calculate parameters. + ai = self._system.getIndex(r1) + 1 + aj = self._system.getIndex(l1) + 1 + low = r0 - r_fb + up1 = r0 + r_fb + up2 = 100 # Set this unresonably high so that we never get a linear potential. + + # Format strings, remembering to convert units. + restr_type_str = "2" # No averaging + restr_index_str = "0" # Only one restraint + low_str = "{:.3f}".format(low / _nanometer) + up1_str = "{:.3f}".format(up1 / _nanometer) + up2_str = "{:.3f}".format(up2) + fac_str = ( + "1.0" # Scale the force constant (specified in the mdp file) by 1.0. + ) + + # Format entire string. + # ai, aj, type, index, type', low, up1, up2, fac + distance_restraint_restraint_parameter_string = f" {ai:<10} {aj:<10} {restr_type_str:<10} {restr_index_str:<10} {restr_type_str:<10} {low_str:<10} {up1_str:<10} {up2_str:<10} {fac_str:<10}" + + return distance_restraint_restraint_parameter_string + + def _get_restraint_potential_bond_str(r1, l1, r0, r_fb, kr): + """ + Get the text line specifying a restraint potential bond + (affected by bonded-lambda). + """ + # Calculate parameters. + ai = self._system.getIndex(r1) + 1 + aj = self._system.getIndex(l1) + 1 + low = r0 - r_fb + up1 = r0 + r_fb + up2 = 100 # Set this unresonably high so that we never get a linear potential. + + # Format strings, remembering to convert units. + restr_type_str = "10" # Restraint potential bond + low_str = "{:.3f}".format(low / _nanometer) + up1_str = "{:.3f}".format(up1 / _nanometer) + up2_str = "{:.3f}".format(up2) + kdr_0_str = "{:.2f}".format( + 0 + ) # Force constant is 0 when bonded-lambda = 0. + kdr_str = "{:.2f}".format(kr / (_kj_per_mol / _nanometer**2)) + + # Format entire string. + # ai, aj, type, lowA, up1A, up2A, kdrA, lowB, up1B, up2B, kdrB + restraint_potential_bond_parameter_string = f" {ai:<10} {aj:<10} {restr_type_str:<10} {low_str:<10} {up1_str:<10} {up2_str:<10} {kdr_0_str:<10} {low_str:<10} {up1_str:<10} {up2_str:<10} {kdr_str:<10}" + + return restraint_potential_bond_parameter_string + + # Write the output string. + output = [ + "[ intermolecular_interactions ]", + ] + + output.append("[ bonds ]") + output.append( + "; ai aj type lowA up1A up2A kdrA lowB up1B up2B kdrB" + ) + for restraint in self._restraint_dict["distance_restraints"]: + output.append( + _get_restraint_potential_bond_str( + restraint["r1"], + restraint["l1"], + restraint["r0"], + restraint["r_fb"], + restraint["kr"], + ) + ) + if perturbation_type != "release_restraint": + output.append( + _get_restraint_potential_bond_str( + self._restraint_dict["permanent_distance_restraint"]["r1"], + self._restraint_dict["permanent_distance_restraint"]["l1"], + self._restraint_dict["permanent_distance_restraint"]["r0"], + self._restraint_dict["permanent_distance_restraint"]["r_fb"], + self._restraint_dict["permanent_distance_restraint"]["kr"], + ) + ) + else: # Perturbation type is release_restraint - write the permanent restraint as a distance restraint + output.append("[ distance_restraints ]") + output.append( + "; ai aj type index type' low up1 up2 fac" + ) + output.append( + _get_distance_restraint_restraint_str( + self._restraint_dict["permanent_distance_restraint"]["r1"], + self._restraint_dict["permanent_distance_restraint"]["l1"], + self._restraint_dict["permanent_distance_restraint"]["r0"], + self._restraint_dict["permanent_distance_restraint"]["r_fb"], + self._restraint_dict["permanent_distance_restraint"]["kr"], + ) + ) + + return "\n".join(output) + def _somd_boresch(self, perturbation_type=None): """Format the SOMD string for the Boresch restraints.""" @@ -601,7 +724,10 @@ def toString(self, engine, perturbation_type=None): """ to_str_functions = { "boresch": {"gromacs": self._gromacs_boresch, "somd": self._somd_boresch}, - "multiple_distance": {"somd": self._somd_multiple_distance}, + "multiple_distance": { + "gromacs": self._gromacs_multiple_distance, + "somd": self._somd_multiple_distance, + }, } engine = engine.strip().lower() diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py index 4c83b1c0f..f95020786 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py @@ -312,14 +312,30 @@ def _write_system(self, system, coord_file=None, topol_file=None, ref_file=None) ) # Check that the perturbation type is supported.. - if self._protocol.getPerturbationType() != "full": + if self._protocol.getPerturbationType() not in [ + "full", + "release_restraint", + ]: msg = ( "'BioSimSpace.Process.Gromacs' currently only supports the 'full' " - "perturbation type. Please use 'BioSimSpace.Process.Somd' " + " and 'release_restraint' perturbation type. Please use 'BioSimSpace.Process.Somd' " "for multistep perturbation types." ) raise NotImplementedError(msg) + # Check that we have multiple distance restraints if the perturbation type is 'release_restraint'. + if self._protocol.getPerturbationType() == "release_restraint": + if not self._restraint: + raise ValueError( + "'BioSimSpace.Process.Gromacs' requires a " + "restraint for the 'release_restraint' perturbation type!" + ) + if self._restraint._restraint_type != "multiple_distance": + raise ValueError( + "'BioSimSpace.Process.Gromacs' requires a " + "multiple distance restraint for the 'release_restraint' perturbation type!" + ) + else: # Check for perturbable molecules and convert to the chosen end state. system = self._checkPerturbable(system) @@ -381,7 +397,12 @@ def _write_system(self, system, coord_file=None, topol_file=None, ref_file=None) if self._restraint: with open(topol_file, "a") as f: f.write("\n") - f.write(self._restraint.toString(engine="GROMACS")) + f.write( + self._restraint.toString( + engine="GROMACS", + perturbation_type=self._protocol._perturbation_type, + ) + ) def _generate_config(self): """Generate GROMACS configuration file strings.""" @@ -453,10 +474,17 @@ def _generate_config(self): # Set the configuration. if not isinstance(self._protocol, _Protocol.Dummy): config = _Protocol.ConfigFactory(self._system, self._protocol) + pert_type = ( + self._protocol._perturbation_type + if isinstance(self._protocol, _Protocol._FreeEnergyMixin) + else None + ) self.addToConfig( config.generateGromacsConfig( extra_options={**config_options, **self._extra_options}, extra_lines=self._extra_lines, + restraint=self._restraint, + perturbation_type=pert_type, ) ) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py index 5ae1b3b49..300c6af8b 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Protocol/_config.py @@ -8,6 +8,9 @@ from .. import _gmx_version from .._Exceptions import IncompatibleError as _IncompatibleError from ..Align._squash import _amber_mask_from_indices, _squashed_atom_mapping +from ..FreeEnergy._restraint import Restraint as _Restraint +from ..Units.Energy import kj_per_mol as _kj_per_mol +from ..Units.Length import nanometer as _nanometer class ConfigFactory: @@ -390,7 +393,13 @@ def generateAmberConfig(self, extra_options=None, extra_lines=None): return total_lines - def generateGromacsConfig(self, extra_options=None, extra_lines=None): + def generateGromacsConfig( + self, + extra_options=None, + extra_lines=None, + restraint=None, + perturbation_type=None, + ): """ Outputs the current protocol in a format compatible with GROMACS. @@ -403,12 +412,29 @@ def generateGromacsConfig(self, extra_options=None, extra_lines=None): extra_lines : list A list of extra lines to be put at the end of the script. + restraint : :class:`Restraint ` + Restraint object that contains information for ABFE calculations. + + perturbation_type : str + The type of perturbation to perform. Options are: + "full" : A full perturbation of all terms (default option). + "release_restraint" : Used with multiple distance restraints to release all + restraints other than the "permanent" one when the ligand + is fully decoupled. Note that lambda = 0.0 is the fully + released state, and lambda = 1.0 is the fully restrained + state (i.e. 0.0 -> value). The non-permanent restraints + can be scaled with bonded-lambda. + Returns ------- config : list The generated config list in a GROMACS format. """ + if perturbation_type == "release_restraint" and restraint is None: + raise ValueError( + "Cannot use perturbation_type='release_restraint' without a Restraint object." + ) extra_options = extra_options if extra_options is not None else {} extra_lines = extra_lines if extra_lines is not None else [] @@ -598,6 +624,20 @@ def tranform(charge, LJ): "nstdhdl" ] = self._report_interval # Write gradients every report_interval steps. + # Handle the combination of multiple distance restraints and perturbation type + # of "release_restraint". In this case, the force constant of the "permanent" + # restraint needs to be written to the mdp file. + if perturbation_type == "release_restraint": + if not isinstance(restraint, _Restraint): + raise ValueError( + "Cannot use perturbation_type='release_restraint' without a Restraint object." + ) + # Get force constant in correct units. + force_constant = restraint._restraint_dict[ + "permanent_distance_restraint" + ]["kr"] / (_kj_per_mol / _nanometer**2) + protocol_dict["disre-fc"] = force_constant + # Put everything together in a line-by-line format. total_dict = {**protocol_dict, **extra_options} total_lines = [ diff --git a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py index e442dce37..1a3e99e08 100644 --- a/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py +++ b/tests/Sandpit/Exscientia/FreeEnergy/test_restraint.py @@ -384,10 +384,101 @@ def test_analytical_correction_mdr(mdr_restraint): dG = mdr_restraint.getCorrection(method="analytical") / kcal_per_mol -def test_gromacs_output_mdr(mdr_restraint): - """Test the Gromacs output.""" - with pytest.raises(NotImplementedError): - gromacs_string = mdr_restraint.toString(engine="Gromacs") +class TestGromacsOutputMDR: + @staticmethod + @pytest.fixture(scope="class") + def getRestraintGromacsFull(mdr_restraint): + """The form of the restraints when the perturbation type is `full`.""" + mdr_str = mdr_restraint.toString( + engine="GROMACS", perturbation_type="full" + ).split("\n") + return mdr_str + + @staticmethod + @pytest.fixture(scope="class") + def getRestraintGromacsRelease(mdr_restraint): + """The form of the restraints when the perturbation type is `release_restraint`.""" + mdr_str = mdr_restraint.toString( + engine="GROMACS", perturbation_type="release_restraint" + ).split("\n") + return mdr_str + + def test_n_lines(self, getRestraintGromacsFull, getRestraintGromacsRelease): + """Test that the correct number of lines have been written.""" + assert len(getRestraintGromacsFull) == 6 + assert len(getRestraintGromacsRelease) == 8 + + def test_section_names(self, getRestraintGromacsFull, getRestraintGromacsRelease): + """Check that the section headings are correct.""" + assert ( + getRestraintGromacsFull[0].strip() + == getRestraintGromacsRelease[0].strip() + == "[ intermolecular_interactions ]" + ) + assert ( + getRestraintGromacsFull[1].strip() + == getRestraintGromacsRelease[1].strip() + == "[ bonds ]" + ) + assert getRestraintGromacsRelease[5].strip() == "[ distance_restraints ]" + + def test_comments(self, getRestraintGromacsFull, getRestraintGromacsRelease): + """Check that the value label comments are as expected.""" + bond_restr_str = "; ai aj type lowA up1A up2A kdrA lowB up1B up2B kdrB" + distance_restr_str = "; ai aj type index type' low up1 up2 fac" + assert ( + getRestraintGromacsFull[2].strip() + == getRestraintGromacsRelease[2].strip() + == bond_restr_str + ) + assert getRestraintGromacsRelease[6].strip() == distance_restr_str + + def test_at_nums(self, getRestraintGromacsFull, getRestraintGromacsRelease): + """Check that the anchor point atom numbers are correct.""" + full_restr_lines = getRestraintGromacsFull[3:] + release_restr_lines = getRestraintGromacsRelease[3:5] + [ + getRestraintGromacsRelease[7] + ] + for lines in [full_restr_lines, release_restr_lines]: + ais = [x.split()[0] for x in lines] + ajs = [x.split()[1] for x in lines] + assert ais == ["1", "2", "3"] + assert ajs == ["1496", "1497", "1498"] + + def test_restraint_vals_bond_restraint( + self, getRestraintGromacsFull, getRestraintGromacsRelease + ): + """Check that the bond restraint values are correct.""" + full_restr_lines = getRestraintGromacsFull[3:] + release_restr_lines = getRestraintGromacsRelease[3:5] + + # Strip off the atom numbers. + full_restr_lines = [x.split()[2:] for x in full_restr_lines] + release_restr_lines = [x.split()[2:] for x in release_restr_lines] + + # Check that all lines are the same. + assert all([x == full_restr_lines[0] for x in full_restr_lines]) + assert all([x == release_restr_lines[0] for x in release_restr_lines]) + assert full_restr_lines[0] == release_restr_lines[0] + + # Pick one line (equivalence of all lines checked above) and check the values. + line = [x.strip() for x in full_restr_lines[0]] + assert line == [ + "10", + "0.200", + "0.400", + "100.000", + "0.00", + "0.200", + "0.400", + "100.000", + "4184.00", + ] + + def test_restraint_vals_distance_restraint(self, getRestraintGromacsRelease): + """Check that the distance restraint values are correct.""" + restr_vals = [x.strip() for x in getRestraintGromacsRelease[7].split()[2:]] + assert restr_vals == ["2", "0", "2", "0.200", "0.400", "100.000", "1.0"] class TestSomdOutputMDR: diff --git a/tests/Sandpit/Exscientia/Protocol/test_config.py b/tests/Sandpit/Exscientia/Protocol/test_config.py index 87a76896e..de0409e76 100644 --- a/tests/Sandpit/Exscientia/Protocol/test_config.py +++ b/tests/Sandpit/Exscientia/Protocol/test_config.py @@ -205,6 +205,61 @@ def test_staged_fep_df(self, system): assert "init-lambda-state = 6" in mdp_text +@pytest.mark.skipif( + has_antechamber is False or has_openff is False, + reason="Requires ambertools/antechamber and openff to be installed", +) +@pytest.fixture(scope="module") +def system_and_mdr_restraint(): + # Benzene. + m = BSS.Parameters.openff_unconstrained_2_0_0("c1ccccc1").getMolecule() + m._sire_object = m._sire_object.edit().rename("LIG").molecule().commit() + + # Assign atoms for restraint + atom_1 = m.getAtoms()[0] + atom_2 = m.getAtoms()[1] + atom_3 = m.getAtoms()[2] + atom_4 = m.getAtoms()[3] + atom_5 = m.getAtoms()[4] + atom_6 = m.getAtoms()[5] + + mol = decouple(m) + system = mol.toSystem() + + # Create random restraint dictionary + restraint_dict = { + "distance_restraints": [ + { + "l1": atom_1, + "r1": atom_2, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + { + "l1": atom_3, + "r1": atom_4, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + ], + "permanent_distance_restraint": { + "l1": atom_5, + "r1": atom_6, + "r0": 3 * angstrom, + "kr": 10 * kcal_per_mol / angstrom**2, + "r_fb": 1 * angstrom, + }, + } + + restraint = Restraint( + system, restraint_dict, 298 * kelvin, restraint_type="multiple_distance" + ) + + return system, restraint + + @pytest.mark.skipif( has_antechamber is False or has_openff is False, reason="Requires ambertools/antechamber and openff to be installed", @@ -265,6 +320,30 @@ def test_annihilate_vdw2q(self, system): assert "couple-lambda1 = q" in mdp_text assert "couple-intramol = no" in mdp_text + @pytest.mark.skipif( + has_gromacs is False, reason="Requires GROMACS to be installed." + ) + def test_mdr_force_constant(self, system, system_and_mdr_restraint): + """ + Check that when the restraint type == 'release_restraint', the + force constant of the permanent distance restraint (not affected by + restraint-lambda) is written to the MDP file. + """ + system, restraint = system_and_mdr_restraint + protocol = FreeEnergy( + perturbation_type="release_restraint", + ) + + freenrg = BSS.FreeEnergy.AlchemicalFreeEnergy( + system, + protocol, + engine="GROMACS", + restraint=restraint, + ) + with open(f"{freenrg._work_dir}/lambda_6/gromacs.mdp", "r") as f: + mdp_text = f.read() + assert "disre-fc = 4184.0" in mdp_text + @pytest.mark.skipif( has_gromacs is False, reason="Requires GROMACS to be installed." ) @@ -343,56 +422,6 @@ def system_and_boresch_restraint(): return system, restraint - @staticmethod - @pytest.fixture(scope="class") - def system_and_mdr_restraint(): - # Benzene. - m = BSS.Parameters.openff_unconstrained_2_0_0("c1ccccc1").getMolecule() - - # Assign atoms for restraint - atom_1 = m.getAtoms()[0] - atom_2 = m.getAtoms()[1] - atom_3 = m.getAtoms()[2] - atom_4 = m.getAtoms()[3] - atom_5 = m.getAtoms()[4] - atom_6 = m.getAtoms()[5] - - mol = decouple(m) - system = mol.toSystem() - - # Create random restraint dictionary - restraint_dict = { - "distance_restraints": [ - { - "l1": atom_1, - "r1": atom_2, - "r0": 3 * angstrom, - "kr": 10 * kcal_per_mol / angstrom**2, - "r_fb": 1 * angstrom, - }, - { - "l1": atom_3, - "r1": atom_4, - "r0": 3 * angstrom, - "kr": 10 * kcal_per_mol / angstrom**2, - "r_fb": 1 * angstrom, - }, - ], - "permanent_distance_restraint": { - "l1": atom_5, - "r1": atom_6, - "r0": 3 * angstrom, - "kr": 10 * kcal_per_mol / angstrom**2, - "r_fb": 1 * angstrom, - }, - } - - restraint = Restraint( - system, restraint_dict, 298 * kelvin, restraint_type="multiple_distance" - ) - - return system, restraint - def test_turn_on_restraint_boresch(self, system_and_boresch_restraint): """Test for turning on multiple distance restraints""" system, restraint = system_and_boresch_restraint From e33539f47a3437898490ed10ff6214ccf7a5ec9d Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 14 Nov 2023 16:57:34 +0000 Subject: [PATCH 16/19] Require that gromacs process is passed free energy protocol if restraint is supplied --- python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py | 8 +++++++- tests/Sandpit/Exscientia/Process/test_gromacs.py | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py b/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py index f95020786..93c86f6d4 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Process/_gromacs.py @@ -198,6 +198,12 @@ def __init__( raise ValueError("'show_errors' must be of type 'bool'.") self._show_errors = show_errors + if restraint and not isinstance(protocol, _Protocol._FreeEnergyMixin): + raise ValueError( + "'BioSimSpace.Process.Gromacs' requires a " + "FreeEnergy protocol for running with a restraint!" + ) + # Initialise the energy dictionary and title header. self._energy_dict = ( dict() @@ -400,7 +406,7 @@ def _write_system(self, system, coord_file=None, topol_file=None, ref_file=None) f.write( self._restraint.toString( engine="GROMACS", - perturbation_type=self._protocol._perturbation_type, + perturbation_type=self._protocol.getPerturbationType(), ) ) diff --git a/tests/Sandpit/Exscientia/Process/test_gromacs.py b/tests/Sandpit/Exscientia/Process/test_gromacs.py index 7d4e57c49..ec3b9b7f7 100644 --- a/tests/Sandpit/Exscientia/Process/test_gromacs.py +++ b/tests/Sandpit/Exscientia/Process/test_gromacs.py @@ -210,7 +210,9 @@ def test_write_restraint(system, tmp_path): ) # Create a short production protocol. - protocol = BSS.Protocol.Production(runtime=BSS.Types.Time(0.0001, "nanoseconds")) + protocol = BSS.Protocol.FreeEnergy( + runtime=BSS.Types.Time(0.0001, "nanoseconds"), perturbation_type="full" + ) # Run the process and check that it finishes without error. run_process(system, protocol, restraint=restraint, work_dir=str(tmp_path)) From a8b56d0776696e1337acc559d6367599966d7c8d Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 14 Nov 2023 16:57:55 +0000 Subject: [PATCH 17/19] Update changelog with MDR implementation --- doc/source/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 4e4ce5cc2..4598e7e65 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -19,6 +19,7 @@ Devel * Remove redundant ``BioSimSpace.Types.Type.__ne__`` operator (`#201 `__). * Minor internal updates due to Sire API fixes (`#203 `__). * Fix bug in the BSS Boresch restraint search code (`@fjclark `_) (`#204 `__). +* Added SOMD and GROMACS support for multiple distance restraints for ABFE calculations (`#178 `__). `2023.4.0 `_ - Oct 13 2023 ------------------------------------------------------------------------------------------------- From 6655bfd7d8342948572b5266fff3cddc881efbc6 Mon Sep 17 00:00:00 2001 From: finlayclark Date: Sat, 18 Nov 2023 13:01:34 +0000 Subject: [PATCH 18/19] Add warning about SOMD instabilities with decoupled ligands [ci skip] --- .../Exscientia/FreeEnergy/_alchemical_free_energy.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py index 731e269da..95692bdc5 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py +++ b/python/BioSimSpace/Sandpit/Exscientia/FreeEnergy/_alchemical_free_energy.py @@ -365,6 +365,15 @@ def __init__( # Ensure that the system is compatible with the restraint restraint.system = self._system + # Warn the user about instabilities with multiple distance restraints in SOMD. + if restraint._restraint_type == "multiple_distance" and engine == "SOMD": + _warnings.warn( + "SOMD simulations with some non-interacting ligands can be unstable. This " + "affects some systems with multiple distance restraints during the release " + "restraint state. If you experience problems, consider using multiple distance " + "restraints with GROMACS instead." + ) + self._restraint = restraint # Create fake instance methods for 'analyse' and 'difference'. These From 8025d4a10f09c655342699847ccd012b870f35fd Mon Sep 17 00:00:00 2001 From: finlayclark Date: Tue, 21 Nov 2023 11:51:04 +0000 Subject: [PATCH 19/19] Remove duplicate mention of scikit-learn in dependencies [ci skip] --- .github/workflows/Sandpit_exs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/Sandpit_exs.yml b/.github/workflows/Sandpit_exs.yml index 2ae765da0..aa4856674 100644 --- a/.github/workflows/Sandpit_exs.yml +++ b/.github/workflows/Sandpit_exs.yml @@ -35,7 +35,7 @@ jobs: - name: Install dependency run: | - mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.3.0" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm scikit-learn scikit-learn + mamba install -c conda-forge -c openbiosim/label/main biosimspace python=3.10 ambertools gromacs "sire=2023.3.0" "alchemlyb>=2.1" pytest openff-interchange pint=0.21 rdkit "jaxlib>0.3.7" tqdm scikit-learn python -m pip install git+https://github.com/Exscientia/MDRestraintsGenerator.git # For the testing of BSS.FreeEnergy.AlchemicalFreeEnergy.analysis python -m pip install https://github.com/alchemistry/alchemtest/archive/master.zip