-
Notifications
You must be signed in to change notification settings - Fork 2
/
ligand_adder.py
executable file
·408 lines (342 loc) · 17.2 KB
/
ligand_adder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
#!/usr/bin/env python
"""
Adds ligands to a file
"""
import numpy as np
import os
import sys
import random
import subprocess
from beak.msm import utils
from beak.analyze import get_min_distance
from configparser import ConfigParser
from Dabble import DabbleBuilder, VmdSilencer
from tempfile import TemporaryDirectory
from vmd import atomsel, molecule, vmdnumpy
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# HELPER CLASS #
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class LigandAdder(object):
"""
Adds ligands to a molecule with one ligand set.
Args:
inputfile (str): Path to MAE file with one ligand
configfile (str): Path to config file for this sampling run
"""
def __init__(self, inputfile, configfile):
self.molid = molecule.load("mae", inputfile)
inputname = os.path.split(inputfile)[1]
self.repindex = int(inputname.split("_")[0])
self.sampled_clusters = [int(inputname.split("_")[2].replace(".mae",
""))]
config = ConfigParser(interpolation=None)
config.read(configfile)
self.config = config
self.gen = config.getint("production", "generation")
self.dabbleopts = config["dabble"]
self.outdir = os.path.join(os.path.abspath(self.config["system"]["rootdir"]),
"systems",
str(self.gen+1))
# Set up relevant variables
self.msm = utils.load(os.path.join(config["system"]["rootdir"],
"production",
str(self.gen),
"mmsm_G%d.pkl" % self.gen))
self.clusters = utils.load(os.path.join(config["system"]["rootdir"],
"production",
str(self.gen),
"testing.mcluster.pkl"))
inc_equil = config.getboolean("model", "include_equilibration",
fallback=True)
self.prodfiles = utils.get_prodfiles(generation=self.gen,
rootdir=config["system"]["rootdir"],
new=False,
equilibration=inc_equil)
criteria = config["model"].get("criteria", "hub_scores")
if criteria == "hub_scores":
self.weights = utils.load(os.path.join(config["system"]["rootdir"],
"production",
str(self.gen),
"mmsm_scores.pkl"))
elif criteria == "populations":
self.weights = self.msm.populations_
elif criteria == "counts":
def _count_one(self, num):
return np.sum([len(np.where(c == num)[0]) for c in self.clusters])
self.weights = sorted(range(self.config.getint("model", "macrostates")),
key=lambda x: _count_one(self, x))
self.bulk_clusters = np.argsort(self.weights)[-5:]
#===========================================================================
def go(self):
"""
Actually runs
"""
# Set the user field of actually placed ligand to nonzero
# this is the prettiest way to do ligand placement, but the user
# field doesn't save. We temporarily communicate this with zero cooods
atomsel("all").set("user", 1.0)
atomsel("same fragment as resname %s and (x=0. and y=0. and z=0.)"
% self.config["system"]["ligands"].replace(",", " "),
molid=self.molid).set("user", 0.0)
# Add ligands and Dabble
with VmdSilencer(output=os.path.join(self.config["system"]["rootdir"],
"systems",
str(self.gen+1),
"generator.log")):
self.add_ligands_and_dabble()
# Start the simulation
self.start_jobs()
# Check for output files from other adders. If we're all done,
# increment generation
for idx in range(1, self.config.getint("model", "samplers")+1):
if not os.path.isfile(os.path.join(self.outdir, "%d.prmtop" % idx)):
return
self.config["production"]["generation"] = str(self.gen+1)
with open(os.environ.get("CONFIG"), 'w') as configfile:
self.config.write(configfile)
#===========================================================================
def start_jobs(self):
"""
Starts all the production jobs
"""
inpdir = os.path.join(self.config["system"]["rootdir"],
"systems", str(self.gen+1))
command = [
os.path.join(os.environ["PI_HOME"], "software", "submit_new",
"submit_new"),
"-p", os.path.join(inpdir, "%d.prmtop" % self.repindex),
"-c", os.path.join(inpdir, "%d.inpcrd" % self.repindex),
"-d", self.config["system"]["rootdir"],
"-s", "80",
"-t", self.config["production"]["runlength"],
"-q", self.config["production"]["queue"],
"-j", "%s-G%d-r%d" % (self.config["system"]["jobname"],
self.gen+1, self.repindex),
"-r", str(self.gen+1),
"--%s" % self.config["production"]["pressure"],
"--msm", str(self.repindex),
"--gpus", "1",
]
if self.config.getboolean("production", "hmr", fallback=True):
command.append("--hmr")
# Could put above with fallback but this is less likely to take down
# all my jobs lol
if self.config.get("production", "extra_options", fallback=None):
command.extend(["--extra-production-options",
self.config["production"]["extra_options"]])
subprocess.check_call(command)
#==========================================================================
def _add_one_ligand(self, clust, permissive=False):
"""
Adds a single ligand to this frame using the greedy criteria:
- lowest hub score out of candidate hub score array
- not too close to existing sampled ligand
For now, try 10 times to find one, or give up and go random
Args:
molid (int): VMD molecule ID we're adding ligands too
clust (int): Cluster to sample
permissive (bool): Relaxes criteria about ligand being inside
the box
Returns:
(bool) Whether a ligand was successfully added
"""
# Get a frame representing this cluster
nligs = self.config.getint("system", "num_ligands")
fileindex, frameindex, ligidx = utils.get_frame(clust,
dataset=self.clusters,
nligs=nligs)
framid = utils.load_trajectory(filename=self.prodfiles[fileindex],
config=self.config,
frame=frameindex)
# Get a selection for relevant ligand from this frame
lignames = self.config["system"]["ligands"].split(',')
ligids = sorted(set(atomsel("resname %s"
% " ".join(lignames),
molid=framid).get("residue")))
ligsel = atomsel("same fragment as residue %d"
% ligids[ligidx], framid)
# Check ligand isn't out of the box, which can happen sometimes
# with solvent ligands. This can result in some weird dabbled
# systems that later crash
box = [float(x)/2. for x in self.dabbleopts["dimensions"].split(',')]
#pylint: disable=too-many-boolean-expressions
if not permissive and (\
max(ligsel.get("x")) > box[0] or \
min(ligsel.get("x")) < -box[0] or \
max(ligsel.get("y")) > box[1] or \
min(ligsel.get("y")) < -box[1] or \
max(ligsel.get("z")) > box[2] or \
min(ligsel.get("z")) < -box[2]):
print(" Rejected out of box")
return False
#pylint: enable=too-many-boolean-expressions
# Check ligand distance from protein so no overlap happens
protsel = atomsel("protein and noh and not same fragment as "
"resname %s" % " ".join(lignames), self.molid)
mind = get_min_distance(ligsel, protsel)
if mind < float(self.dabbleopts["minprotdist"]):
print(" Rejected too close to protein: %f" % mind)
return False
# Check ligand distance to already chosen ligands in molid
ligfrags = set(atomsel("resname %s and user 1.0"
% " ".join(lignames),
molid=self.molid).get("fragment"))
for frag in ligfrags:
checksel = atomsel("fragment %s" % frag, self.molid)
mind = get_min_distance(checksel, ligsel)
if mind < float(self.dabbleopts["minligdist"]):
print(" Rejected too close to ligands: %f" % mind)
return False
# Pick an unset ligand to move
selres = min(atomsel("resname %s and not user 1.0"
% " ".join(lignames),
self.molid).get("residue"))
oldidxs = atomsel("same fragment as residue %d"
% selres, self.molid).get("index")
newidxs = ligsel.get("index")
if len(oldidxs) != len(newidxs):
raise ValueError("AHHH LIGANDS")
# Get desired ligand coordinates with a view into array and copy
subsetm = vmdnumpy.timestep(self.molid, 0)[min(oldidxs):max(oldidxs)+1]
subsetf = vmdnumpy.timestep(framid, 0)[min(newidxs):max(newidxs)+1]
np.copyto(src=subsetf, dst=subsetm)
# Set the user field of this ligand to indicate it's selected
atomsel("same fragment as residue %d"
% selres, self.molid).set("user", 1.0)
# Do some cleanup and add this ligand to the sampled list
molecule.delete(framid)
self.sampled_clusters.append(clust)
return True
#==========================================================================
def _get_random_cluster(self):
"""
Returns num random clusters to resample, with probability
inversely proportional to representation
Args:
weights (list of float): Weight to apply to each cluster.
Will be normalized and turned to probabiilities
msm (MarkovStateModel): MSM to get cluster labels from
"""
probs = [1./h for h in self.weights]
probs = [_/sum(probs) for _ in probs]
probsel = self.msm.inverse_transform(np.random.choice(range(len(probs)),
size=1,
p=probs))
return probsel[0][0]
#==========================================================================
def _put_ligand_in_solvent(self):
"""
If the greedy algorithm fails, find a frame from the solvent
cluster that's not too far away and just put the ligand there.
Tries 50 times normally, then relaxes restrictions about where
the ligand can end up. This is more likely to result in a system
that crashes but at least it's built in the first place.
Args:
molid (int): Molecule ID to add a ligand to
bulk_clusters (list of int): Clusters to consider adding
Returns:
(int): Cluster that was selected
"""
gotagoodframe = False
counter = 0
while not gotagoodframe:
solclust = int(random.choice(self.bulk_clusters))
gotagoodframe = self._add_one_ligand(clust=solclust,
permissive=counter > 50)
if counter > 50:
print("Permissively trying %d" % solclust)
sys.stdout.flush()
counter += 1
self.sampled_clusters.append(solclust)
return solclust
#==========================================================================
def _greedily_add_ligand(self):
"""
Adds a buncha ligands to each molid using the "greedy" criteria:
- low hub score (probabilistically chosen)
- not too close to an existing sampled ligand
If we try more than 10 different clusters and can't find one that
matches, just put the ligand at some location in the solvent, as
determined by a random frame in the bulk cluster
"""
done = False
tried = list(self.sampled_clusters)
while not done:
# If we've tried more than 20 different clusters, take
# a bulk one
if len(tried) > 20:
break
# Get a new unsampled cluster, except allow the bulk
# cluster to be chosen multiple times
clust = self._get_random_cluster()
if clust not in self.bulk_clusters and clust in tried:
# TODO: permissiveness allow double picking of clusters?
# May usually fail though, more expensive?
continue
done = self._add_one_ligand(clust, permissive=False)
if not done:
tried.append(clust)
sys.stdout.flush()
# If greediness failed, just place other ligand from bulk
if not done:
print("Going for solvent")
sys.stdout.flush()
self._put_ligand_in_solvent()
sys.stdout.flush()
#==========================================================================
def add_ligands_and_dabble(self):
"""
Adds remaining ligands to file.
Saves all molecules to a temporary file and invokes the Dabble
Builder API. Names them according to the replicate id.
Args:
molid (int): Molecule ID to save
"""
# Add remaining ligands
for _ in range(self.config.getint("system", "num_ligands")-1): # First already set
self._greedily_add_ligand()
# Print out resampler info, for easy backtracking
print("SAMPLER %d.psf: Clusters: %s" % (self.repindex,
self.sampled_clusters))
sys.stdout.flush()
# Write temporary file with correct ligand positions
filen = os.path.join(self.outdir, "%d_inp.mae" % self.repindex)
atomsel("all", self.molid).write('mae', filen)
with TemporaryDirectory(dir=os.environ.get("SCRATCH"),
prefix="dabble") as tdir:
if len(self.dabbleopts.get("topologies", "").strip()):
topos = self.dabbleopts["topologies"].split(',')
else:
topos = None
if len(self.dabbleopts.get("parameters", "").strip()):
params = self.dabbleopts["parameters"].split(',')
else:
params = None
builder = DabbleBuilder(solute_filename=filen,
output_filename=os.path.join(self.outdir,
"%d.prmtop" % self.repindex),
user_x=float(self.dabbleopts["dimensions"].split(',')[0]),
user_y=float(self.dabbleopts["dimensions"].split(',')[1]),
user_z=float(self.dabbleopts["dimensions"].split(',')[2]),
membrane_system=self.dabbleopts.get("solvent", "DEFAULT"),
forcefield=self.dabbleopts["forcefield"],
extra_topos=topos,
extra_params=params,
exclude_sel=self.dabbleopts.get("excludesel"),
hmassrepartition=self.config.getboolean("production", "hmr", fallback=True),
overwrite=True,
tmp_dir=tdir
)
builder.write()
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# ACTUAL MAIN AREA #
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
if __name__ == "__main__":
# Load all relevant files according to this generation
if not os.environ.get("INPUT"):
raise ValueError("No input file in $INPUT")
if not os.environ.get("CONFIG"):
raise ValueError("No config file in $CONFIG")
adder = LigandAdder(inputfile=os.environ.get("INPUT"),
configfile=os.environ.get("CONFIG"))
adder.go()