forked from openmm/openmmforcefields
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_amber.py
1578 lines (1421 loc) · 65.3 KB
/
convert_amber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# AMBER --> OpenMM force-field conversion script
# Author: Rafal P. Wiewiora, ChoderaLab
from __future__ import print_function, division
from io import StringIO
import parmed
from parmed.utils.six import iteritems
from parmed.utils.six.moves import StringIO, zip
import openmm.app as app
import openmm.unit as u
import os
import sys
import re
import tempfile
import yaml
from distutils.spawn import find_executable
import hashlib
from collections import OrderedDict
import glob
import argparse
from lxml import etree as et
import csv
import logging
import warnings
import xml.etree.ElementTree as etree
from copy import deepcopy
from parmed.exceptions import ParameterWarning
warnings.filterwarnings('error', category=ParameterWarning)
_loadoffre = re.compile(r'loadoff (\S*)', re.I)
_sourcere = re.compile(r'source (\S*)', re.I)
# check for AMBERHOME, find from tleap location if not set, exception if can't
if os.getenv('AMBERHOME'):
AMBERHOME = os.getenv('AMBERHOME')
else:
if not find_executable('tleap'):
raise Exception('AMBERHOME not set and tleap not available from PATH')
tleap_path = find_executable('tleap')
AMBERHOME = os.path.split(tleap_path)[0]
AMBERHOME = os.path.join(AMBERHOME, '../')
parmed.amber.AMBERHOME = AMBERHOME
# set global defaults for verbose and log
verbose = False
no_log = False
# set files that are ignored in leaprc's
# solvents and ions converted separately; leaprc.ff10 calls phosphoaa10.lib
# which does not exist anymore, LeAP skips it on error so we do too
ignore = {'solvents.lib', 'atomic_ions.lib', 'ions94.lib', 'ions91.lib',
'phosphoaa10.lib'}
# define NEARLYZERO to replace numerical comparisons to zero
NEARLYZERO = 1e-10
# set beta
temperature = 300.0 * u.kelvin
kB = u.BOLTZMANN_CONSTANT_kB * u.AVOGADRO_CONSTANT_NA
kT = kB * temperature
beta = 1.0/kT
class LeapException(Exception):
def __init__(self, leaprc_filename):
msg = 'Something went wrong in processing this LEaP input file:\n'
msg += '\n'
infile = open(leaprc_filename, 'rt')
contents = infile.read()
msg += contents
msg += '\n'
super(LeapException, self).__init__(msg)
def main():
global verbose
global no_log
global logger
# argparse
parser = argparse.ArgumentParser(description='AMBER --> OpenMM forcefield '
'conversion script')
parser.add_argument('--input', '-i', default='master.yaml',
help='path of the input file. Default: "master.yaml"')
parser.add_argument('--input-format', '-if', default='yaml',
help='format of the input file: "yaml" or "leaprc". Default: "yaml"')
parser.add_argument('--output-dir', '-od', help='path of the output directory. '
'Default: "ffxml/" for yaml, "./" for leaprc')
parser.add_argument('--verbose', '-v', action='store_true',
help='turns verbosity on')
parser.add_argument('--log', action='store', dest='log_filename', default=None,
help='log energies for tests to specified CSV file')
parser.add_argument('--protein-test', action='store_true',
help='validate resulting XML through protein tests')
parser.add_argument('--nucleic-test', action='store_true',
help='validate resulting XML through nucleic acid tests')
parser.add_argument('--protein-ua-test', action='store_true',
help='validate resulting XML through united-atom protein tests')
parser.add_argument('--phospho-protein-test', action='store_true',
help='validate resulting XML through phosphorylated protein tests')
parser.add_argument('--gaff-test', action='store_true',
help='validate resulting XML through small-molecule (GAFF) test')
parser.add_argument('--lipids-test', action='store_true',
help='validate resulting XML through lipids tests')
args = parser.parse_args()
verbose = args.verbose
if args.log_filename:
logger = Logger(args.log_filename) # log to file
else:
logger = Logger() # be silent
# input is either a YAML or a leaprc - default is leaprc
# output directory hardcoded here for ffxml/
if args.input_format == 'yaml':
if args.output_dir is None:
convert_yaml(args.input, ffxml_dir='ffxml/')
else:
convert_yaml(args.input, ffxml_dir=args.output_dir)
# if leaprc converted - output to the same dir
elif args.input_format == 'leaprc':
if args.output_dir is None:
ffxml_name = convert_leaprc(args.input, ffxml_dir='./')
else:
ffxml_name = convert_leaprc(args.input, ffxml_dir=args.output_dir)
if args.protein_test:
validate_protein(ffxml_name, args.input)
if args.nucleic_test:
validate_nucleic(ffxml_name, args.input)
if args.protein_ua_test:
validate_protein(ffxml_name, args.input, united_atom=True)
if args.phospho_protein_test:
validate_phospho_protein(ffxml_name, args.input)
if args.gaff_test:
validate_gaff(ffxml_name, args.input)
if args.lipids_test:
validate_lipids(ffxml_name, args.input)
else:
sys.exit('Wrong input_format chosen.')
logger.close()
def read_lines(filename):
"""
Read lines from file, stripping comments and newlines.
"""
with open(filename, 'rt') as f:
lines = [ line if '#' not in line else line[:line.index('#')] for line in f.readlines() ]
return lines
def write_file(file, contents):
"""
Write text to file.
Parameters
----------
filename : str
The file to write to
contents : str
Text contents to be written to file
"""
if type(file) == str:
outfile = open(file, 'w')
else:
outfile = os.fdopen(file, 'w')
outfile.write(contents)
outfile.close()
def convert_leaprc(files, split_filename=False, ffxml_dir='./', ignore=ignore,
provenance=None, write_unused=False, override_level=None, filter_warnings='error', is_glycam=False):
if verbose: print('\nConverting %s to ffxml...' % files)
# allow for multiple source files - further code assuming list is passed
if not isinstance(files, list):
files = [files]
basename = ''
for f in files:
f_basename = os.path.basename(f)
if split_filename:
f_basename = f_basename.split('.')[1:]
f_basename = '.'.join(f_basename)
if not basename:
basename = f_basename
else:
basename += '_'
basename += f_basename
ffxml_name = os.path.join(ffxml_dir, (basename + '.xml'))
if not os.path.exists(ffxml_dir):
os.mkdir(ffxml_dir)
if verbose: print('Preprocessing the leaprc for %s...' % basename)
# do source processing
new_files = []
for fil in files:
lines = read_lines(fil)
for line in lines:
if _sourcere.findall(line):
replace_leaprc = _sourcere.findall(line)[0]
replace_leaprc_path = os.path.join(os.path.join(AMBERHOME,
'dat/leap/cmd', replace_leaprc))
new_files.append(replace_leaprc_path)
new_files.append(fil)
# now do ignore processing and join multiple leaprc's
files = new_files
new_lines = []
for fil in files:
lines = read_lines(fil)
fil_new_lines = []
for line in lines:
if (ignore is not None and _loadoffre.findall(line) and
_loadoffre.findall(line)[0] in ignore):
continue
fil_new_lines += line
new_lines += fil_new_lines
leaprc = StringIO(''.join(new_lines))
if verbose: print('Converting to ffxml %s...' % ffxml_name)
params = parmed.amber.AmberParameterSet.from_leaprc(leaprc)
params = parmed.openmm.OpenMMParameterSet.from_parameterset(params, remediate_residues=(not write_unused))
if override_level:
for name, residue in params.residues.items():
residue.override_level = override_level
if is_glycam:
skip_duplicates = False
else:
skip_duplicates = True
if filter_warnings != 'error':
with warnings.catch_warnings():
warnings.filterwarnings(filter_warnings, category=ParameterWarning)
params.write(ffxml_name, provenance=provenance, write_unused=write_unused, improper_dihedrals_ordering='amber', skip_duplicates=skip_duplicates, is_glycam=is_glycam)
else:
params.write(ffxml_name, provenance=provenance, write_unused=write_unused, improper_dihedrals_ordering='amber', skip_duplicates=skip_duplicates, is_glycam=is_glycam)
if verbose: print('%s successfully written!' % ffxml_name)
return ffxml_name
def convert_gaff(files, ffxml_basename='', split_filename=False, ffxml_dir='./', ignore=ignore,
provenance=None, write_unused=False, filter_warnings='error'):
if verbose: print('\nConverting %s to ffxml...' % files)
# allow for multiple source files - further code assuming list is passed
if not isinstance(files, list):
files = [files]
# Create ffxml
ffxml_name = os.path.join(ffxml_dir, (ffxml_basename + '.xml'))
if not os.path.exists(ffxml_dir):
os.mkdir(ffxml_dir)
# Process parameter file
params = parmed.amber.AmberParameterSet(files)
params = parmed.openmm.OpenMMParameterSet.from_parameterset(params, remediate_residues=(not write_unused))
if filter_warnings != 'error':
with warnings.catch_warnings():
warnings.filterwarnings(filter_warnings, category=ParameterWarning)
params.write(ffxml_name, provenance=provenance, write_unused=write_unused, improper_dihedrals_ordering='amber')
else:
params.write(ffxml_name, provenance=provenance, write_unused=write_unused, improper_dihedrals_ordering='amber')
if verbose: print('%s successfully written!' % ffxml_name)
return ffxml_name
def convert_recipe(files, solvent_file=None, ffxml_dir='./', provenance=None, ffxml_basename=None,
filter_warnings='always'):
if verbose: print('\nConverting %s to ffxml...' % files)
ffxml_name = os.path.join(ffxml_dir, (ffxml_basename + '.xml'))
ffxml_temp_stringio = StringIO()
params = parmed.amber.AmberParameterSet(files)
print(params.atom_types.keys())
params = parmed.openmm.OpenMMParameterSet.from_parameterset(params)
# Change atom type naming
# atom_types
new_atom_types = OrderedDict()
for name, atom_type in params.atom_types.items():
new_name = ffxml_basename + '-' + name
new_atom_types[new_name] = atom_type
params.atom_types = new_atom_types
# atoms in residues
for name, residue in params.residues.items():
for atom in residue:
new_type = ffxml_basename + '-' + atom.type
atom.type = new_type
if solvent_file is None:
# this means this file does not include a water model - hard-coded assumption it is
# then a 'multivalent' file - set overrideLevel to 1 for all residue templates
for name, residue in params.residues.items():
residue.override_level = 1
with warnings.catch_warnings():
warnings.filterwarnings(filter_warnings, category=ParameterWarning)
params.write(ffxml_name, provenance=provenance, write_unused=False, improper_dihedrals_ordering='amber')
else:
with warnings.catch_warnings():
warnings.filterwarnings(filter_warnings, category=ParameterWarning)
params.write(ffxml_temp_stringio, provenance=provenance, write_unused=False, improper_dihedrals_ordering='amber')
ffxml_temp_stringio.seek(0)
if verbose: print('Modifying converted ffxml to append solvent parameters')
tree_main = et.parse(ffxml_temp_stringio)
tree_water = et.parse(solvent_file)
root_main = tree_main.getroot()
root_water = tree_water.getroot()
with open(ffxml_name, 'wb') as f:
f.write(b'<ForceField>\n ')
f.write(et.tostring(root_main.findall('Info')[0]))
f.write(b'<AtomTypes>\n ')
for subelement in root_main.findall('AtomTypes')[0]:
f.write(et.tostring(subelement))
f.write(b' ')
for subelement in root_water.findall('AtomTypes')[0]:
f.write(et.tostring(subelement))
f.write(b'</AtomTypes>\n <Residues>\n ')
for subelement in root_main.findall('Residues')[0]:
f.write(et.tostring(subelement))
f.write(b' ')
for subelement in root_water.findall('Residues')[0]:
f.write(et.tostring(subelement))
f.write(b'</Residues>\n <HarmonicBondForce>\n ')
for subelement in root_water.findall('HarmonicBondForce')[0]:
f.write(et.tostring(subelement))
f.write(b'</HarmonicBondForce>\n <HarmonicAngleForce>\n ')
for subelement in root_water.findall('HarmonicAngleForce')[0]:
f.write(et.tostring(subelement))
f.write(b'</HarmonicAngleForce>\n ')
f.write(('<NonbondedForce coulomb14scale="%s" lj14scale="%s">\n ' %
(root_main.findall('NonbondedForce')[0].attrib['coulomb14scale'],
root_main.findall('NonbondedForce')[0].attrib['lj14scale'])
).encode('utf-8'))
for subelement in root_main.findall('NonbondedForce')[0]:
f.write(et.tostring(subelement))
f.write(b' ')
for subelement in root_water.findall('NonbondedForce')[0]:
if subelement.tag == 'UseAttributeFromResidue': continue
f.write(et.tostring(subelement))
f.write(b'</NonbondedForce>\n</ForceField>')
if verbose: print('%s successfully written!' % ffxml_name)
return ffxml_name
def convert_yaml(yaml_name, ffxml_dir, ignore=ignore):
data = yaml.load(open(yaml_name), Loader=yaml.FullLoader)
# TODO: Verify that the version that is installed via conda matches sourcePackageVersion
# Default yaml reading mode is leaprc
ALLOWED_MODES = ('LEAPRC', 'RECIPE', 'GAFF')
for entry in data:
# Handle MODE switching
if 'MODE' in entry:
MODE = entry['MODE']
if not MODE in ALLOWED_MODES:
raise Exception(f'MODE definition must be one of {ALLOWED_MODES}')
continue
# Handle definition of source packages
if 'sourcePackage' in entry:
source_pack = entry['sourcePackage']
source_pack_ver = entry['sourcePackageVersion']
continue
if 'sourcePackage2' in entry:
# Switch mode to RECIPE processing
source_pack2 = entry['sourcePackage2']
source_pack_ver2 = entry['sourcePackageVersion2']
continue
# Extract source files, reference, and test files
source_files = entry['Source']
reference = entry['Reference']
test_filename = entry['Test']
# Make sure source_files is a list
if isinstance(source_files, str):
source_files = [source_files]
# Recipes require extra definitions
if MODE == 'RECIPE':
recipe_name = entry['Name']
solvent_name = entry['Solvent']
if 'Solvent_source' in entry:
recipe_source2 = entry['Solvent_source']
else:
recipe_source2 = None
if 'Standard' in entry:
standard_ffxml = os.path.join(ffxml_dir, (entry['Standard'] + '.xml'))
else:
standard_ffxml = None
elif MODE == 'GAFF':
recipe_name = entry['Name']
# Create provenance object
provenance = OrderedDict()
files = []
source = provenance['Source'] = []
for source_file in source_files:
if MODE == 'LEAPRC':
if os.path.exists(source_file):
_filename = os.path.join('./', source_file)
else:
_filename = os.path.join(AMBERHOME, 'dat/leap/cmd', source_file)
elif MODE == 'RECIPE':
_filename = os.path.join(AMBERHOME, 'dat/leap/', source_file)
elif MODE == 'GAFF':
_filename = os.path.join('gaff', 'dat', source_file)
files.append(_filename)
source.append(OrderedDict())
source[-1]['Source'] = source_file
md5 = hashlib.md5()
with open(_filename, 'rb') as f:
md5.update(f.read())
md5 = md5.hexdigest()
source[-1]['md5hash'] = md5
source[-1]['sourcePackage'] = source_pack
source[-1]['sourcePackageVersion'] = source_pack_ver
# For recipes, add water file and source info for it
if MODE == 'RECIPE' and recipe_source2 is not None:
_filename = os.path.join('files', recipe_source2)
solvent_file = _filename
source.append(OrderedDict())
source[-1]['Source'] = recipe_source2
md5 = hashlib.md5()
with open(_filename, 'rb') as f:
md5.update(f.read())
md5 = md5.hexdigest()
source[-1]['md5hash'] = md5
source[-1]['sourcePackage'] = source_pack2
source[-1]['sourcePackageVersion'] = source_pack_ver2
elif MODE == 'RECIPE' and recipe_source2 is None:
solvent_file = None
provenance['Reference'] = reference
# set default conversion options
write_unused = False
filter_warnings = 'error'
override_level = None
# set conversion options if present
if 'Options' in entry:
for option in entry['Options']:
if option == 'write_unused':
write_unused = entry['Options'][option]
elif option == 'filter_warnings':
filter_warnings = entry['Options'][option]
elif option == 'ffxml_dir':
ffxml_dir = entry['Options'][option]
elif option == 'override_level':
override_level = entry['Options'][option]
else:
raise Exception("Wrong option used in Options for %s"
% source_files)
# Convert files
if MODE == 'LEAPRC':
is_glycam = False
for source_file in source_files:
if 'GLYCAM' in source_file:
is_glycam = True
ffxml_name = convert_leaprc(files, ffxml_dir=ffxml_dir, ignore=ignore,
provenance=provenance, write_unused=write_unused, override_level=override_level,
filter_warnings=filter_warnings, split_filename=True, is_glycam=is_glycam)
elif MODE == 'RECIPE':
ffxml_name = convert_recipe(files, solvent_file=solvent_file,
ffxml_dir=ffxml_dir, provenance=provenance,
ffxml_basename=recipe_name)
elif MODE == 'GAFF':
ffxml_name = convert_gaff(files, ffxml_basename=recipe_name, ffxml_dir=ffxml_dir, ignore=ignore,
provenance=provenance, write_unused=write_unused,
filter_warnings=filter_warnings, split_filename=True)
if 'CharmmFFXMLFilename' in entry:
charmm_ffxml_filename = entry['CharmmFFXMLFilename']
charmm_lipid2amber_filename = entry['CharmmLipid2AmberFilename']
if verbose: print('Merging lipid entries...')
merge_lipids(ffxml_name, charmm_ffxml_filename, charmm_lipid2amber_filename)
if 'Prefix' in entry:
prefix = entry['Prefix']
if verbose: print('Rewriting %s to append prefix "%s"...' % (ffxml_name, prefix))
add_prefix_to_ffxml(ffxml_name, prefix)
if verbose: print('Validating the conversion...')
tested = False
for test in test_filename:
if test == 'protein':
validate_protein(ffxml_name, entry['Source'])
tested = True
elif test == 'nucleic':
validate_dna(ffxml_name, entry['Source'])
validate_rna(ffxml_name, entry['Source'])
tested = True
elif test == 'protein_ua':
validate_protein(ffxml_name, entry['Source'], united_atom=True)
tested = True
elif test == 'protein_phospho':
validate_phospho_protein(ffxml_name, entry['Source'])
tested = True
elif test == 'gaff':
validate_gaff(ffxml_name, entry['leaprc'], entry['Source'])
tested = True
elif test == 'water_ion':
validate_water_ion(ffxml_name, files, solvent_name, recipe_name,
standard_ffxml=standard_ffxml)
tested = True
elif test == 'dna':
validate_dna(ffxml_name, entry['Source'])
tested = True
elif test == 'rna':
validate_rna(ffxml_name, entry['Source'])
tested = True
elif test == 'lipids':
#validate_lipids(ffxml_name, source_files)
validate_merged_lipids(ffxml_name, entry['Source'])
tested = True
elif test == 'protein_glycan':
validate_glyco_protein(ffxml_name, entry['Source'])
tested = True
if not tested:
raise Exception('No validation tests have been run for %s' %
source_files)
def merge_lipids(ffxml_filename, charmm_ffxml_filename, charmm_lipid2amber_filename):
"""
Merge lipid residue definitions in AMBER ffxml file according to entries in a CHARMM ffxml file.
Parameters
----------
ffxml_filename : str
AMBER lipids ffxml filename with AMBER split lipids.
charmm_ffxml_filename : str
CHARMM ffxml lipids
charmmlipid2amber_filename : str
CHARMM CSV file containing translation from CHARMM -> AMBER
"""
# Read the input files.
charmmff = etree.parse(charmm_ffxml_filename)
amberff = etree.parse(ffxml_filename)
charmmResidues = charmmff.getroot().find('Residues').findall('Residue')
amberResidues = amberff.getroot().find('Residues').findall('Residue')
amberResMap = {}
for res in amberResidues:
atoms = dict((atom.attrib['name'], atom) for atom in res.findall('Atom'))
amberResMap[res.attrib['name']] = atoms
translations = {}
with open(charmm_lipid2amber_filename) as input:
# Skip the first two lines.
input.readline()
input.readline()
for line in input:
fields = line.split(',')
mergedRes = fields[0]
mergedAtom = fields[2].split()[0]
originalAtom, originalRes = fields[3].split()
translations[(mergedRes, mergedAtom)] = (originalRes, originalAtom)
# Remove all residues from the Amber file.
parentNode = amberff.getroot().find('Residues')
for res in amberResidues:
parentNode.remove(res)
# Copy over the CHARMM residues, making appropriate replacements.
def translateResidue(residue):
newres = deepcopy(residue)
# Translate atom properties
for atom in newres.findall('Atom'):
key = (residue.attrib['name'], atom.attrib['name'])
if key not in translations:
return None # We don't have a translation.
amberResName, amberAtomName = translations[key]
if amberResName not in amberResMap or amberAtomName not in amberResMap[amberResName]:
return None # We don't have a translation.
amberAtom = amberResMap[amberResName][amberAtomName]
for attrib in amberAtom.attrib:
if attrib != 'name':
atom.attrib[attrib] = amberAtom.attrib[attrib]
# Remove Patches from CHARMM residues
for patch in newres.findall('AllowPatch'):
newres.remove(patch)
return newres
# Iterate over CHARMM lipid residue templates and replace components with AMBER parameters
for residue in charmmResidues:
copy = translateResidue(residue)
if copy is not None:
parentNode.append(copy)
# Write merged lipid ffxml file (overwriting original file)
amberff.write(ffxml_filename)
def add_prefix_to_ffxml(ffxml_filename, prefix):
"""
Replace the contents of an ffxml file with a modified version in which every atom type is prefixed with `prefix`.
Parameters
----------
ffxml_filename : str
OpenMM ffxml filename (will be overwritten)
prefix : str
Prefix
"""
import re
import sys
inTypes = False
replacements = {}
modified_contents = ''
with open(ffxml_filename, 'r') as infile:
for line in infile:
if '<AtomTypes>' in line:
inTypes = True
if '</AtomTypes>' in line:
inTypes = False
if inTypes:
match = re.search('name="(.*?)"', line)
if match is not None:
name = match.group(1)
newName = prefix + '-' + name
line = line.replace('name="%s"' % name, 'name="%s"' % newName)
replacements['type="%s"' % name] = 'type="%s"' % newName
replacements['type1="%s"' % name] = 'type1="%s"' % newName
replacements['type2="%s"' % name] = 'type2="%s"' % newName
replacements['type3="%s"' % name] = 'type3="%s"' % newName
replacements['type4="%s"' % name] = 'type4="%s"' % newName
else:
for key in replacements:
if key in line:
line = line.replace(key, replacements[key])
if line.endswith('\n'):
line = line[:-1]
modified_contents += line + '\n'
with open(ffxml_filename, 'w') as outfile:
outfile.write(modified_contents)
def assert_energies_glyco_protein(prmtop, inpcrd, ffxml, tolerance=1e-1):
import math
# Get AMBER system
parm_amber = parmed.load_file(prmtop, inpcrd)
system_amber = parm_amber.createSystem()
# Create topology where residue names are named from HYP to CHYP or NHYP (etc) where necessary
source_topology = parm_amber.topology
destination_topology = app.Topology()
new_atoms = {}
for chain in source_topology.chains():
new_chain = destination_topology.addChain(chain.id)
for residue in chain.residues():
new_name = residue.name
if residue.index in [0, 5, 13, 21, 29]:
new_name = 'N' + residue.name
elif residue.index in [4, 9, 17, 25, 33]:
new_name = 'C' + residue.name
new_residue = destination_topology.addResidue(new_name, new_chain, residue.id)
for atom in residue.atoms():
new_atom = destination_topology.addAtom(atom.name, atom.element, new_residue, atom.id)
new_atoms[atom] = new_atom
for bond in source_topology.bonds():
order = bond.order
destination_topology.addBond(new_atoms[bond[0]], new_atoms[bond[1]], order=order)
# Get OpenMM system
if isinstance(ffxml, str):
ff = app.ForceField(ffxml)
else:
ff = app.ForceField(*ffxml)
system_omm = ff.createSystem(destination_topology, ignoreExternalBonds=True)
def compute_potential_components(system, positions, beta=beta):
# Note: this is copied from perses
# TODO: consider moving this outside of assert_energies_glyco_protein()
system = deepcopy(system)
for index in range(system.getNumForces()):
force = system.getForce(index)
force.setForceGroup(index)
integrator = openmm.VerletIntegrator(1.0*u.femtosecond)
platform = openmm.Platform.getPlatformByName('Reference')
context = openmm.Context(system, integrator, platform)
context.setPositions(positions)
energy_components = list()
for index in range(system.getNumForces()):
force = system.getForce(index)
forcename = force.__class__.__name__
groups = 1<<index
potential = beta * context.getState(getEnergy=True, groups=groups).getPotentialEnergy()
energy_components.append((forcename, potential))
del context, integrator
return energy_components
amber_energies = compute_potential_components(system_amber, parm_amber.positions)
omm_energies = compute_potential_components(system_omm, parm_amber.positions)
for amber, omm in zip(amber_energies, omm_energies):
force_name = amber[0]
assert math.isclose(amber[1], omm[1], rel_tol=tolerance)
print(force_name, amber[1], omm[1])
def assert_energies(prmtop, inpcrd, ffxml, system_name='unknown', tolerance=2.5e-5,
improper_tolerance=1e-2, units=u.kilojoules_per_mole, openmm_topology=None, openmm_positions=None):
# AMBER
parm_amber = parmed.load_file(prmtop, inpcrd)
system_amber = parm_amber.createSystem(splitDihedrals=True)
amber_energies = parmed.openmm.energy_decomposition_system(parm_amber,
system_amber, nrg=units)
# OpenMM-ffxml
if isinstance(ffxml, str):
ff = app.ForceField(ffxml)
else:
ff = app.ForceField(*ffxml)
if openmm_positions is None:
openmm_positions = parm_amber.positions
if openmm_topology is not None:
system_omm = ff.createSystem(openmm_topology)
parm_omm = parmed.openmm.load_topology(openmm_topology, system_omm,
xyz=openmm_positions)
else:
system_omm = ff.createSystem(parm_amber.topology)
parm_omm = parmed.openmm.load_topology(parm_amber.topology, system_omm,
xyz=parm_amber.positions)
system_omm = parm_omm.createSystem(splitDihedrals=True)
omm_energies = parmed.openmm.energy_decomposition_system(parm_omm,
system_omm, nrg=units, platform='Reference')
# calc rel energies and assert
energies = []
rel_energies = []
for i, j in zip(amber_energies, omm_energies):
if i[0] != j[0]:
raise Exception('Mismatch in energy tuples naming.')
if abs(i[1]) > NEARLYZERO:
rel_energies.append((i[0], abs((i[1]-j[1])/i[1])))
else:
if abs(j[1]) > NEARLYZERO:
raise AssertionError('One of AMBER %s energies (%s) for %s is zero, '
'while the corresponding OpenMM energy is non-zero' %
(system_name, i[0], ffxml))
rel_energies.append((i[0], 0))
dihedrals_done = False
for (i, amber_energy, openmm_energy) in zip(rel_energies, amber_energies, omm_energies):
if i[0] != 'PeriodicTorsionForce':
if i[1] > tolerance:
raise AssertionError('%s relative energy error (%s, %f) outside of allowed tolerance (%f) for %s: AMBER %s OpenMM %s' %
(system_name, i[0], i[1], tolerance, ffxml, amber_energy, openmm_energy))
else:
if not dihedrals_done:
if i[1] > tolerance:
raise AssertionError('%s relative energy error (%s, %f) outside of allowed tolerance (%f) for %s: AMBER %s OpenMM %s' %
(system_name, i[0], i[1], tolerance, ffxml, amber_energy, openmm_energy))
dihedrals_done = True
else: #impropers
if i[1] > improper_tolerance:
raise AssertionError('%s relative energy error (%s-impropers, %f) outside of allowed tolerance (%f) for %s: AMBER %s OpenMM %s' %
(system_name, i[0], i[1], improper_tolerance, ffxml, amber_energy, openmm_energy))
# logging
if not no_log:
amber_energies_log = dict()
omm_energies_log = dict()
rel_energies_log = dict()
amber_energies_log['ffxml_name'] = ffxml
amber_energies_log['test_system'] = system_name
amber_energies_log['data_type'] = 'AMBER'
amber_energies_log['units'] = units
omm_energies_log['ffxml_name'] = ffxml
omm_energies_log['test_system'] = system_name
omm_energies_log['data_type'] = 'OpenMM'
omm_energies_log['units'] = units
rel_energies_log['ffxml_name'] = ffxml
rel_energies_log['test_system'] = system_name
rel_energies_log['data_type'] = 'abs((AMBER-OpenMM)/AMBER)'
dihedrals_done = False
for item in amber_energies:
if item[0] == 'PeriodicTorsionForce' and not dihedrals_done:
amber_energies_log['PeriodicTorsionForce_dihedrals'] = item[1]
dihedrals_done = True
elif item[0] == 'PeriodicTorsionForce' and dihedrals_done:
amber_energies_log['PeriodicTorsionForce_impropers'] = item[1]
elif item[0] == 'CMMotionRemover':
continue
else:
amber_energies_log[item[0]] = item[1]
dihedrals_done = False
for item in omm_energies:
if item[0] == 'PeriodicTorsionForce' and not dihedrals_done:
omm_energies_log['PeriodicTorsionForce_dihedrals'] = item[1]
dihedrals_done = True
elif item[0] == 'PeriodicTorsionForce' and dihedrals_done:
omm_energies_log['PeriodicTorsionForce_impropers'] = item[1]
elif item[0] == 'CMMotionRemover':
continue
else:
omm_energies_log[item[0]] = item[1]
dihedrals_done = False
for item in rel_energies:
if item[0] == 'PeriodicTorsionForce' and not dihedrals_done:
rel_energies_log['PeriodicTorsionForce_dihedrals'] = item[1]
dihedrals_done = True
elif item[0] == 'PeriodicTorsionForce' and dihedrals_done:
rel_energies_log['PeriodicTorsionForce_impropers'] = item[1]
elif item[0] == 'CMMotionRemover':
continue
else:
rel_energies_log[item[0]] = item[1]
logger.log(amber_energies_log)
logger.log(omm_energies_log)
logger.log(rel_energies_log)
def validate_protein(ffxml_name, leaprc_name, united_atom=False):
if verbose: print('Protein energy validation for %s' % ffxml_name)
if verbose: print('Preparing temporary files for validation...')
ala3_top = tempfile.mkstemp()
ala3_crd = tempfile.mkstemp()
villin_top = tempfile.mkstemp()
villin_crd = tempfile.mkstemp()
leap_script_ala3_file = tempfile.mkstemp()
leap_script_villin_file = tempfile.mkstemp()
if verbose: print('Preparing LeaP scripts...')
if not united_atom:
leap_script_ala3_string = """source %s
x = loadPdb files/ala3.pdb
saveAmberParm x %s %s
quit""" % (leaprc_name, ala3_top[1], ala3_crd[1])
leap_script_villin_string = """source %s
x = loadPdb files/villin.pdb
saveAmberParm x %s %s
quit""" % (leaprc_name, villin_top[1], villin_crd[1])
else:
leap_script_ala3_string = """source %s
x = loadPdb files/ala3_ua.pdb
saveAmberParm x %s %s
quit""" % (leaprc_name, ala3_top[1], ala3_crd[1])
leap_script_villin_string = """source %s
x = loadPdb files/villin_ua.pdb
saveAmberParm x %s %s
quit""" % (leaprc_name, villin_top[1], villin_crd[1])
write_file(leap_script_ala3_file[0], leap_script_ala3_string)
write_file(leap_script_villin_file[0], leap_script_villin_string)
if verbose: print('Running LEaP...')
os.system('tleap -f %s > %s' % (leap_script_ala3_file[1], os.devnull))
if os.path.getsize(ala3_top[1]) == 0 or os.path.getsize(ala3_crd[1]) == 0:
raise LeapException(leap_script_ala3_file[1])
os.system('tleap -f %s > %s' % (leap_script_villin_file[1], os.devnull))
if os.path.getsize(villin_top[1]) == 0 or os.path.getsize(villin_crd[1]) == 0:
raise LeapException(leap_script_villin_file[1])
try:
if verbose: print('Calculating and validating ala_ala_ala energies...')
assert_energies(ala3_top[1], ala3_crd[1], ffxml_name,
system_name='protein-ala_ala_ala')
if verbose: print('Ala_ala_ala energy validation successful!')
if verbose: print('Calculating and validating villin headpiece energies...')
assert_energies(villin_top[1], villin_crd[1], ffxml_name,
system_name='protein-villin headpiece')
if verbose: print('Villin headpiece energy validation successful!')
finally:
if verbose: print('Deleting temp files...')
for f in (ala3_top, ala3_crd, villin_top, villin_crd, leap_script_ala3_file,
leap_script_villin_file):
os.unlink(f[1])
if verbose: print('Protein energy validation for %s done!' % ffxml_name)
def validate_dna(ffxml_name, leaprc_name):
if verbose: print('DNA energy validation for %s' % ffxml_name)
if verbose: print('Preparing temporary files for validation...')
dna_top = tempfile.mkstemp()
dna_crd = tempfile.mkstemp()
leap_script_dna_file = tempfile.mkstemp()
if verbose: print('Preparing LeaP scripts...')
leap_script_dna_string = """addPdbAtomMap {
{ "H1'" "H1*" }
{ "H2'" "H2'1" }
{ "H2''" "H2'2" }
{ "H3'" "H3*" }
{ "H4'" "H4*" }
{ "H5'" "H5'1" }
{ "H5''" "H5'2" }
{ "HO2'" "HO'2" }
{ "HO5'" "H5T" }
{ "HO3'" "H3T" }
{ "OP1" "O1P" }
{ "OP2" "O2P" }
}
source %s
addPdbResMap {
{ 0 "DG" "DG5" } { 1 "DG" "DG3" }
{ 0 "DA" "DA5" } { 1 "DA" "DA3" }
{ 0 "DC" "DC5" } { 1 "DC" "DC3" }
{ 0 "DT" "DT5" } { 1 "DT" "DT3" }
}
x = loadPdb files/4rzn_dna.pdb
saveAmberParm x %s %s
quit""" % (leaprc_name, dna_top[1], dna_crd[1])
write_file(leap_script_dna_file[0], leap_script_dna_string)
if verbose: print('Running LEaP...')
os.system('tleap -f %s > %s' % (leap_script_dna_file[1], os.devnull))
if os.path.getsize(dna_top[1]) == 0 or os.path.getsize(dna_crd[1]) == 0:
raise LeapException(leap_script_dna_file[1])
try:
if verbose: print('Calculating and validating DNA energies...')
assert_energies(dna_top[1], dna_crd[1], ffxml_name,
system_name='nucleic-DNA')
if verbose: print('DNA energy validation successful!')
finally:
if verbose: print('Deleting temp files...')
for f in (dna_top, dna_crd, leap_script_dna_file):
os.unlink(f[1])
if verbose: print('DNA energy validation for %s done!' % ffxml_name)
def validate_rna(ffxml_name, leaprc_name):
if verbose: print('RNA energy validation for %s' % ffxml_name)
if verbose: print('Preparing temporary files for validation...')
rna_top = tempfile.mkstemp()
rna_crd = tempfile.mkstemp()
leap_script_rna_file = tempfile.mkstemp()
leap_script_rna_file_alt = tempfile.mkstemp()
if verbose: print('Preparing LeaP scripts...')
leap_script_rna_string = """
addPdbAtomMap {
{ "H1'" "H1*" }
{ "H2'" "H2'1" }
{ "H2''" "H2'2" }
{ "H3'" "H3*" }
{ "H4'" "H4*" }
{ "H5'" "H5'1" }
{ "H5''" "H5'2" }
{ "HO2'" "HO'2" }
{ "HO5'" "H5T" }
{ "HO3'" "H3T" }
{ "OP1" "O1P" }
{ "OP2" "O2P" }
}
source %s
addPdbResMap {
{ 0 "G" "G5" } { 1 "G" "G3" } { "G" "G" }
{ 0 "A" "A5" } { 1 "A" "A3" } { "A" "A" }
{ 0 "C" "C5" } { 1 "C" "C3" } { "C" "C" }
{ 0 "U" "U5" } { 1 "U" "U3" } { "U" "U" }
}
x = loadPdb files/5c5w_rna.pdb
saveAmberParm x %s %s
quit""" % (leaprc_name, rna_top[1], rna_crd[1])
leap_script_rna_string_alt = """
addPdbAtomMap {
{ "H1'" "H1*" }
{ "H2'" "H2'1" }
{ "H2''" "H2'2" }
{ "H3'" "H3*" }
{ "H4'" "H4*" }
{ "H5'" "H5'1" }
{ "H5''" "H5'2" }
{ "HO2'" "HO'2" }
{ "HO5'" "H5T" }
{ "HO3'" "H3T" }
{ "OP1" "O1P" }
{ "OP2" "O2P" }
}
source %s
addPdbResMap {
{ 0 "G" "RG5" } { 1 "G" "RG3" } { "G" "RG" }
{ 0 "A" "RA5" } { 1 "A" "RA3" } { "A" "RA" }
{ 0 "C" "RC5" } { 1 "C" "RC3" } { "C" "RC" }
{ 0 "U" "RU5" } { 1 "U" "RU3" } { "U" "RU" }
}
x = loadPdb files/5c5w_rna.pdb
saveAmberParm x %s %s
quit""" % (leaprc_name, rna_top[1], rna_crd[1])
write_file(leap_script_rna_file[0], leap_script_rna_string)
write_file(leap_script_rna_file_alt[0], leap_script_rna_string_alt)
if verbose: print('Running LEaP...')
os.system('tleap -f %s > %s' % (leap_script_rna_file[1], os.devnull))
if os.path.getsize(rna_top[1]) == 0 or os.path.getsize(rna_crd[1]) == 0:
# try alternative name mappings
os.system('tleap -f %s > %s' % (leap_script_rna_file_alt[1], os.devnull))
if os.path.getsize(rna_top[1]) == 0 or os.path.getsize(rna_crd[1]) == 0:
raise LeapException(leap_script_rna_file_alt[1])
try:
if verbose: print('Calculating and validating RNA energies...')
# improper testing turned off pending solution to problems
assert_energies(rna_top[1], rna_crd[1], ffxml_name,
system_name='nucleic-RNA')
if verbose: print('RNA energy validation successful!')
finally:
if verbose: print('Deleting temp files...')
for f in (rna_top, rna_crd, leap_script_rna_file, leap_script_rna_file_alt):
os.unlink(f[1])
if verbose: print('RNA energy validation for %s done!' % ffxml_name)
def validate_gaff(ffxml_name, leaprc_name, gaff_dat_name):
if verbose: print('GAFF energy validation for %s' % ffxml_name)
if verbose: print('Preparing temporary files for validation...')
imatinib_top = tempfile.mkstemp()
imatinib_crd = tempfile.mkstemp()
leap_script_imatinib_file = tempfile.mkstemp()