-
Notifications
You must be signed in to change notification settings - Fork 874
/
fragmenter.py
305 lines (279 loc) · 17 KB
/
fragmenter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
"""Perform fragmentation of molecules."""
from __future__ import annotations
import copy
from typing import TYPE_CHECKING
from monty.json import MSONable
from pymatgen.analysis.graphs import MoleculeGraph, MolGraphSplitError
from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender
from pymatgen.io.babel import BabelMolAdaptor
if TYPE_CHECKING:
from pymatgen.core.structure import Molecule
__author__ = "Samuel Blau"
__copyright__ = "Copyright 2018, The Materials Project"
__version__ = "2.0"
__maintainer__ = "Samuel Blau"
__email__ = "[email protected]"
__status__ = "Beta"
__date__ = "8/21/19"
class Fragmenter(MSONable):
"""Molecule fragmenter class."""
def __init__(
self,
molecule: Molecule,
edges: list | None = None,
depth: int = 1,
open_rings: bool = False,
use_metal_edge_extender: bool = False,
opt_steps: int = 10000,
prev_unique_frag_dict: dict | None = None,
assume_previous_thoroughness: bool = True,
):
"""Standard constructor for molecule fragmentation.
Args:
molecule (Molecule): The molecule to fragment.
edges (list): List of index pairs that define graph edges, aka molecule bonds. If not set,
edges will be determined with OpenBabel. Defaults to None.
depth (int): The number of levels of iterative fragmentation to perform, where each level
will include fragments obtained by breaking one bond of a fragment one level up.
Defaults to 1. However, if set to 0, instead all possible fragments are generated
using an alternative, non-iterative scheme.
open_rings (bool): Whether or not to open any rings encountered during fragmentation.
Defaults to False. If true, any bond that fails to yield disconnected graphs when
broken is instead removed and the entire structure is optimized with OpenBabel in
order to obtain a good initial guess for an opened geometry that can then be put
back into QChem to be optimized without the ring just reforming.
use_metal_edge_extender (bool): Whether or not to attempt to add additional edges from
O, N, F, or Cl to any Li or Mg atoms present that OpenBabel may have missed. Defaults
to False. Most important for ionic bonding. Note that additional metal edges may yield
new "rings" (e.g. -C-O-Li-O- in LiEC) that will not play nicely with ring opening.
opt_steps (int): Number of optimization steps when opening rings. Defaults to 10000.
prev_unique_frag_dict (dict): A dictionary of previously identified unique fragments.
Defaults to None. Typically only used when trying to find the set of unique fragments
that come from multiple molecules.
assume_previous_thoroughness (bool): Whether or not to assume that a molecule / fragment
provided in prev_unique_frag_dict has all of its unique subfragments also provided in
prev_unique_frag_dict. Defaults to True. This is an essential optimization when trying
to find the set of unique fragments that come from multiple molecules if all of those
molecules are being fully iteratively fragmented. However, if you're passing a
prev_unique_frag_dict which includes a molecule and its fragments that were generated
at insufficient depth to find all possible subfragments to a fragmentation calculation
of a different molecule that you aim to find all possible subfragments of and which has
common subfragments with the previous molecule, this optimization will cause you to
miss some unique subfragments.
"""
self.assume_previous_thoroughness = assume_previous_thoroughness
self.open_rings = open_rings
self.opt_steps = opt_steps
if edges is None:
self.mol_graph = MoleculeGraph.from_local_env_strategy(molecule, OpenBabelNN())
else:
_edges: dict[tuple[int, int], dict | None] = {(edge[0], edge[1]): None for edge in edges}
self.mol_graph = MoleculeGraph.from_edges(molecule, _edges)
if ("Li" in molecule.composition or "Mg" in molecule.composition) and use_metal_edge_extender:
self.mol_graph = metal_edge_extender(self.mol_graph)
self.prev_unique_frag_dict = prev_unique_frag_dict or {}
self.new_unique_frag_dict = {} # new fragments from the given molecule not contained in prev_unique_frag_dict
self.all_unique_frag_dict = {} # all fragments from just the given molecule
self.unique_frag_dict = {} # all fragments from both the given molecule and prev_unique_frag_dict
if depth == 0: # Non-iterative, find all possible fragments:
# Find all unique fragments besides those involving ring opening
self.all_unique_frag_dict = self.mol_graph.build_unique_fragments()
# Then, if self.open_rings is True, open all rings present in self.unique_fragments
# in order to capture all unique fragments that require ring opening.
if self.open_rings:
self._open_all_rings()
else: # Iterative fragment generation:
self.fragments_by_level = {}
# Loop through the number of levels,
for level in range(depth):
# If on the first level, perform one level of fragmentation on the principle molecule graph:
if level == 0:
alph_formula = self.mol_graph.molecule.composition.alphabetical_formula
self.fragments_by_level["0"] = self._fragment_one_level(
{f"{alph_formula} E{len(self.mol_graph.graph.edges())}": [self.mol_graph]}
)
else:
num_frags_prev_level = 0
for key in self.fragments_by_level[str(level - 1)]:
num_frags_prev_level += len(self.fragments_by_level[str(level - 1)][key])
if num_frags_prev_level == 0:
# Nothing left to fragment, so exit the loop:
break
# If not on the first level, and there are fragments present in the previous level, then
# perform one level of fragmentation on all fragments present in the previous level:
self.fragments_by_level[str(level)] = self._fragment_one_level(
self.fragments_by_level[str(level - 1)]
)
if self.prev_unique_frag_dict == {}:
self.new_unique_frag_dict = copy.deepcopy(self.all_unique_frag_dict)
else:
for frag_key in self.all_unique_frag_dict:
if frag_key not in self.prev_unique_frag_dict:
self.new_unique_frag_dict[frag_key] = copy.deepcopy(self.all_unique_frag_dict[frag_key])
else:
for fragment in self.all_unique_frag_dict[frag_key]:
found = False
for prev_frag in self.prev_unique_frag_dict[frag_key]:
if fragment.isomorphic_to(prev_frag):
found = True
if not found:
if frag_key not in self.new_unique_frag_dict:
self.new_unique_frag_dict[frag_key] = [fragment]
else:
self.new_unique_frag_dict[frag_key].append(fragment)
self.new_unique_fragments = 0
for frag_key in self.new_unique_frag_dict:
self.new_unique_fragments += len(self.new_unique_frag_dict[frag_key])
if self.prev_unique_frag_dict == {}:
self.unique_frag_dict = self.new_unique_frag_dict
self.total_unique_fragments = self.new_unique_fragments
else:
self.unique_frag_dict = copy.deepcopy(self.prev_unique_frag_dict)
for frag_key in self.new_unique_frag_dict:
if frag_key in self.unique_frag_dict:
self.unique_frag_dict[frag_key] += [*self.new_unique_frag_dict[frag_key]]
else:
self.unique_frag_dict[frag_key] = copy.deepcopy(self.new_unique_frag_dict[frag_key])
self.total_unique_fragments = 0
for frag_key in self.unique_frag_dict:
self.total_unique_fragments += len(self.unique_frag_dict[frag_key])
def _fragment_one_level(self, old_frag_dict: dict) -> dict:
"""
Perform one step of iterative fragmentation on a list of molecule graphs. Loop through the graphs,
then loop through each graph's edges and attempt to remove that edge in order to obtain two
disconnected subgraphs, aka two new fragments. If successful, check if the new fragments
are already present in self.unique_fragments, and append them if not. If unsuccessful, we know
that edge belongs to a ring. If we are opening rings, do so with that bond, and then again
check if the resulting fragment is present in self.unique_fragments and add it if it is not.
"""
new_frag_dict = {}
for old_frags in old_frag_dict.values():
for old_frag in old_frags:
for edge in old_frag.graph.edges:
bond = [(edge[0], edge[1])]
fragments = []
try:
fragments = old_frag.split_molecule_subgraphs(bond, allow_reverse=True)
except MolGraphSplitError:
if self.open_rings:
fragments = [open_ring(old_frag, bond, self.opt_steps)]
for fragment in fragments:
alph_formula = fragment.molecule.composition.alphabetical_formula
new_frag_key = f"{alph_formula} E{len(fragment.graph.edges())}"
proceed = True
if (
self.assume_previous_thoroughness
and self.prev_unique_frag_dict != {}
and new_frag_key in self.prev_unique_frag_dict
):
for unique_fragment in self.prev_unique_frag_dict[new_frag_key]:
if unique_fragment.isomorphic_to(fragment):
proceed = False
break
if proceed:
if new_frag_key not in self.all_unique_frag_dict:
self.all_unique_frag_dict[new_frag_key] = [fragment]
new_frag_dict[new_frag_key] = [fragment]
else:
found = False
for unique_fragment in self.all_unique_frag_dict[new_frag_key]:
if unique_fragment.isomorphic_to(fragment):
found = True
break
if not found:
self.all_unique_frag_dict[new_frag_key].append(fragment)
if new_frag_key in new_frag_dict:
new_frag_dict[new_frag_key].append(fragment)
else:
new_frag_dict[new_frag_key] = [fragment]
return new_frag_dict
def _open_all_rings(self) -> None:
"""
Having already generated all unique fragments that did not require ring opening,
now we want to also obtain fragments that do require opening. We achieve this by
looping through all unique fragments and opening each bond present in any ring
we find. We also temporarily add the principle molecule graph to self.unique_fragments
so that its rings are opened as well.
"""
alph_formula = self.mol_graph.molecule.composition.alphabetical_formula
mol_key = f"{alph_formula} E{len(self.mol_graph.graph.edges())}"
self.all_unique_frag_dict[mol_key] = [self.mol_graph]
new_frag_keys: dict[str, list] = {"0": []}
new_frag_key_dict = {}
for key in self.all_unique_frag_dict:
for fragment in self.all_unique_frag_dict[key]:
ring_edges = fragment.find_rings()
if ring_edges != []:
for bond in ring_edges[0]:
new_fragment = open_ring(fragment, [bond], self.opt_steps)
alph_formula = new_fragment.molecule.composition.alphabetical_formula
frag_key = f"{alph_formula} E{len(new_fragment.graph.edges())}"
if frag_key not in self.all_unique_frag_dict:
if frag_key not in new_frag_keys["0"]:
new_frag_keys["0"].append(copy.deepcopy(frag_key))
new_frag_key_dict[frag_key] = copy.deepcopy([new_fragment])
else:
found = False
for unique_fragment in new_frag_key_dict[frag_key]:
if unique_fragment.isomorphic_to(new_fragment):
found = True
break
if not found:
new_frag_key_dict[frag_key].append(copy.deepcopy(new_fragment))
else:
found = False
for unique_fragment in self.all_unique_frag_dict[frag_key]:
if unique_fragment.isomorphic_to(new_fragment):
found = True
break
if not found:
self.all_unique_frag_dict[frag_key].append(copy.deepcopy(new_fragment))
for key, value in new_frag_key_dict.items():
self.all_unique_frag_dict[key] = copy.deepcopy(value)
idx = 0
while len(new_frag_keys[str(idx)]) != 0:
new_frag_key_dict = {}
idx += 1
new_frag_keys[str(idx)] = []
for key in new_frag_keys[str(idx - 1)]:
for fragment in self.all_unique_frag_dict[key]:
ring_edges = fragment.find_rings()
if ring_edges != []:
for bond in ring_edges[0]:
new_fragment = open_ring(fragment, [bond], self.opt_steps)
alph_formula = new_fragment.molecule.composition.alphabetical_formula
frag_key = f"{alph_formula} E{len(new_fragment.graph.edges())}"
if frag_key not in self.all_unique_frag_dict:
if frag_key not in new_frag_keys[str(idx)]:
new_frag_keys[str(idx)].append(copy.deepcopy(frag_key))
new_frag_key_dict[frag_key] = copy.deepcopy([new_fragment])
else:
found = False
for unique_fragment in new_frag_key_dict[frag_key]:
if unique_fragment.isomorphic_to(new_fragment):
found = True
break
if not found:
new_frag_key_dict[frag_key].append(copy.deepcopy(new_fragment))
else:
found = False
for unique_fragment in self.all_unique_frag_dict[frag_key]:
if unique_fragment.isomorphic_to(new_fragment):
found = True
break
if not found:
self.all_unique_frag_dict[frag_key].append(copy.deepcopy(new_fragment))
for key, value in new_frag_key_dict.items():
self.all_unique_frag_dict[key] = copy.deepcopy(value)
self.all_unique_frag_dict.pop(mol_key)
def open_ring(mol_graph: MoleculeGraph, bond: list, opt_steps: int) -> MoleculeGraph:
"""Open a ring using OpenBabel's local opt. Given a molecule graph and a bond,
convert the molecule graph into an OpenBabel molecule, remove
the given bond, perform the local opt with the number of steps determined by
self.steps, and then convert the resulting structure back into a molecule graph
to be returned.
"""
ob_mol = BabelMolAdaptor.from_molecule_graph(mol_graph)
ob_mol.remove_bond(bond[0][0] + 1, bond[0][1] + 1)
ob_mol.localopt(steps=opt_steps, forcefield="uff")
return MoleculeGraph.from_local_env_strategy(ob_mol.pymatgen_mol, OpenBabelNN())