From 85d88d85f2a9538667be147835093d402d6d80ea Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Fri, 3 Feb 2023 08:23:23 -0800 Subject: [PATCH 01/26] Initial commit; adding graph hashing from emmet, beginning work on molecule-specific trajectory --- pymatgen/core/trajectory.py | 538 ++++++++++++++++++++++++++++++++- pymatgen/util/graph_hashing.py | 266 ++++++++++++++++ 2 files changed, 803 insertions(+), 1 deletion(-) create mode 100644 pymatgen/util/graph_hashing.py diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 04dbe8561ea..c4dcc3820af 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -28,7 +28,7 @@ ) from pymatgen.io.vasp.outputs import Vasprun, Xdatcar -__author__ = "Eric Sivonxay, Shyam Dwaraknath, Mingjian Wen" +__author__ = "Eric Sivonxay, Shyam Dwaraknath, Mingjian Wen, Evan Spotte-Smith" __version__ = "0.1" __date__ = "Jun 29, 2022" @@ -598,3 +598,539 @@ def _get_site_props(self, frames: int | list[int]) -> SitePropsType | None: return [self.site_properties[i] for i in frames] raise ValueError("Unexpected frames type.") raise ValueError("Unexpected site_properties type.") + +class MoleculeOptimizeTrajectory(MSONable): + """ + Trajectory of a geometry optimization for a system without a lattice (namely, a Molecule object). + + Provides basic functions such as slicing trajectory, combining trajectories, and + obtaining displacements. + """ + + def __init__( + self, + species: list[str | Element | Species | DummySpecies | Composition], + coords: list[list[Vector3D]] | np.ndarray, + *, + site_properties: SitePropsType | None = None, + frame_properties: list[dict] | None = None, + constant_lattice: bool = True, + time_step: int | float | None = None, + coords_are_displacement: bool = False, + base_positions: list[list[Vector3D]] | np.ndarray | None = None, + ): + """ + In below, `N` denotes the number of sites in the Molecule, and `M` denotes the + number of frames in the trajectory. + + Args: + species: shape (N,). List of species on each site. Can take in flexible + input, including: + i. A sequence of element / species specified either as string + symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers, + e.g., (3, 56, ...) or actual Element or Species objects. + ii. List of dict of elements/species and occupancies, e.g., + [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of + disordered structures. + coords: shape (M, N, 3). coordinates of the sites. + site_properties: Properties associated with the sites. This should be a + list of `M` dicts for a single dict. If a list of dicts, each provides + the site properties for a frame. Each value in a dict should be a + sequence of length `N`, giving the properties of the `N` sites. + For example, for a trajectory with `M=2` and `N=4`, the + `site_properties` can be: [{"magmom":[5,5,5,5]}, {"magmom":[5,5,5,5]}]. + If a single dict, the site properties in the dict apply to all frames + in the trajectory. For example, for a trajectory with `M=2` and `N=4`, + {"magmom":[2,2,2,2]} means that, through the entire trajectory, + the magmom are kept constant at 2 for all four atoms. + frame_properties: Properties associated with the structure (e.g. total + energy). This should be a sequence of `M` dicts, with each dict + providing the properties for a frame. For example, for a trajectory with + `M=2`, the `frame_properties` can be [{'energy':1.0}, {'energy':2.0}]. + time_step: Time step of MD simulation in femto-seconds. Should be `None` + for a geometry optimization. + coords_are_displacement: Whether `coords` are given in displacements + (True) or positions (False). Note, if this is `True`, `coords` + of a frame (say i) should be relative to the previous frame (i.e. + i-1), but not relative to the `base_position`. + base_positions: shape (N, 3). The starting positions of all atoms in the + trajectory. Used to reconstruct positions when converting from + displacements to positions. Only needs to be specified if + `coords_are_displacement=True`. Defaults to the first index of + `coords` when `coords_are_displacement=False`. + """ + + if coords_are_displacement: + if base_positions is None: + warnings.warn( + "Without providing an array of starting positions, the positions " + "for each time step will not be available." + ) + self.base_positions = base_positions + else: + self.base_positions = frac_coords[0] # type: ignore[assignment] + self.coords_are_displacement = coords_are_displacement + + self.species = species + self.frac_coords = np.asarray(frac_coords) + self.time_step = time_step + + self._check_site_props(site_properties) + self.site_properties = site_properties + + self._check_frame_props(frame_properties) + self.frame_properties = frame_properties + + def get_structure(self, i: int) -> Structure: + """ + Get structure at specified index. + + Args: + i: Index of structure. + + Returns: + A pymatgen Structure object. + """ + return self[i] + + def to_positions(self): + """ + Convert displacements between consecutive frames into positions. + + `base_positions` and `frac_coords` should both be in fractional coords or + absolute coords. + + This is the opposite operation of `to_displacements()`. + """ + if self.coords_are_displacement: + cumulative_displacements = np.cumsum(self.frac_coords, axis=0) + positions = self.base_positions + cumulative_displacements + self.frac_coords = positions + self.coords_are_displacement = False + + def to_displacements(self): + """ + Converts positions of trajectory into displacements between consecutive frames. + + `base_positions` and `frac_coords` should both be in fractional coords. Does + not work for absolute coords because the atoms are to be wrapped into the + simulation box. + + This is the opposite operation of `to_positions()`. + """ + if not self.coords_are_displacement: + displacements = np.subtract( + self.frac_coords, + np.roll(self.frac_coords, 1, axis=0), + ) + displacements[0] = np.zeros(np.shape(self.frac_coords[0])) + + # Deal with PBC. + # For example - If in one frame an atom has fractional coordinates of + # [0, 0, 0.98] and in the next its coordinates are [0, 0, 0.01], this atom + # will have moved 0.03*c, but if we only subtract the positions, we would + # get a displacement vector of [0, 0, -0.97]. Therefore, we can correct for + # this by adding or subtracting 1 from the value. + displacements = [np.subtract(d, np.around(d)) for d in displacements] + + self.frac_coords = displacements + self.coords_are_displacement = True + + def extend(self, trajectory: Trajectory): + """ + Append a trajectory to the current one. + + The lattice, coords, and all other properties are combined. + + Args: + trajectory: Trajectory to append. + """ + if self.time_step != trajectory.time_step: + raise ValueError( + "Cannot extend trajectory. Time steps of the trajectories are " + f"incompatible: {self.time_step} and {trajectory.time_step}." + ) + + if self.species != trajectory.species: + raise ValueError( + "Cannot extend trajectory. Species in the trajectories are " + f"incompatible: {self.species} and {trajectory.species}." + ) + + # Ensure both trajectories are in positions before combining + self.to_positions() + trajectory.to_positions() + + self.site_properties = self._combine_site_props( + self.site_properties, + trajectory.site_properties, + len(self), + len(trajectory), + ) + + self.frame_properties = self._combine_frame_props( + self.frame_properties, + trajectory.frame_properties, + len(self), + len(trajectory), + ) + + self.lattice, self.constant_lattice = self._combine_lattice( + self.lattice, + trajectory.lattice, + len(self), + len(trajectory), + ) + + # Note, this should be after the other self._combine... method calls, since + # len(self) is used there. + self.frac_coords = np.concatenate((self.frac_coords, trajectory.frac_coords)) + + def __iter__(self): + """ + Iterator of the trajectory, yielding a pymatgen structure for each frame. + """ + for i in range(len(self)): + yield self[i] + + def __len__(self): + """ + Number of frames in the trajectory. + """ + return len(self.frac_coords) + + def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory: + """ + Get a subset of the trajectory. + + The output depends on the type of the input `frames`. If an int is given, return + a pymatgen Structure at the specified frame. If a list or a slice, return a new + trajectory with a subset of frames. + + Args: + frames: Indices of the trajectory to return. + + Return: + Subset of trajectory + """ + # Convert to position mode if not ready + self.to_positions() + + # For integer input, return the structure at that frame + if isinstance(frames, int): + if frames >= len(self): + raise IndexError(f"Frame index {frames} out of range.") + + lattice = self.lattice if self.constant_lattice else self.lattice[frames] + + return Structure( + Lattice(lattice), + self.species, + self.frac_coords[frames], + site_properties=self._get_site_props(frames), # type: ignore + to_unit_cell=True, + ) + + # For slice input, return a trajectory + if isinstance(frames, (slice, list, np.ndarray)): + if isinstance(frames, slice): + start, stop, step = frames.indices(len(self)) + selected = list(range(start, stop, step)) + else: + # Get rid of frames that exceed trajectory length + selected = [i for i in frames if i < len(self)] + + if len(selected) < len(frames): + bad_frames = [i for i in frames if i > len(self)] + raise IndexError(f"Frame index {bad_frames} out of range.") + + lattice = self.lattice if self.constant_lattice else self.lattice[selected] + frac_coords = self.frac_coords[selected] + + if self.frame_properties is not None: + frame_properties = [self.frame_properties[i] for i in selected] + else: + frame_properties = None + + return Trajectory( + lattice, + self.species, + frac_coords, + site_properties=self._get_site_props(selected), + frame_properties=frame_properties, + constant_lattice=self.constant_lattice, + time_step=self.time_step, + coords_are_displacement=False, + base_positions=self.base_positions, + ) + + supported = [int, slice, list or np.ndarray] + raise ValueError(f"Expect the type of frames be one of {supported}; {type(frames)}.") + + def write_Xdatcar( + self, + filename: str | Path = "XDATCAR", + system: str | None = None, + significant_figures: int = 6, + ): + """ + Writes to Xdatcar file. + + The supported kwargs are the same as those for the + Xdatcar_from_structs.get_string method and are passed through directly. + + Args: + filename: Name of file to write. It's prudent to end the filename with + 'XDATCAR', as most visualization and analysis software require this + for autodetection. + system: Description of system (e.g. 2D MoS2). + significant_figures: Significant figures in the output file. + """ + # Ensure trajectory is in position form + self.to_positions() + + if system is None: + system = f"{self[0].composition.reduced_formula}" + + lines = [] + format_str = f"{{:.{significant_figures}f}}" + syms = [site.specie.symbol for site in self[0]] + site_symbols = [a[0] for a in itertools.groupby(syms)] + syms = [site.specie.symbol for site in self[0]] + n_atoms = [len(tuple(a[1])) for a in itertools.groupby(syms)] + + for si, frac_coords in enumerate(self.frac_coords): + # Only print out the info block if + if si == 0 or not self.constant_lattice: + lines.extend([system, "1.0"]) + + if self.constant_lattice: + _lattice = self.lattice + else: + _lattice = self.lattice[si] + + for latt_vec in _lattice: + lines.append(f'{" ".join(map(str, latt_vec))}') + + lines.append(" ".join(site_symbols)) + lines.append(" ".join(map(str, n_atoms))) + + lines.append(f"Direct configuration= {si + 1}") + + for frac_coord, specie in zip(frac_coords, self.species): + coords = frac_coord + line = f'{" ".join(format_str.format(c) for c in coords)} {specie}' + lines.append(line) + + xdatcar_string = "\n".join(lines) + "\n" + + with zopen(filename, "wt") as f: + f.write(xdatcar_string) + + def as_dict(self) -> dict: + """ + Return the trajectory as a MSONAble dict. + """ + return { + "@module": type(self).__module__, + "@class": type(self).__name__, + "lattice": self.lattice.tolist(), + "species": self.species, + "frac_coords": self.frac_coords.tolist(), + "site_properties": self.site_properties, + "frame_properties": self.frame_properties, + "constant_lattice": self.constant_lattice, + "time_step": self.time_step, + "coords_are_displacement": self.coords_are_displacement, + "base_positions": self.base_positions, + } + + @classmethod + def from_structures( + cls, + structures: list[Structure], + constant_lattice: bool = True, + **kwargs, + ) -> Trajectory: + """ + Create trajectory from a list of structures. + + Note: Assumes no atoms removed during simulation. + + Args: + structures: pymatgen Structure objects. + constant_lattice: Whether the lattice changes during the simulation, + such as in an NPT MD simulation. + + Returns: + A trajectory from the structures. + """ + if constant_lattice: + lattice = structures[0].lattice.matrix + else: + lattice = np.array([structure.lattice.matrix for structure in structures]) + + species = structures[0].species + frac_coords = [structure.frac_coords for structure in structures] + site_properties = [structure.site_properties for structure in structures] + + return cls( + lattice, + species, # type: ignore + frac_coords, + site_properties=site_properties, # type: ignore + constant_lattice=constant_lattice, + **kwargs, + ) + + @classmethod + def from_file( + cls, + filename: str | Path, + constant_lattice: bool = True, + **kwargs, + ) -> Trajectory: + """ + Create trajectory from XDATCAR or vasprun.xml file. + + Args: + filename: Path to the file to read from. + constant_lattice: Whether the lattice changes during the simulation, + such as in an NPT MD simulation. + + Returns: + A trajectory from the file. + """ + fname = Path(filename).expanduser().resolve().name + + if fnmatch(fname, "*XDATCAR*"): + structures = Xdatcar(filename).structures + elif fnmatch(fname, "vasprun*.xml*"): + structures = Vasprun(filename).structures + else: + supported = ("XDATCAR", "vasprun.xml") + raise ValueError(f"Expect file to be one of {supported}; got {filename}.") + + return cls.from_structures( + structures, + constant_lattice=constant_lattice, + **kwargs, + ) + + @staticmethod + def _combine_lattice(lat1: np.ndarray, lat2: np.ndarray, len1: int, len2: int) -> tuple[np.ndarray, bool]: + """ + Helper function to combine trajectory lattice. + """ + if lat1.ndim == lat2.ndim == 2: + constant_lat = True + lat = lat1 + else: + constant_lat = False + if lat1.ndim == 2: + lat1 = np.tile(lat1, (len1, 1, 1)) + if lat2.ndim == 2: + lat2 = np.tile(lat2, (len2, 1, 1)) + lat = np.concatenate((lat1, lat2)) + + return lat, constant_lat + + @staticmethod + def _combine_site_props( + prop1: SitePropsType | None, prop2: SitePropsType | None, len1: int, len2: int + ) -> SitePropsType | None: + """ + Combine site properties. + + Either one of prop1 or prop2 can be None, dict, or a list of dict. All + possibilities of combining them are considered. + """ + # special cases + + if prop1 is None and prop2 is None: + return None + + if isinstance(prop1, dict) and prop1 == prop2: + return prop1 + + # general case + + assert prop1 is None or isinstance(prop1, (list, dict)) + assert prop2 is None or isinstance(prop2, (list, dict)) + + p1_candidates = { + "NoneType": [None] * len1, + "dict": [prop1] * len1, + "list": prop1, + } + p2_candidates = { + "NoneType": [None] * len2, + "dict": [prop2] * len2, + "list": prop2, + } + p1_selected: list = p1_candidates[type(prop1).__name__] # type: ignore + p2_selected: list = p2_candidates[type(prop2).__name__] # type: ignore + + return p1_selected + p2_selected + + @staticmethod + def _combine_frame_props(prop1: list[dict] | None, prop2: list[dict] | None, len1: int, len2: int) -> list | None: + """ + Combine frame properties. + """ + if prop1 is None and prop2 is None: + return None + if prop1 is None: + return [None] * len1 + list(prop2) # type: ignore + if prop2 is None: + return list(prop1) + [None] * len2 # type: ignore + return list(prop1) + list(prop2) # type:ignore + + def _check_site_props(self, site_props: SitePropsType | None): + """ + Check data shape of site properties. + """ + if site_props is None: + return + + if isinstance(site_props, dict): + site_props = [site_props] + else: + assert len(site_props) == len( + self + ), f"Size of the site properties {len(site_props)} does not equal to the number of frames {len(self)}." + + num_sites = len(self.frac_coords[0]) + for d in site_props: + for k, v in d.items(): + assert len(v) == num_sites, ( + f"Size of site property {k} {len(v)}) does not equal to the " + f"number of sites in the structure {num_sites}." + ) + + def _check_frame_props(self, frame_props: list[dict] | None): + """ + Check data shape of site properties. + """ + if frame_props is None: + return + + assert len(frame_props) == len( + self + ), f"Size of the frame properties {len(frame_props)} does not equal to the number of frames {len(self)}." + + def _get_site_props(self, frames: int | list[int]) -> SitePropsType | None: + """ + Slice site properties. + """ + if self.site_properties is None: + return None + if isinstance(self.site_properties, dict): + return self.site_properties + if isinstance(self.site_properties, list): + if isinstance(frames, int): + return self.site_properties[frames] + if isinstance(frames, list): + return [self.site_properties[i] for i in frames] + raise ValueError("Unexpected frames type.") + raise ValueError("Unexpected site_properties type.") \ No newline at end of file diff --git a/pymatgen/util/graph_hashing.py b/pymatgen/util/graph_hashing.py new file mode 100644 index 00000000000..08246d820e9 --- /dev/null +++ b/pymatgen/util/graph_hashing.py @@ -0,0 +1,266 @@ +""" +Copyright (C) 2004-2022, NetworkX Developers +Aric Hagberg +Dan Schult +Pieter Swart +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NetworkX Developers nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +----- + +Functions for hashing graphs to strings. +Isomorphic graphs should be assigned identical hashes. +For now, only Weisfeiler-Lehman hashing is implemented. + +""" + +from collections import Counter, defaultdict +from hashlib import blake2b + + +def _hash_label(label, digest_size): + return blake2b(label.encode("ascii"), digest_size=digest_size).hexdigest() + + +def _init_node_labels(G, edge_attr, node_attr): + if node_attr: + return {u: str(dd[node_attr]) for u, dd in G.nodes(data=True)} + elif edge_attr: + return {u: "" for u in G} + else: + return {u: str(deg) for u, deg in G.degree()} + + +def _neighborhood_aggregate(G, node, node_labels, edge_attr=None): + """ + Compute new labels for given node by aggregating + the labels of each node's neighbors. + """ + label_list = [] + for nbr in G.neighbors(node): + prefix = "" if edge_attr is None else str(G[node][nbr][edge_attr]) + label_list.append(prefix + node_labels[nbr]) + return node_labels[node] + "".join(sorted(label_list)) + + +def weisfeiler_lehman_graph_hash( + G, edge_attr=None, node_attr=None, iterations=3, digest_size=16 +): + """Return Weisfeiler Lehman (WL) graph hash. + + The function iteratively aggregates and hashes neighbourhoods of each node. + After each node's neighbors are hashed to obtain updated node labels, + a hashed histogram of resulting labels is returned as the final hash. + + Hashes are identical for isomorphic graphs and strong guarantees that + non-isomorphic graphs will get different hashes. See [1]_ for details. + + If no node or edge attributes are provided, the degree of each node + is used as its initial label. + Otherwise, node and/or edge labels are used to compute the hash. + + Parameters + ---------- + G: graph + The graph to be hashed. + Can have node and/or edge attributes. Can also have no attributes. + edge_attr: string, default=None + The key in edge attribute dictionary to be used for hashing. + If None, edge labels are ignored. + node_attr: string, default=None + The key in node attribute dictionary to be used for hashing. + If None, and no edge_attr given, use the degrees of the nodes as labels. + iterations: int, default=3 + Number of neighbor aggregations to perform. + Should be larger for larger graphs. + digest_size: int, default=16 + Size (in bits) of blake2b hash digest to use for hashing node labels. + + Returns + ------- + h : string + Hexadecimal string corresponding to hash of the input graph. + + Notes + ----- + To return the WL hashes of each subgraph of a graph, use + `weisfeiler_lehman_subgraph_hashes` + + Similarity between hashes does not imply similarity between graphs. + + References + ---------- + .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen, + Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman + Graph Kernels. Journal of Machine Learning Research. 2011. + http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf + + See also + -------- + weisfeiler_lehman_subgraph_hashes + """ + + def weisfeiler_lehman_step(G, labels, edge_attr=None): + """ + Apply neighborhood aggregation to each node + in the graph. + Computes a dictionary with labels for each node. + """ + new_labels = {} + for node in G.nodes(): + label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr) + new_labels[node] = _hash_label(label, digest_size) + return new_labels + + # set initial node labels + node_labels = _init_node_labels(G, edge_attr, node_attr) + + subgraph_hash_counts = [] + for _ in range(iterations): + node_labels = weisfeiler_lehman_step(G, node_labels, edge_attr=edge_attr) + counter = Counter(node_labels.values()) + # sort the counter, extend total counts + subgraph_hash_counts.extend(sorted(counter.items(), key=lambda x: x[0])) + + # hash the final counter + return _hash_label(str(tuple(subgraph_hash_counts)), digest_size) + + +def weisfeiler_lehman_subgraph_hashes( + G, edge_attr=None, node_attr=None, iterations=3, digest_size=16 +): + """ + Return a dictionary of subgraph hashes by node. + + The dictionary is keyed by node to a list of hashes in increasingly + sized induced subgraphs containing the nodes within 2*k edges + of the key node for increasing integer k until all nodes are included. + + The function iteratively aggregates and hashes neighbourhoods of each node. + This is achieved for each step by replacing for each node its label from + the previous iteration with its hashed 1-hop neighborhood aggregate. + The new node label is then appended to a list of node labels for each + node. + + To aggregate neighborhoods at each step for a node $n$, all labels of + nodes adjacent to $n$ are concatenated. If the `edge_attr` parameter is set, + labels for each neighboring node are prefixed with the value of this attribute + along the connecting edge from this neighbor to node $n$. The resulting string + is then hashed to compress this information into a fixed digest size. + + Thus, at the $i$th iteration nodes within $2i$ distance influence any given + hashed node label. We can therefore say that at depth $i$ for node $n$ + we have a hash for a subgraph induced by the $2i$-hop neighborhood of $n$. + + Can be used to to create general Weisfeiler-Lehman graph kernels, or + generate features for graphs or nodes, for example to generate 'words' in a + graph as seen in the 'graph2vec' algorithm. + See [1]_ & [2]_ respectively for details. + + Hashes are identical for isomorphic subgraphs and there exist strong + guarantees that non-isomorphic graphs will get different hashes. + See [1]_ for details. + + If no node or edge attributes are provided, the degree of each node + is used as its initial label. + Otherwise, node and/or edge labels are used to compute the hash. + + Parameters + ---------- + G: graph + The graph to be hashed. + Can have node and/or edge attributes. Can also have no attributes. + edge_attr: string, default=None + The key in edge attribute dictionary to be used for hashing. + If None, edge labels are ignored. + node_attr: string, default=None + The key in node attribute dictionary to be used for hashing. + If None, and no edge_attr given, use the degrees of the nodes as labels. + iterations: int, default=3 + Number of neighbor aggregations to perform. + Should be larger for larger graphs. + digest_size: int, default=16 + Size (in bits) of blake2b hash digest to use for hashing node labels. + The default size is 16 bits + + Returns + ------- + node_subgraph_hashes : dict + A dictionary with each key given by a node in G, and each value given + by the subgraph hashes in order of depth from the key node. + + Notes + ----- + To hash the full graph when subgraph hashes are not needed, use + `weisfeiler_lehman_graph_hash` for efficiency. + + Similarity between hashes does not imply similarity between graphs. + + References + ---------- + .. [1] Shervashidze, Nino, Pascal Schweitzer, Erik Jan Van Leeuwen, + Kurt Mehlhorn, and Karsten M. Borgwardt. Weisfeiler Lehman + Graph Kernels. Journal of Machine Learning Research. 2011. + http://www.jmlr.org/papers/volume12/shervashidze11a/shervashidze11a.pdf + .. [2] Annamalai Narayanan, Mahinthan Chandramohan, Rajasekar Venkatesan, + Lihui Chen, Yang Liu and Shantanu Jaiswa. graph2vec: Learning + Distributed Representations of Graphs. arXiv. 2017 + https://arxiv.org/pdf/1707.05005.pdf + + See also + -------- + weisfeiler_lehman_graph_hash + """ + + def weisfeiler_lehman_step(G, labels, node_subgraph_hashes, edge_attr=None): + """ + Apply neighborhood aggregation to each node + in the graph. + Computes a dictionary with labels for each node. + Appends the new hashed label to the dictionary of subgraph hashes + originating from and indexed by each node in G + """ + new_labels = {} + for node in G.nodes(): + label = _neighborhood_aggregate(G, node, labels, edge_attr=edge_attr) + hashed_label = _hash_label(label, digest_size) + new_labels[node] = hashed_label + node_subgraph_hashes[node].append(hashed_label) + return new_labels + + node_labels = _init_node_labels(G, edge_attr, node_attr) + + node_subgraph_hashes = defaultdict(list) + for _ in range(iterations): + node_labels = weisfeiler_lehman_step( + G, node_labels, node_subgraph_hashes, edge_attr + ) + + return dict(node_subgraph_hashes) From af08f37f37225da0f2b92c9a097eda99d5397adc Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Fri, 3 Feb 2023 09:51:00 -0800 Subject: [PATCH 02/26] Continued --- pymatgen/core/trajectory.py | 183 +++++++++++++++--------------------- 1 file changed, 78 insertions(+), 105 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index c4dcc3820af..3459c5ea8d1 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -25,6 +25,7 @@ Lattice, Species, Structure, + Molecule ) from pymatgen.io.vasp.outputs import Vasprun, Xdatcar @@ -599,6 +600,7 @@ def _get_site_props(self, frames: int | list[int]) -> SitePropsType | None: raise ValueError("Unexpected frames type.") raise ValueError("Unexpected site_properties type.") + class MoleculeOptimizeTrajectory(MSONable): """ Trajectory of a geometry optimization for a system without a lattice (namely, a Molecule object). @@ -668,11 +670,11 @@ def __init__( ) self.base_positions = base_positions else: - self.base_positions = frac_coords[0] # type: ignore[assignment] + self.base_positions = coords[0] # type: ignore[assignment] self.coords_are_displacement = coords_are_displacement self.species = species - self.frac_coords = np.asarray(frac_coords) + self.coords = np.asarray(coords) self.time_step = time_step self._check_site_props(site_properties) @@ -681,15 +683,15 @@ def __init__( self._check_frame_props(frame_properties) self.frame_properties = frame_properties - def get_structure(self, i: int) -> Structure: + def get_molecule(self, i: int) -> Molecule: """ - Get structure at specified index. + Get molecule at specified index. Args: i: Index of structure. Returns: - A pymatgen Structure object. + A pymatgen Molecule object. """ return self[i] @@ -697,9 +699,6 @@ def to_positions(self): """ Convert displacements between consecutive frames into positions. - `base_positions` and `frac_coords` should both be in fractional coords or - absolute coords. - This is the opposite operation of `to_displacements()`. """ if self.coords_are_displacement: @@ -712,10 +711,6 @@ def to_displacements(self): """ Converts positions of trajectory into displacements between consecutive frames. - `base_positions` and `frac_coords` should both be in fractional coords. Does - not work for absolute coords because the atoms are to be wrapped into the - simulation box. - This is the opposite operation of `to_positions()`. """ if not self.coords_are_displacement: @@ -725,25 +720,17 @@ def to_displacements(self): ) displacements[0] = np.zeros(np.shape(self.frac_coords[0])) - # Deal with PBC. - # For example - If in one frame an atom has fractional coordinates of - # [0, 0, 0.98] and in the next its coordinates are [0, 0, 0.01], this atom - # will have moved 0.03*c, but if we only subtract the positions, we would - # get a displacement vector of [0, 0, -0.97]. Therefore, we can correct for - # this by adding or subtracting 1 from the value. - displacements = [np.subtract(d, np.around(d)) for d in displacements] - self.frac_coords = displacements self.coords_are_displacement = True - def extend(self, trajectory: Trajectory): + def extend(self, trajectory: MoleculeOptimizeTrajectory): """ Append a trajectory to the current one. - The lattice, coords, and all other properties are combined. + The coords and all other properties are combined. Args: - trajectory: Trajectory to append. + trajectory: MoleculeOptimizeTrajectory to append. """ if self.time_step != trajectory.time_step: raise ValueError( @@ -775,16 +762,9 @@ def extend(self, trajectory: Trajectory): len(trajectory), ) - self.lattice, self.constant_lattice = self._combine_lattice( - self.lattice, - trajectory.lattice, - len(self), - len(trajectory), - ) - # Note, this should be after the other self._combine... method calls, since # len(self) is used there. - self.frac_coords = np.concatenate((self.frac_coords, trajectory.frac_coords)) + self.coords = np.concatenate((self.coords, trajectory.coords)) def __iter__(self): """ @@ -797,14 +777,14 @@ def __len__(self): """ Number of frames in the trajectory. """ - return len(self.frac_coords) + return len(self.coords) - def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory: + def __getitem__(self, frames: int | slice | list[int]) -> Molecule | MoleculeOptimizeTrajectory: """ Get a subset of the trajectory. The output depends on the type of the input `frames`. If an int is given, return - a pymatgen Structure at the specified frame. If a list or a slice, return a new + a pymatgen Molecule at the specified frame. If a list or a slice, return a new trajectory with a subset of frames. Args: @@ -821,14 +801,10 @@ def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory if frames >= len(self): raise IndexError(f"Frame index {frames} out of range.") - lattice = self.lattice if self.constant_lattice else self.lattice[frames] - - return Structure( - Lattice(lattice), + return Molecule( self.species, - self.frac_coords[frames], + self.coords[frames], site_properties=self._get_site_props(frames), # type: ignore - to_unit_cell=True, ) # For slice input, return a trajectory @@ -844,21 +820,18 @@ def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory bad_frames = [i for i in frames if i > len(self)] raise IndexError(f"Frame index {bad_frames} out of range.") - lattice = self.lattice if self.constant_lattice else self.lattice[selected] - frac_coords = self.frac_coords[selected] + coords = self.coords[selected] if self.frame_properties is not None: frame_properties = [self.frame_properties[i] for i in selected] else: frame_properties = None - return Trajectory( - lattice, + return MoleculeOptimizeTrajectory( self.species, - frac_coords, + coords, site_properties=self._get_site_props(selected), frame_properties=frame_properties, - constant_lattice=self.constant_lattice, time_step=self.time_step, coords_are_displacement=False, base_positions=self.base_positions, @@ -867,65 +840,65 @@ def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory supported = [int, slice, list or np.ndarray] raise ValueError(f"Expect the type of frames be one of {supported}; {type(frames)}.") - def write_Xdatcar( - self, - filename: str | Path = "XDATCAR", - system: str | None = None, - significant_figures: int = 6, - ): - """ - Writes to Xdatcar file. - - The supported kwargs are the same as those for the - Xdatcar_from_structs.get_string method and are passed through directly. - - Args: - filename: Name of file to write. It's prudent to end the filename with - 'XDATCAR', as most visualization and analysis software require this - for autodetection. - system: Description of system (e.g. 2D MoS2). - significant_figures: Significant figures in the output file. - """ - # Ensure trajectory is in position form - self.to_positions() - - if system is None: - system = f"{self[0].composition.reduced_formula}" - - lines = [] - format_str = f"{{:.{significant_figures}f}}" - syms = [site.specie.symbol for site in self[0]] - site_symbols = [a[0] for a in itertools.groupby(syms)] - syms = [site.specie.symbol for site in self[0]] - n_atoms = [len(tuple(a[1])) for a in itertools.groupby(syms)] - - for si, frac_coords in enumerate(self.frac_coords): - # Only print out the info block if - if si == 0 or not self.constant_lattice: - lines.extend([system, "1.0"]) - - if self.constant_lattice: - _lattice = self.lattice - else: - _lattice = self.lattice[si] - - for latt_vec in _lattice: - lines.append(f'{" ".join(map(str, latt_vec))}') - - lines.append(" ".join(site_symbols)) - lines.append(" ".join(map(str, n_atoms))) - - lines.append(f"Direct configuration= {si + 1}") - - for frac_coord, specie in zip(frac_coords, self.species): - coords = frac_coord - line = f'{" ".join(format_str.format(c) for c in coords)} {specie}' - lines.append(line) - - xdatcar_string = "\n".join(lines) + "\n" - - with zopen(filename, "wt") as f: - f.write(xdatcar_string) + # def write_Xdatcar( + # self, + # filename: str | Path = "XDATCAR", + # system: str | None = None, + # significant_figures: int = 6, + # ): + # """ + # Writes to Xdatcar file. + + # The supported kwargs are the same as those for the + # Xdatcar_from_structs.get_string method and are passed through directly. + + # Args: + # filename: Name of file to write. It's prudent to end the filename with + # 'XDATCAR', as most visualization and analysis software require this + # for autodetection. + # system: Description of system (e.g. 2D MoS2). + # significant_figures: Significant figures in the output file. + # """ + # # Ensure trajectory is in position form + # self.to_positions() + + # if system is None: + # system = f"{self[0].composition.reduced_formula}" + + # lines = [] + # format_str = f"{{:.{significant_figures}f}}" + # syms = [site.specie.symbol for site in self[0]] + # site_symbols = [a[0] for a in itertools.groupby(syms)] + # syms = [site.specie.symbol for site in self[0]] + # n_atoms = [len(tuple(a[1])) for a in itertools.groupby(syms)] + + # for si, frac_coords in enumerate(self.frac_coords): + # # Only print out the info block if + # if si == 0 or not self.constant_lattice: + # lines.extend([system, "1.0"]) + + # if self.constant_lattice: + # _lattice = self.lattice + # else: + # _lattice = self.lattice[si] + + # for latt_vec in _lattice: + # lines.append(f'{" ".join(map(str, latt_vec))}') + + # lines.append(" ".join(site_symbols)) + # lines.append(" ".join(map(str, n_atoms))) + + # lines.append(f"Direct configuration= {si + 1}") + + # for frac_coord, specie in zip(frac_coords, self.species): + # coords = frac_coord + # line = f'{" ".join(format_str.format(c) for c in coords)} {specie}' + # lines.append(line) + + # xdatcar_string = "\n".join(lines) + "\n" + + # with zopen(filename, "wt") as f: + # f.write(xdatcar_string) def as_dict(self) -> dict: """ From 8ce8446734d15ac81415de594aee09f3cc89f2b0 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Fri, 3 Feb 2023 09:59:55 -0800 Subject: [PATCH 03/26] Trajectory first draft done --- pymatgen/core/trajectory.py | 143 +++--------------------------------- 1 file changed, 10 insertions(+), 133 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 3459c5ea8d1..439d20d4c39 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -840,66 +840,6 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | MoleculeOpt supported = [int, slice, list or np.ndarray] raise ValueError(f"Expect the type of frames be one of {supported}; {type(frames)}.") - # def write_Xdatcar( - # self, - # filename: str | Path = "XDATCAR", - # system: str | None = None, - # significant_figures: int = 6, - # ): - # """ - # Writes to Xdatcar file. - - # The supported kwargs are the same as those for the - # Xdatcar_from_structs.get_string method and are passed through directly. - - # Args: - # filename: Name of file to write. It's prudent to end the filename with - # 'XDATCAR', as most visualization and analysis software require this - # for autodetection. - # system: Description of system (e.g. 2D MoS2). - # significant_figures: Significant figures in the output file. - # """ - # # Ensure trajectory is in position form - # self.to_positions() - - # if system is None: - # system = f"{self[0].composition.reduced_formula}" - - # lines = [] - # format_str = f"{{:.{significant_figures}f}}" - # syms = [site.specie.symbol for site in self[0]] - # site_symbols = [a[0] for a in itertools.groupby(syms)] - # syms = [site.specie.symbol for site in self[0]] - # n_atoms = [len(tuple(a[1])) for a in itertools.groupby(syms)] - - # for si, frac_coords in enumerate(self.frac_coords): - # # Only print out the info block if - # if si == 0 or not self.constant_lattice: - # lines.extend([system, "1.0"]) - - # if self.constant_lattice: - # _lattice = self.lattice - # else: - # _lattice = self.lattice[si] - - # for latt_vec in _lattice: - # lines.append(f'{" ".join(map(str, latt_vec))}') - - # lines.append(" ".join(site_symbols)) - # lines.append(" ".join(map(str, n_atoms))) - - # lines.append(f"Direct configuration= {si + 1}") - - # for frac_coord, specie in zip(frac_coords, self.species): - # coords = frac_coord - # line = f'{" ".join(format_str.format(c) for c in coords)} {specie}' - # lines.append(line) - - # xdatcar_string = "\n".join(lines) + "\n" - - # with zopen(filename, "wt") as f: - # f.write(xdatcar_string) - def as_dict(self) -> dict: """ Return the trajectory as a MSONAble dict. @@ -907,107 +847,44 @@ def as_dict(self) -> dict: return { "@module": type(self).__module__, "@class": type(self).__name__, - "lattice": self.lattice.tolist(), "species": self.species, - "frac_coords": self.frac_coords.tolist(), + "coords": self.coords.tolist(), "site_properties": self.site_properties, "frame_properties": self.frame_properties, - "constant_lattice": self.constant_lattice, "time_step": self.time_step, "coords_are_displacement": self.coords_are_displacement, "base_positions": self.base_positions, } @classmethod - def from_structures( + def from_molecules( cls, - structures: list[Structure], - constant_lattice: bool = True, + molecules: list[Molecule], **kwargs, ) -> Trajectory: """ - Create trajectory from a list of structures. + Create trajectory from a list of molecules. Note: Assumes no atoms removed during simulation. Args: - structures: pymatgen Structure objects. - constant_lattice: Whether the lattice changes during the simulation, - such as in an NPT MD simulation. + molecules: pymatgen Molecules objects. Returns: A trajectory from the structures. """ - if constant_lattice: - lattice = structures[0].lattice.matrix - else: - lattice = np.array([structure.lattice.matrix for structure in structures]) - species = structures[0].species - frac_coords = [structure.frac_coords for structure in structures] - site_properties = [structure.site_properties for structure in structures] + species = molecules[0].species + coords = [mol.cart_coords for mol in molecules] + site_properties = [mol.site_properties for mol in molecules] return cls( - lattice, species, # type: ignore - frac_coords, + coords, site_properties=site_properties, # type: ignore - constant_lattice=constant_lattice, - **kwargs, - ) - - @classmethod - def from_file( - cls, - filename: str | Path, - constant_lattice: bool = True, - **kwargs, - ) -> Trajectory: - """ - Create trajectory from XDATCAR or vasprun.xml file. - - Args: - filename: Path to the file to read from. - constant_lattice: Whether the lattice changes during the simulation, - such as in an NPT MD simulation. - - Returns: - A trajectory from the file. - """ - fname = Path(filename).expanduser().resolve().name - - if fnmatch(fname, "*XDATCAR*"): - structures = Xdatcar(filename).structures - elif fnmatch(fname, "vasprun*.xml*"): - structures = Vasprun(filename).structures - else: - supported = ("XDATCAR", "vasprun.xml") - raise ValueError(f"Expect file to be one of {supported}; got {filename}.") - - return cls.from_structures( - structures, - constant_lattice=constant_lattice, **kwargs, ) - @staticmethod - def _combine_lattice(lat1: np.ndarray, lat2: np.ndarray, len1: int, len2: int) -> tuple[np.ndarray, bool]: - """ - Helper function to combine trajectory lattice. - """ - if lat1.ndim == lat2.ndim == 2: - constant_lat = True - lat = lat1 - else: - constant_lat = False - if lat1.ndim == 2: - lat1 = np.tile(lat1, (len1, 1, 1)) - if lat2.ndim == 2: - lat2 = np.tile(lat2, (len2, 1, 1)) - lat = np.concatenate((lat1, lat2)) - - return lat, constant_lat - @staticmethod def _combine_site_props( prop1: SitePropsType | None, prop2: SitePropsType | None, len1: int, len2: int @@ -1078,7 +955,7 @@ def _check_site_props(self, site_props: SitePropsType | None): for k, v in d.items(): assert len(v) == num_sites, ( f"Size of site property {k} {len(v)}) does not equal to the " - f"number of sites in the structure {num_sites}." + f"number of sites in the molecule {num_sites}." ) def _check_frame_props(self, frame_props: list[dict] | None): From 3787c7b3e39641c9d79740e989a69c2a5944df9d Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Fri, 3 Feb 2023 11:39:20 -0800 Subject: [PATCH 04/26] Tests --- pymatgen/core/tests/test_trajectory.py | 332 ++++++++++++++++++++++++- pymatgen/core/trajectory.py | 4 + 2 files changed, 334 insertions(+), 2 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index 87303230dbd..f7fe024cf0c 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -6,10 +6,12 @@ import numpy as np from pymatgen.core.lattice import Lattice -from pymatgen.core.structure import Structure -from pymatgen.core.trajectory import Trajectory +from pymatgen.core.structure import Structure, Molecule +from pymatgen.core.trajectory import Trajectory, MoleculeOptimizeTrajectory from pymatgen.io.vasp.inputs import Poscar from pymatgen.io.vasp.outputs import Xdatcar +from pymatgen.io.qchem.outputs import QCOutput + from pymatgen.util.testing import PymatgenTest @@ -323,6 +325,332 @@ def test_xdatcar_write(self): os.remove("traj_test_XDATCAR") +class MoleculeOptimizeTrajectoryTest(PymatgenTest): + def setUp(self): + out = QCOutput(os.path.join(PymatgenTest.TEST_FILES_DIR, "molecules", "new_qchem_files", "ts.out")) + last_mol = out.data["molecule_from_last_geometry"] + species = last_mol.species + coords = out.data["geometries"] + + self.molecules = list() + for c in coords: + mol = Molecule(species, c, charge=last_mol.charge, spin_multiplicity=last_mol.spin_multiplicity) + self.molecules.append(mol) + + self.traj = MoleculeOptimizeTrajectory(species, coords, last_mol.charge, last_mol.spin_multiplicity) + + def _check_traj_equality(self, traj_1, traj_2): + if traj_1.species != traj_2.species: + return False + + return all(i == j for i, j in zip(self.traj, traj_2)) + + def _get_species_and_coords(self): + species = ["C", "O"] + coords = np.asarray( + [[[1.5709474478, -0.16099953, 0.0], [1.9291378639, -1.2161950538, 0.0]], + [[1.5688628148, -0.1548583957, 0.0], [1.9312224969, -1.2223361881, 0.0]], + [[1.5690858055, -0.1555153055, 0.0], [1.9309995062, -1.2216792783, 0.0]]] + ) + return species, coords, 0, 1 + + def test_single_index_slice(self): + assert all([self.traj[i] == self.molecules[i] for i in range(0, len(self.molecules))]) + + def test_slice(self): + sliced_traj = self.traj[0:2] + sliced_traj_from_mols = MoleculeOptimizeTrajectory.from_molecules(self.molecules[0:2]) + + if len(sliced_traj) == len(sliced_traj_from_mols): + assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) + else: + raise AssertionError + + sliced_traj = self.traj[:-2] + sliced_traj_from_mols = MoleculeOptimizeTrajectory.from_molecules(self.structures[:-2]) + + if len(sliced_traj) == len(sliced_traj_from_mols): + assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) + else: + raise AssertionError + + def test_list_slice(self): + sliced_traj = self.traj[[1, 3]] + sliced_traj_from_mols = MoleculeOptimizeTrajectory.from_molecules([self.molecules[i] for i in [1, 3]]) + + if len(sliced_traj) == len(sliced_traj_from_mols): + assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) + else: + raise AssertionError + + def test_conversion(self): + # Convert to displacements and back, and then check structures. + self.traj.to_displacements() + self.traj.to_positions() + + assert all([mol == self.molecules[i] for i, mol in enumerate(self.traj)]) + + def test_site_properties(self): + species, coords, charge, spin = self._get_species_and_coords() + + props = [ + { + "test": [[True, True, True], [False, False, False]], + }, + { + "test": [[False, False, False], [False, False, False]], + }, + { + "test": [[True, True, True], [False, False, False]], + }, + ] + traj = MoleculeOptimizeTrajectory(species, coords, charge, spin, site_properties=props) + + # compare the overall site properties list + assert traj.site_properties == props + + # compare the site properties after slicing + assert traj[0].site_properties == props[0] + assert traj[1:].site_properties == props[1:] + + def test_frame_properties(self): + species, coords, charge, spin = self._get_species_and_coords() + + props = [{"SCF_energy_in_the_final_basis_set": e} for e in [-113.3256885788, -113.3260019471, -113.326006415]] + + traj = MoleculeOptimizeTrajectory(species, coords, frame_properties=props) + + # compare the overall site properties + assert traj.frame_properties == props + + # compare the site properties after slicing + expected = props[1:] + assert traj[1:].frame_properties == expected + + def test_extend(self): + traj = copy.deepcopy(self.traj) + + # Case of compatible trajectories + compatible_traj = MoleculeOptimizeTrajectory( + ['C', 'C', 'O', 'C', 'O', 'O', 'O', 'Li', 'H', 'H', 'H', 'H', 'H', 'Li'], + [[-1.46958173, -0.47370158, -0.03391061], + [-0.79757102, 0.48588802, 0.94508206], + [0.50256405, 0.8947604 , 0.47698504], + [1.56101382, 0.13356272, 0.79931048], + [1.43897567, -0.8642765 , 1.56363034], + [2.66882238, 0.48431336, 0.30635727], + [-2.72606146, -0.81552889, 0.39696593], + [3.307822 , -1.01132269, 1.26654957], + [-0.81092724, -1.35590014, -0.1458541], + [-1.48634516, 0.02121279, -1.02465009], + [-0.71212347, 0.03008471, 1.93272477], + [-1.37888759, 1.40819443, 1.02143913], + [-4.79241099, 0.80275103, -0.39852432], + [-4.28509927, -1.03484764, 0.86348452]], + 0, + 2 + ) + traj.extend(compatible_traj) + + assert len(traj) == 5 + assert traj[-2] == traj[-1] + + # Case of incompatible trajectories + species, coords, charge, spin = self._get_species_and_coords() + + traj = copy.deepcopy(self.traj) + incompatible_traj = MoleculeOptimizeTrajectory(species, coords, charge, spin) + incompatible_test_success = False + try: + traj.extend(incompatible_traj) + except Exception: + incompatible_test_success = True + + assert incompatible_test_success + + def test_extend_site_props(self): + species, coords, charge, spin = self._get_species_and_coords() + num_frames = len(coords) + + props_1 = { + { + "test": [[True, True, True], [False, False, False]], + }, + } + props_2 = { + { + "test": [[False, False, False], [False, False, False]], + }, + } + props_3 = [ + { + "test": [[True, True, True], [False, False, False]], + }, + { + "test": [[False, False, False], [False, False, False]], + }, + { + "test": [[True, True, True], [False, False, False]], + }, + ] + + traj_1 = MoleculeOptimizeTrajectory(species, coords, charge, spin, site_properties=props_1) + traj_2 = MoleculeOptimizeTrajectory(species, coords, charge, spin, site_properties=props_2) + traj_3 = MoleculeOptimizeTrajectory(species, coords, charge, spin, site_properties=props_3) + traj_4 = MoleculeOptimizeTrajectory(species, coords, charge, spin) + + # const & const (both constant and the same site properties) + traj_combined = copy.deepcopy(traj_1) + traj_combined.extend(traj_1) + expected_site_props = props_1 + assert traj_combined.site_properties == expected_site_props + + # const & const (both constant but different site properties) + traj_combined = copy.deepcopy(traj_1) + traj_combined.extend(traj_2) + expected_site_props = [props_1] * num_frames + [props_2] * num_frames + assert traj_combined.site_properties == expected_site_props + + # const & changing + traj_combined = copy.deepcopy(traj_1) + traj_combined.extend(traj_3) + expected_site_props = [props_1] * num_frames + props_3 + assert traj_combined.site_properties == expected_site_props + + # const & none + traj_combined = copy.deepcopy(traj_1) + traj_combined.extend(traj_4) + expected_site_props = [props_1] * num_frames + [None] * num_frames + assert traj_combined.site_properties == expected_site_props + + # changing & const + traj_combined = copy.deepcopy(traj_3) + traj_combined.extend(traj_1) + expected_site_props = props_3 + [props_1] * num_frames + assert traj_combined.site_properties == expected_site_props + + # changing & changing + traj_combined = copy.deepcopy(traj_3) + traj_combined.extend(traj_3) + expected_site_props = props_3 + props_3 + assert traj_combined.site_properties == expected_site_props + + # changing & none + traj_combined = copy.deepcopy(traj_3) + traj_combined.extend(traj_4) + expected_site_props = props_3 + [None] * num_frames + assert traj_combined.site_properties == expected_site_props + + # none & const + traj_combined = copy.deepcopy(traj_4) + traj_combined.extend(traj_1) + expected_site_props = [None] * num_frames + [props_1] * num_frames + assert traj_combined.site_properties == expected_site_props + + # none & changing + traj_combined = copy.deepcopy(traj_4) + traj_combined.extend(traj_3) + expected_site_props = [None] * num_frames + props_3 + assert traj_combined.site_properties == expected_site_props + + # none & none + traj_combined = copy.deepcopy(traj_4) + traj_combined.extend(traj_4) + expected_site_props = None + assert traj_combined.site_properties == expected_site_props + + def test_extend_frame_props(self): + lattice, species, frac_coords = self._get_lattice_species_and_coords() + + energy_1 = [-3, -3.9, -4.1] + energy_2 = [-4.2, -4.25, -4.3] + pressure_2 = [2, 2.5, 2.5] + + # energy only properties + props_1 = [{"energy": e} for e in energy_1] + traj_1 = Trajectory(lattice, species, frac_coords, frame_properties=props_1) + + # energy and pressure properties + props_2 = [{"energy": e, "pressure": p} for e, p in zip(energy_2, pressure_2)] + traj_2 = Trajectory(lattice, species, frac_coords, frame_properties=props_2) + + # no properties + traj_3 = Trajectory(lattice, species, frac_coords, frame_properties=None) + + # test combining two with different properties + traj_combined = copy.deepcopy(traj_1) + traj_combined.extend(traj_2) + expected_props = props_1 + props_2 + assert traj_combined.frame_properties == expected_props + + # test combining two where one has properties and the other does not + traj_combined = copy.deepcopy(traj_1) + traj_combined.extend(traj_3) + expected_props = props_1 + [None] * len(frac_coords) + assert traj_combined.frame_properties == expected_props + + # test combining two both of which have no properties + traj_combined = copy.deepcopy(traj_3) + traj_combined.extend(traj_3) + assert traj_combined.frame_properties is None + + def test_length(self): + assert len(self.traj) == len(self.structures) + + def test_displacements(self): + poscar = Poscar.from_file(os.path.join(PymatgenTest.TEST_FILES_DIR, "POSCAR")) + structures = [poscar.structure] + displacements = np.zeros((11, *np.shape(structures[-1].frac_coords))) + + for i in range(10): + displacement = np.random.random_sample(np.shape(structures[-1].frac_coords)) / 20 + new_coords = displacement + structures[-1].frac_coords + structures.append(Structure(structures[-1].lattice, structures[-1].species, new_coords)) + displacements[i + 1, :, :] = displacement + + traj = Trajectory.from_structures(structures, constant_lattice=True) + traj.to_displacements() + + assert np.allclose(traj.frac_coords, displacements) + + def test_variable_lattice(self): + structure = self.structures[0] + + # Generate structures with different lattices + structures = [] + for _ in range(10): + new_lattice = np.dot(structure.lattice.matrix, np.diag(1 + np.random.random_sample(3) / 20)) + temp_struct = structure.copy() + temp_struct.lattice = Lattice(new_lattice) + structures.append(temp_struct) + + traj = Trajectory.from_structures(structures, constant_lattice=False) + + # Check if lattices were properly stored + assert all(np.allclose(struct.lattice.matrix, structures[i].lattice.matrix) for i, struct in enumerate(traj)) + + # Check if the file is written correctly when lattice is not constant. + traj.write_Xdatcar(filename="traj_test_XDATCAR") + + # Load trajectory from written xdatcar and compare to original + written_traj = Trajectory.from_file("traj_test_XDATCAR", constant_lattice=False) + self._check_traj_equality(traj, written_traj) + os.remove("traj_test_XDATCAR") + + def test_to_from_dict(self): + d = self.traj.as_dict() + traj = Trajectory.from_dict(d) + assert isinstance(traj, Trajectory) + + def test_xdatcar_write(self): + self.traj.write_Xdatcar(filename="traj_test_XDATCAR") + + # Load trajectory from written xdatcar and compare to original + written_traj = Trajectory.from_file("traj_test_XDATCAR") + self._check_traj_equality(self.traj, written_traj) + os.remove("traj_test_XDATCAR") + + if __name__ == "__main__": import unittest diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 439d20d4c39..92160d2c3f9 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -613,6 +613,8 @@ def __init__( self, species: list[str | Element | Species | DummySpecies | Composition], coords: list[list[Vector3D]] | np.ndarray, + charge: int, + spin_multiplicity: int, *, site_properties: SitePropsType | None = None, frame_properties: list[dict] | None = None, @@ -804,6 +806,8 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | MoleculeOpt return Molecule( self.species, self.coords[frames], + charge=self.charge, + spin_multiplicity=self.spin_multiplicity, site_properties=self._get_site_props(frames), # type: ignore ) From df29aa6d7c0c45f04251a8a26482c708aedac591 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Fri, 3 Feb 2023 13:09:53 -0800 Subject: [PATCH 05/26] Beginning to add entry classes to incorporate into Emmet --- pymatgen/core/tests/test_trajectory.py | 54 +----- pymatgen/core/trajectory.py | 16 +- pymatgen/entries/mol_entry.py | 246 +++++++++++++++++++++++++ 3 files changed, 258 insertions(+), 58 deletions(-) create mode 100644 pymatgen/entries/mol_entry.py diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index f7fe024cf0c..52f813e8d6e 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -432,7 +432,7 @@ def test_extend(self): # Case of compatible trajectories compatible_traj = MoleculeOptimizeTrajectory( - ['C', 'C', 'O', 'C', 'O', 'O', 'O', 'Li', 'H', 'H', 'H', 'H', 'H', 'Li'], + traj.species, [[-1.46958173, -0.47370158, -0.03391061], [-0.79757102, 0.48588802, 0.94508206], [0.50256405, 0.8947604 , 0.47698504], @@ -597,58 +597,10 @@ def test_extend_frame_props(self): def test_length(self): assert len(self.traj) == len(self.structures) - def test_displacements(self): - poscar = Poscar.from_file(os.path.join(PymatgenTest.TEST_FILES_DIR, "POSCAR")) - structures = [poscar.structure] - displacements = np.zeros((11, *np.shape(structures[-1].frac_coords))) - - for i in range(10): - displacement = np.random.random_sample(np.shape(structures[-1].frac_coords)) / 20 - new_coords = displacement + structures[-1].frac_coords - structures.append(Structure(structures[-1].lattice, structures[-1].species, new_coords)) - displacements[i + 1, :, :] = displacement - - traj = Trajectory.from_structures(structures, constant_lattice=True) - traj.to_displacements() - - assert np.allclose(traj.frac_coords, displacements) - - def test_variable_lattice(self): - structure = self.structures[0] - - # Generate structures with different lattices - structures = [] - for _ in range(10): - new_lattice = np.dot(structure.lattice.matrix, np.diag(1 + np.random.random_sample(3) / 20)) - temp_struct = structure.copy() - temp_struct.lattice = Lattice(new_lattice) - structures.append(temp_struct) - - traj = Trajectory.from_structures(structures, constant_lattice=False) - - # Check if lattices were properly stored - assert all(np.allclose(struct.lattice.matrix, structures[i].lattice.matrix) for i, struct in enumerate(traj)) - - # Check if the file is written correctly when lattice is not constant. - traj.write_Xdatcar(filename="traj_test_XDATCAR") - - # Load trajectory from written xdatcar and compare to original - written_traj = Trajectory.from_file("traj_test_XDATCAR", constant_lattice=False) - self._check_traj_equality(traj, written_traj) - os.remove("traj_test_XDATCAR") - def test_to_from_dict(self): d = self.traj.as_dict() - traj = Trajectory.from_dict(d) - assert isinstance(traj, Trajectory) - - def test_xdatcar_write(self): - self.traj.write_Xdatcar(filename="traj_test_XDATCAR") - - # Load trajectory from written xdatcar and compare to original - written_traj = Trajectory.from_file("traj_test_XDATCAR") - self._check_traj_equality(self.traj, written_traj) - os.remove("traj_test_XDATCAR") + traj = MoleculeOptimizeTrajectory.from_dict(d) + assert isinstance(traj, MoleculeOptimizeTrajectory) if __name__ == "__main__": diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 92160d2c3f9..298e1d77e9f 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -677,6 +677,8 @@ def __init__( self.species = species self.coords = np.asarray(coords) + self.charge = charge + self.spin_multiplicity = spin_multiplicity self.time_step = time_step self._check_site_props(site_properties) @@ -704,9 +706,9 @@ def to_positions(self): This is the opposite operation of `to_displacements()`. """ if self.coords_are_displacement: - cumulative_displacements = np.cumsum(self.frac_coords, axis=0) + cumulative_displacements = np.cumsum(self.coords, axis=0) positions = self.base_positions + cumulative_displacements - self.frac_coords = positions + self.coords = positions self.coords_are_displacement = False def to_displacements(self): @@ -717,12 +719,12 @@ def to_displacements(self): """ if not self.coords_are_displacement: displacements = np.subtract( - self.frac_coords, - np.roll(self.frac_coords, 1, axis=0), + self.coords, + np.roll(self.coords, 1, axis=0), ) - displacements[0] = np.zeros(np.shape(self.frac_coords[0])) + displacements[0] = np.zeros(np.shape(self.coords[0])) - self.frac_coords = displacements + self.coords = displacements self.coords_are_displacement = True def extend(self, trajectory: MoleculeOptimizeTrajectory): @@ -954,7 +956,7 @@ def _check_site_props(self, site_props: SitePropsType | None): self ), f"Size of the site properties {len(site_props)} does not equal to the number of frames {len(self)}." - num_sites = len(self.frac_coords[0]) + num_sites = len(self.coords[0]) for d in site_props: for k, v in d.items(): assert len(v) == num_sites, ( diff --git a/pymatgen/entries/mol_entry.py b/pymatgen/entries/mol_entry.py new file mode 100644 index 00000000000..33d8b0cc8bc --- /dev/null +++ b/pymatgen/entries/mol_entry.py @@ -0,0 +1,246 @@ +from pymatgen.core.structure import Molecule +from pymatgen.analysis.graphs import MoleculeGraph +from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender + + +class MoleculeEntry: + """ + A molecule entry class to provide easy access to Molecule properties. + + Args: + molecule: Molecule of interest. + energy: Electronic energy of the molecule in Hartree. + enthalpy: Enthalpy of the molecule (kcal/mol). Defaults to None. + entropy: Entropy of the molecule (cal/mol.K). Defaults to None. + entry_id: An optional id to uniquely identify the entry. + mol_graph: MoleculeGraph of the molecule. + """ + + def __init__( + self, + molecule, + energy, + enthalpy, + entropy, + entry_id, + mol_graph, + partial_charges_resp, + partial_charges_mulliken, + partial_charges_nbo, + electron_affinity, + ionization_energy, + spin_multiplicity, + partial_spins_nbo + ): + self.energy = energy + self.enthalpy = enthalpy + self.entropy = entropy + self.electron_affinity = electron_affinity + self.ionization_energy = ionization_energy + self.spin_multiplicity = spin_multiplicity + + self.ind = None + self.entry_id = entry_id + + self.star_hashes = {} + self.fragment_data = [] + + + if not mol_graph: + mol_graph = MoleculeGraph.with_local_env_strategy(molecule, OpenBabelNN()) + self.mol_graph = metal_edge_extender(mol_graph) + else: + self.mol_graph = mol_graph + + self.partial_charges_resp = partial_charges_resp + self.partial_charges_mulliken = partial_charges_mulliken + self.partial_charges_nbo = partial_charges_nbo + self.partial_spins_nbo = partial_spins_nbo + + self.molecule = self.mol_graph.molecule + self.graph = self.mol_graph.graph.to_undirected() + self.species = [str(s) for s in self.molecule.species] + + self.m_inds = [ + i for i, x in enumerate(self.species) if x in metals + ] + + # penalty gets used in the non local part of species filtering. + # certain species filters will increase penalty rather than explicitly filtering + # out a molecule. The non local filtering step prioritizes mols with a lower + # penalty. + self.penalty = 0 + self.covalent_graph = copy.deepcopy(self.graph) + self.covalent_graph.remove_nodes_from(self.m_inds) + + + self.formula = self.molecule.composition.alphabetical_formula + self.charge = self.molecule.charge + self.num_atoms = len(self.molecule) + + self.atom_locations = [ + site.coords for site in self.molecule] + + + self.free_energy = self.get_free_energy() + + self.non_metal_atoms = [ + i for i in range(self.num_atoms) + if self.species[i] not in metals] + + + + + @classmethod + def from_dataset_entry( + cls, + doc: Dict, + use_thermo: str = "raw", + ): + """ + Initialize a MoleculeEntry from a document in the LIBE (Lithium-Ion + Battery Electrolyte) or MADEIRA (MAgnesium Dataset of Electrolyte and + Interphase ReAgents) datasets. + + Args: + doc: Dictionary representing an entry from LIBE or MADEIRA + use_thermo: One of "raw" (meaning raw, uncorrected thermo data will + be used), "rrho_shifted" (meaning that a slightly modified + Rigid-Rotor Harmonic Oscillator approximation will be used - + see Ribiero et al., J. Phys. Chem. B 2011, 115, 14556-14562), or + "qrrho" (meaning that Grimme's Quasi-Rigid Rotor Harmonic + Oscillator - see Grimme, Chem. Eur. J. 2012, 18, 9955-9964) will + be used. + """ + + thermo = use_thermo.lower() + + if thermo not in ["raw", "rrho_shifted", "qrrho"]: + raise ValueError( + "Only allowed values for use_thermo are 'raw', 'rrho_shifted', " + "and 'qrrho'!" + ) + try: + if isinstance(doc["molecule"], Molecule): + molecule = doc["molecule"] + else: + molecule = Molecule.from_dict(doc["molecule"]) # type: ignore + + if ( + thermo == "rrho_shifted" + and doc["thermo"]["shifted_rrho_eV"] is not None + ): + energy = ( + doc["thermo"]["shifted_rrho_eV"]["electronic_energy"] * 0.0367493 + ) + enthalpy = doc["thermo"]["shifted_rrho_eV"]["total_enthalpy"] * 23.061 + entropy = doc["thermo"]["shifted_rrho_eV"]["total_entropy"] * 23061 + elif thermo == "qrrho" and doc["thermo"]["quasi_rrho_eV"] is not None: + energy = doc["thermo"]["quasi_rrho_eV"]["electronic_energy"] * 0.0367493 + enthalpy = doc["thermo"]["quasi_rrho_eV"]["total_enthalpy"] * 23.061 + entropy = doc["thermo"]["quasi_rrho_eV"]["total_entropy"] * 23061 + else: + energy = doc["thermo"]["raw"]["electronic_energy_Ha"] + enthalpy = doc["thermo"]["raw"]["total_enthalpy_kcal/mol"] + entropy = doc["thermo"]["raw"]["total_entropy_cal/molK"] + + entry_id = doc["molecule_id"] + + if isinstance(doc["molecule_graph"], MoleculeGraph): + mol_graph = doc["molecule_graph"] + else: + mol_graph = MoleculeGraph.from_dict(doc["molecule_graph"]) + + partial_charges_resp = doc['partial_charges']['resp'] + partial_charges_mulliken = doc['partial_charges']['mulliken'] + spin_multiplicity = doc['spin_multiplicity'] + + + if doc['number_atoms'] == 1: + partial_charges_nbo = doc['partial_charges']['mulliken'] + partial_spins_nbo = doc['partial_spins']['mulliken'] + else: + partial_charges_nbo = doc['partial_charges']['nbo'] + partial_spins_nbo = doc['partial_spins']['nbo'] + + electron_affinity_eV = None + ionization_energy_eV = None + if 'redox' in doc: + if 'electron_affinity_eV' in doc['redox']: + electron_affinity_eV = doc['redox']['electron_affinity_eV'] + + if 'ionization_energy_eV' in doc['redox']: + ionization_energy_eV = doc['redox']['ionization_energy_eV'] + + except KeyError as e: + raise Exception( + "Unable to construct molecule entry from molecule document; missing " + f"attribute {e} in `doc`." + ) + + + + return cls( + molecule=molecule, + energy=energy, + enthalpy=enthalpy, + entropy=entropy, + entry_id=entry_id, + mol_graph=mol_graph, + partial_charges_resp=partial_charges_resp, + partial_charges_mulliken=partial_charges_mulliken, + partial_charges_nbo=partial_charges_nbo, + electron_affinity=electron_affinity_eV, + ionization_energy=ionization_energy_eV, + spin_multiplicity=spin_multiplicity, + partial_spins_nbo=partial_spins_nbo + ) + + + + def get_free_energy(self, temperature: float = ROOM_TEMP) -> Optional[float]: + """ + Get the free energy at the give temperature. + """ + if self.enthalpy is not None and self.entropy is not None: + # TODO: fix these hard coded vals + return ( + self.energy * 27.21139 + + 0.0433641 * self.enthalpy + - temperature * self.entropy * 0.0000433641 + ) + else: + return None + + def __repr__(self): + + output = [ + f"MoleculeEntry {self.entry_id} - {self.formula}", + f"Total charge = {self.charge}", + ] + + energies = [ + ("Energy", "Hartree", self.energy), + ("Enthalpy", "kcal/mol", self.enthalpy), + ("Entropy", "cal/mol.K", self.entropy), + ("Free Energy (298.15 K)", "eV", self.get_free_energy()), + ] + for name, unit, value in energies: + if value is None: + output.append(f"{name} = {value} {unit}") + else: + output.append(f"{name} = {value:.4f} {unit}") + + if self.ind: + output.append("index: {}".format(self.ind)) + + return "\n".join(output) + + def __str__(self): + return self.__repr__() + + def __eq__(self, other): + if type(self) == type(other): + return str(self) == str(other) + else: + return False From 9baa12ea3a091c09c9dfed9e1963fbb77b5719ae Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Fri, 3 Feb 2023 13:57:24 -0800 Subject: [PATCH 06/26] More test fixes for molecule trajectory --- pymatgen/core/tests/test_trajectory.py | 52 +++++++-------- pymatgen/core/trajectory.py | 2 + pymatgen/entries/mol_entry.py | 87 +++++++++++++------------- 3 files changed, 68 insertions(+), 73 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index 52f813e8d6e..7b276d8caeb 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -418,7 +418,7 @@ def test_frame_properties(self): props = [{"SCF_energy_in_the_final_basis_set": e} for e in [-113.3256885788, -113.3260019471, -113.326006415]] - traj = MoleculeOptimizeTrajectory(species, coords, frame_properties=props) + traj = MoleculeOptimizeTrajectory(species, coords, charge, spin, frame_properties=props) # compare the overall site properties assert traj.frame_properties == props @@ -433,20 +433,20 @@ def test_extend(self): # Case of compatible trajectories compatible_traj = MoleculeOptimizeTrajectory( traj.species, - [[-1.46958173, -0.47370158, -0.03391061], - [-0.79757102, 0.48588802, 0.94508206], - [0.50256405, 0.8947604 , 0.47698504], - [1.56101382, 0.13356272, 0.79931048], - [1.43897567, -0.8642765 , 1.56363034], - [2.66882238, 0.48431336, 0.30635727], - [-2.72606146, -0.81552889, 0.39696593], - [3.307822 , -1.01132269, 1.26654957], - [-0.81092724, -1.35590014, -0.1458541], - [-1.48634516, 0.02121279, -1.02465009], - [-0.71212347, 0.03008471, 1.93272477], - [-1.37888759, 1.40819443, 1.02143913], - [-4.79241099, 0.80275103, -0.39852432], - [-4.28509927, -1.03484764, 0.86348452]], + [[[-1.46958173, -0.47370158, -0.03391061], + [-0.79757102, 0.48588802, 0.94508206], + [0.50256405, 0.8947604 , 0.47698504], + [1.56101382, 0.13356272, 0.79931048], + [1.43897567, -0.8642765 , 1.56363034], + [2.66882238, 0.48431336, 0.30635727], + [-2.72606146, -0.81552889, 0.39696593], + [3.307822 , -1.01132269, 1.26654957], + [-0.81092724, -1.35590014, -0.1458541], + [-1.48634516, 0.02121279, -1.02465009], + [-0.71212347, 0.03008471, 1.93272477], + [-1.37888759, 1.40819443, 1.02143913], + [-4.79241099, 0.80275103, -0.39852432], + [-4.28509927, -1.03484764, 0.86348452]]], 0, 2 ) @@ -473,15 +473,11 @@ def test_extend_site_props(self): num_frames = len(coords) props_1 = { - { "test": [[True, True, True], [False, False, False]], - }, - } + } props_2 = { - { "test": [[False, False, False], [False, False, False]], - }, - } + } props_3 = [ { "test": [[True, True, True], [False, False, False]], @@ -560,22 +556,22 @@ def test_extend_site_props(self): assert traj_combined.site_properties == expected_site_props def test_extend_frame_props(self): - lattice, species, frac_coords = self._get_lattice_species_and_coords() + species, coords, charge, spin = self._get_species_and_coords() energy_1 = [-3, -3.9, -4.1] energy_2 = [-4.2, -4.25, -4.3] - pressure_2 = [2, 2.5, 2.5] + enthalpy_2 = [1.0, 1.25, 1.3] # energy only properties props_1 = [{"energy": e} for e in energy_1] - traj_1 = Trajectory(lattice, species, frac_coords, frame_properties=props_1) + traj_1 = MoleculeOptimizeTrajectory(species, coords, charge, spin, frame_properties=props_1) # energy and pressure properties - props_2 = [{"energy": e, "pressure": p} for e, p in zip(energy_2, pressure_2)] - traj_2 = Trajectory(lattice, species, frac_coords, frame_properties=props_2) + props_2 = [{"energy": e, "pressure": p} for e, p in zip(energy_2, enthalpy_2)] + traj_2 = MoleculeOptimizeTrajectory(species, coords, charge, spin, frame_properties=props_2) # no properties - traj_3 = Trajectory(lattice, species, frac_coords, frame_properties=None) + traj_3 = MoleculeOptimizeTrajectory(species, coords, charge, spin, frame_properties=None) # test combining two with different properties traj_combined = copy.deepcopy(traj_1) @@ -586,7 +582,7 @@ def test_extend_frame_props(self): # test combining two where one has properties and the other does not traj_combined = copy.deepcopy(traj_1) traj_combined.extend(traj_3) - expected_props = props_1 + [None] * len(frac_coords) + expected_props = props_1 + [None] * len(coords) assert traj_combined.frame_properties == expected_props # test combining two both of which have no properties diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 298e1d77e9f..6f949982805 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -836,6 +836,8 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | MoleculeOpt return MoleculeOptimizeTrajectory( self.species, coords, + self.charge, + self.spin_multiplicity, site_properties=self._get_site_props(selected), frame_properties=frame_properties, time_step=self.time_step, diff --git a/pymatgen/entries/mol_entry.py b/pymatgen/entries/mol_entry.py index 33d8b0cc8bc..9c0859da49c 100644 --- a/pymatgen/entries/mol_entry.py +++ b/pymatgen/entries/mol_entry.py @@ -1,3 +1,7 @@ +from typing import Any, Dict, Optional + +import numpy as np + from pymatgen.core.structure import Molecule from pymatgen.analysis.graphs import MoleculeGraph from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender @@ -5,74 +9,67 @@ class MoleculeEntry: """ - A molecule entry class to provide easy access to Molecule properties. + A class to provide easy access to Molecule properties. Args: - molecule: Molecule of interest. - energy: Electronic energy of the molecule in Hartree. - enthalpy: Enthalpy of the molecule (kcal/mol). Defaults to None. - entropy: Entropy of the molecule (cal/mol.K). Defaults to None. - entry_id: An optional id to uniquely identify the entry. - mol_graph: MoleculeGraph of the molecule. + molecule: pymatgen Molecule object + energy: Electronic energy (units: Hartree) + entry_id: Unique identifier for this MoleculeEntry (default: None) + enthalpy: Enthalpy (units: kcal/mol) (default: None) + entropy: Entropy (units: cal/mol-K) (default: None) + mol_graph: graph representation of the Molecule (default: None) + data: Other data associated with this entry (default: None) """ def __init__( self, - molecule, - energy, - enthalpy, - entropy, - entry_id, - mol_graph, - partial_charges_resp, - partial_charges_mulliken, - partial_charges_nbo, - electron_affinity, - ionization_energy, - spin_multiplicity, - partial_spins_nbo + molecule: Molecule, + energy: float, + entry_id: object | None = None, + enthalpy: Optional[float] = None, + entropy: Optional[float] = None, + mol_graph: Optional[MoleculeGraph] = None, + data: Optional[Dict[str, Any]] = None ): + self.molecule = molecule + self.energy = energy self.enthalpy = enthalpy self.entropy = entropy - self.electron_affinity = electron_affinity - self.ionization_energy = ionization_energy - self.spin_multiplicity = spin_multiplicity - self.ind = None self.entry_id = entry_id - self.star_hashes = {} - self.fragment_data = [] - - if not mol_graph: mol_graph = MoleculeGraph.with_local_env_strategy(molecule, OpenBabelNN()) self.mol_graph = metal_edge_extender(mol_graph) else: self.mol_graph = mol_graph - self.partial_charges_resp = partial_charges_resp - self.partial_charges_mulliken = partial_charges_mulliken - self.partial_charges_nbo = partial_charges_nbo - self.partial_spins_nbo = partial_spins_nbo + if data is None: + self.data = dict() + else: + self.data = data - self.molecule = self.mol_graph.molecule - self.graph = self.mol_graph.graph.to_undirected() - self.species = [str(s) for s in self.molecule.species] - self.m_inds = [ - i for i, x in enumerate(self.species) if x in metals - ] + @property + def free_energy(): + pass + + @property + def graph(self): + return self.mol_graph.graph.to_undirected() + + @property + def formula(self): + return self.molecule.composition.alphabetical_formula - # penalty gets used in the non local part of species filtering. - # certain species filters will increase penalty rather than explicitly filtering - # out a molecule. The non local filtering step prioritizes mols with a lower - # penalty. - self.penalty = 0 - self.covalent_graph = copy.deepcopy(self.graph) - self.covalent_graph.remove_nodes_from(self.m_inds) + @property + def charge(self): + return self.molecule.charge + @property + def spin_multiplicity(self): + return self.molecule.spin_multiplicity self.formula = self.molecule.composition.alphabetical_formula self.charge = self.molecule.charge From f603a206eb6a9fdd98ba4bdea8424f0e3d0d58b0 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Fri, 3 Feb 2023 14:12:11 -0800 Subject: [PATCH 07/26] molecule trajectory works --- pymatgen/core/tests/test_trajectory.py | 4 ++-- pymatgen/core/trajectory.py | 4 ++++ pymatgen/entries/mol_entry.py | 19 +++---------------- 3 files changed, 9 insertions(+), 18 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index 7b276d8caeb..f449a29d523 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -367,7 +367,7 @@ def test_slice(self): raise AssertionError sliced_traj = self.traj[:-2] - sliced_traj_from_mols = MoleculeOptimizeTrajectory.from_molecules(self.structures[:-2]) + sliced_traj_from_mols = MoleculeOptimizeTrajectory.from_molecules(self.molecules[:-2]) if len(sliced_traj) == len(sliced_traj_from_mols): assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) @@ -591,7 +591,7 @@ def test_extend_frame_props(self): assert traj_combined.frame_properties is None def test_length(self): - assert len(self.traj) == len(self.structures) + assert len(self.traj) == len(self.molecules) def test_to_from_dict(self): d = self.traj.as_dict() diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 6f949982805..e73c355f879 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -857,6 +857,8 @@ def as_dict(self) -> dict: "@class": type(self).__name__, "species": self.species, "coords": self.coords.tolist(), + "charge": self.charge, + "spin_multiplicity": self.spin_multiplicity, "site_properties": self.site_properties, "frame_properties": self.frame_properties, "time_step": self.time_step, @@ -889,6 +891,8 @@ def from_molecules( return cls( species, # type: ignore coords, + molecules[0].charge, + molecules[0].spin_multiplicity, site_properties=site_properties, # type: ignore **kwargs, ) diff --git a/pymatgen/entries/mol_entry.py b/pymatgen/entries/mol_entry.py index 9c0859da49c..cdfecd6c118 100644 --- a/pymatgen/entries/mol_entry.py +++ b/pymatgen/entries/mol_entry.py @@ -71,22 +71,9 @@ def charge(self): def spin_multiplicity(self): return self.molecule.spin_multiplicity - self.formula = self.molecule.composition.alphabetical_formula - self.charge = self.molecule.charge - self.num_atoms = len(self.molecule) - - self.atom_locations = [ - site.coords for site in self.molecule] - - - self.free_energy = self.get_free_energy() - - self.non_metal_atoms = [ - i for i in range(self.num_atoms) - if self.species[i] not in metals] - - - + @property + def num_atoms(self): + return len(self.molecule) @classmethod def from_dataset_entry( From 3ef2f85d463efe449963b32e3fa8077b53d842db Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Sat, 18 Mar 2023 10:27:26 -0700 Subject: [PATCH 08/26] Small addition --- pymatgen/entries/mol_entry.py | 139 +++++++++++++++++++++------------- 1 file changed, 88 insertions(+), 51 deletions(-) diff --git a/pymatgen/entries/mol_entry.py b/pymatgen/entries/mol_entry.py index cdfecd6c118..e50cebcb115 100644 --- a/pymatgen/entries/mol_entry.py +++ b/pymatgen/entries/mol_entry.py @@ -6,8 +6,18 @@ from pymatgen.analysis.graphs import MoleculeGraph from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender +from pymatgen.entries import Entry -class MoleculeEntry: + +__author__ = "Evan Spotte-Smith, Samuel Blau, Daniel Barter" +__copyright__ = "Copyright 2023, The Materials Project" +__version__ = "0.1" +__maintainer__ = "Evan Spotte-Smith" +__email__ = "espottesmith@gmail.com" +__date__ = "02/04/2023" + + +class MoleculeEntry(Entry): """ A class to provide easy access to Molecule properties. @@ -33,7 +43,11 @@ def __init__( ): self.molecule = molecule - self.energy = energy + super().__init__( + self.molecule.composition, + self._energy + ) + self.enthalpy = enthalpy self.entropy = entropy @@ -50,11 +64,6 @@ def __init__( else: self.data = data - - @property - def free_energy(): - pass - @property def graph(self): return self.mol_graph.graph.to_undirected() @@ -75,16 +84,38 @@ def spin_multiplicity(self): def num_atoms(self): return len(self.molecule) + @property + def energy(self): + return self._energy + + def free_energy(self, temperature: float = 298.15) -> float: + """ + Gibbs free energy of the molecule (in eV) at a given temperature. + + Args: + temperature (float): Temperature in Kelvin. Default is 298.15, room temperature + """ + + if self.enthalpy is None or self.entropy is None: + raise ValueError("Gibbs free energy requires enthalpy and entropy to be known!") + + u = self.energy * 27.2114 + h = self.enthalpy * 0.043363 + s = self.entropy * 0.000043363 + + return u + h - temperature * s + @classmethod - def from_dataset_entry( + def from_libe_madeira( cls, doc: Dict, use_thermo: str = "raw", ): """ Initialize a MoleculeEntry from a document in the LIBE (Lithium-Ion - Battery Electrolyte) or MADEIRA (MAgnesium Dataset of Electrolyte and - Interphase ReAgents) datasets. + Battery Electrolyte; Spotte-Smith*, Blau*, et al., Sci. Data 8(203), 2021 + ) or MADEIRA (MAgnesium Dataset of Electrolyte and + Interphase ReAgents; manuscript in submission) datasets. Args: doc: Dictionary representing an entry from LIBE or MADEIRA @@ -135,26 +166,28 @@ def from_dataset_entry( else: mol_graph = MoleculeGraph.from_dict(doc["molecule_graph"]) - partial_charges_resp = doc['partial_charges']['resp'] - partial_charges_mulliken = doc['partial_charges']['mulliken'] - spin_multiplicity = doc['spin_multiplicity'] - + data = dict() + + partial_charges_resp = doc.get('partial_charges', dict()).get('resp') + if partial_charges_resp: + data["partial_charges_resp"] = partial_charges_resp + partial_charges_mulliken = doc.get('partial_charges', dict()).get('mulliken') + if partial_charges_mulliken: + data["partial_charges_mulliken"] = partial_charges_mulliken + + partial_charges_nbo = doc.get('partial_charges', dict()).get('nbo') + if partial_charges_nbo: + data["partial_charges_nbo"] = partial_charges_nbo + partial_spins_nbo = doc.get('partial_spins', dict()).get('nbo') + if partial_spins_nbo: + data["partial_spins_nbo"] = partial_spins_nbo - if doc['number_atoms'] == 1: - partial_charges_nbo = doc['partial_charges']['mulliken'] - partial_spins_nbo = doc['partial_spins']['mulliken'] - else: - partial_charges_nbo = doc['partial_charges']['nbo'] - partial_spins_nbo = doc['partial_spins']['nbo'] - - electron_affinity_eV = None - ionization_energy_eV = None if 'redox' in doc: if 'electron_affinity_eV' in doc['redox']: - electron_affinity_eV = doc['redox']['electron_affinity_eV'] + data["electron_affinity_eV"] = doc['redox']['electron_affinity_eV'] if 'ionization_energy_eV' in doc['redox']: - ionization_energy_eV = doc['redox']['ionization_energy_eV'] + data["ionization_energy_eV"] = doc['redox']['ionization_energy_eV'] except KeyError as e: raise Exception( @@ -162,52 +195,47 @@ def from_dataset_entry( f"attribute {e} in `doc`." ) - - return cls( molecule=molecule, energy=energy, + entry_id=entry_id, enthalpy=enthalpy, entropy=entropy, - entry_id=entry_id, mol_graph=mol_graph, - partial_charges_resp=partial_charges_resp, - partial_charges_mulliken=partial_charges_mulliken, - partial_charges_nbo=partial_charges_nbo, - electron_affinity=electron_affinity_eV, - ionization_energy=ionization_energy_eV, - spin_multiplicity=spin_multiplicity, - partial_spins_nbo=partial_spins_nbo + data=data ) - - - def get_free_energy(self, temperature: float = ROOM_TEMP) -> Optional[float]: + @classmethod + def from_mpcule_summary_doc(d: Dict[str, Any], + solvent: str): """ - Get the free energy at the give temperature. + Initialize a MoleculeEntry from a summary document in the Materials Project + molecules (MPcules) database. + + """ - if self.enthalpy is not None and self.entropy is not None: - # TODO: fix these hard coded vals - return ( - self.energy * 27.21139 - + 0.0433641 * self.enthalpy - - temperature * self.entropy * 0.0000433641 - ) - else: - return None + + @classmethod + def from_dict(d: Dict[str, Any]): + pass + + @classmethod + def as_dict(): + pass def __repr__(self): output = [ f"MoleculeEntry {self.entry_id} - {self.formula}", - f"Total charge = {self.charge}", + f"Charge = {self.charge}", + f"Spin = {self.spin_multiplicity}" ] energies = [ ("Energy", "Hartree", self.energy), ("Enthalpy", "kcal/mol", self.enthalpy), ("Entropy", "cal/mol.K", self.entropy), - ("Free Energy (298.15 K)", "eV", self.get_free_energy()), + ("Free Energy (298.15 K)", "eV", self.free_energy()), ] for name, unit, value in energies: if value is None: @@ -216,7 +244,12 @@ def __repr__(self): output.append(f"{name} = {value:.4f} {unit}") if self.ind: - output.append("index: {}".format(self.ind)) + output.append("Index: {}".format(self.ind)) + + if len(self.data) > 0: + output.append("Additional Data:") + for k, v in self.data.items(): + output.append(f"\t{k} = {v}") return "\n".join(output) @@ -228,3 +261,7 @@ def __eq__(self, other): return str(self) == str(other) else: return False + + +class MoleculeCalculationEntry(Entry): + pass From e2bc08b7f9ae664e7b7308af70248b996a6c3ef1 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 19 Apr 2023 07:33:44 -0700 Subject: [PATCH 09/26] Add tests for graph hashing --- pymatgen/entries/mol_entry.py | 267 ---------------------- pymatgen/util/tests/test_graph_hashing.py | 88 +++++++ 2 files changed, 88 insertions(+), 267 deletions(-) delete mode 100644 pymatgen/entries/mol_entry.py create mode 100644 pymatgen/util/tests/test_graph_hashing.py diff --git a/pymatgen/entries/mol_entry.py b/pymatgen/entries/mol_entry.py deleted file mode 100644 index e50cebcb115..00000000000 --- a/pymatgen/entries/mol_entry.py +++ /dev/null @@ -1,267 +0,0 @@ -from typing import Any, Dict, Optional - -import numpy as np - -from pymatgen.core.structure import Molecule -from pymatgen.analysis.graphs import MoleculeGraph -from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender - -from pymatgen.entries import Entry - - -__author__ = "Evan Spotte-Smith, Samuel Blau, Daniel Barter" -__copyright__ = "Copyright 2023, The Materials Project" -__version__ = "0.1" -__maintainer__ = "Evan Spotte-Smith" -__email__ = "espottesmith@gmail.com" -__date__ = "02/04/2023" - - -class MoleculeEntry(Entry): - """ - A class to provide easy access to Molecule properties. - - Args: - molecule: pymatgen Molecule object - energy: Electronic energy (units: Hartree) - entry_id: Unique identifier for this MoleculeEntry (default: None) - enthalpy: Enthalpy (units: kcal/mol) (default: None) - entropy: Entropy (units: cal/mol-K) (default: None) - mol_graph: graph representation of the Molecule (default: None) - data: Other data associated with this entry (default: None) - """ - - def __init__( - self, - molecule: Molecule, - energy: float, - entry_id: object | None = None, - enthalpy: Optional[float] = None, - entropy: Optional[float] = None, - mol_graph: Optional[MoleculeGraph] = None, - data: Optional[Dict[str, Any]] = None - ): - self.molecule = molecule - - super().__init__( - self.molecule.composition, - self._energy - ) - - self.enthalpy = enthalpy - self.entropy = entropy - - self.entry_id = entry_id - - if not mol_graph: - mol_graph = MoleculeGraph.with_local_env_strategy(molecule, OpenBabelNN()) - self.mol_graph = metal_edge_extender(mol_graph) - else: - self.mol_graph = mol_graph - - if data is None: - self.data = dict() - else: - self.data = data - - @property - def graph(self): - return self.mol_graph.graph.to_undirected() - - @property - def formula(self): - return self.molecule.composition.alphabetical_formula - - @property - def charge(self): - return self.molecule.charge - - @property - def spin_multiplicity(self): - return self.molecule.spin_multiplicity - - @property - def num_atoms(self): - return len(self.molecule) - - @property - def energy(self): - return self._energy - - def free_energy(self, temperature: float = 298.15) -> float: - """ - Gibbs free energy of the molecule (in eV) at a given temperature. - - Args: - temperature (float): Temperature in Kelvin. Default is 298.15, room temperature - """ - - if self.enthalpy is None or self.entropy is None: - raise ValueError("Gibbs free energy requires enthalpy and entropy to be known!") - - u = self.energy * 27.2114 - h = self.enthalpy * 0.043363 - s = self.entropy * 0.000043363 - - return u + h - temperature * s - - @classmethod - def from_libe_madeira( - cls, - doc: Dict, - use_thermo: str = "raw", - ): - """ - Initialize a MoleculeEntry from a document in the LIBE (Lithium-Ion - Battery Electrolyte; Spotte-Smith*, Blau*, et al., Sci. Data 8(203), 2021 - ) or MADEIRA (MAgnesium Dataset of Electrolyte and - Interphase ReAgents; manuscript in submission) datasets. - - Args: - doc: Dictionary representing an entry from LIBE or MADEIRA - use_thermo: One of "raw" (meaning raw, uncorrected thermo data will - be used), "rrho_shifted" (meaning that a slightly modified - Rigid-Rotor Harmonic Oscillator approximation will be used - - see Ribiero et al., J. Phys. Chem. B 2011, 115, 14556-14562), or - "qrrho" (meaning that Grimme's Quasi-Rigid Rotor Harmonic - Oscillator - see Grimme, Chem. Eur. J. 2012, 18, 9955-9964) will - be used. - """ - - thermo = use_thermo.lower() - - if thermo not in ["raw", "rrho_shifted", "qrrho"]: - raise ValueError( - "Only allowed values for use_thermo are 'raw', 'rrho_shifted', " - "and 'qrrho'!" - ) - try: - if isinstance(doc["molecule"], Molecule): - molecule = doc["molecule"] - else: - molecule = Molecule.from_dict(doc["molecule"]) # type: ignore - - if ( - thermo == "rrho_shifted" - and doc["thermo"]["shifted_rrho_eV"] is not None - ): - energy = ( - doc["thermo"]["shifted_rrho_eV"]["electronic_energy"] * 0.0367493 - ) - enthalpy = doc["thermo"]["shifted_rrho_eV"]["total_enthalpy"] * 23.061 - entropy = doc["thermo"]["shifted_rrho_eV"]["total_entropy"] * 23061 - elif thermo == "qrrho" and doc["thermo"]["quasi_rrho_eV"] is not None: - energy = doc["thermo"]["quasi_rrho_eV"]["electronic_energy"] * 0.0367493 - enthalpy = doc["thermo"]["quasi_rrho_eV"]["total_enthalpy"] * 23.061 - entropy = doc["thermo"]["quasi_rrho_eV"]["total_entropy"] * 23061 - else: - energy = doc["thermo"]["raw"]["electronic_energy_Ha"] - enthalpy = doc["thermo"]["raw"]["total_enthalpy_kcal/mol"] - entropy = doc["thermo"]["raw"]["total_entropy_cal/molK"] - - entry_id = doc["molecule_id"] - - if isinstance(doc["molecule_graph"], MoleculeGraph): - mol_graph = doc["molecule_graph"] - else: - mol_graph = MoleculeGraph.from_dict(doc["molecule_graph"]) - - data = dict() - - partial_charges_resp = doc.get('partial_charges', dict()).get('resp') - if partial_charges_resp: - data["partial_charges_resp"] = partial_charges_resp - partial_charges_mulliken = doc.get('partial_charges', dict()).get('mulliken') - if partial_charges_mulliken: - data["partial_charges_mulliken"] = partial_charges_mulliken - - partial_charges_nbo = doc.get('partial_charges', dict()).get('nbo') - if partial_charges_nbo: - data["partial_charges_nbo"] = partial_charges_nbo - partial_spins_nbo = doc.get('partial_spins', dict()).get('nbo') - if partial_spins_nbo: - data["partial_spins_nbo"] = partial_spins_nbo - - if 'redox' in doc: - if 'electron_affinity_eV' in doc['redox']: - data["electron_affinity_eV"] = doc['redox']['electron_affinity_eV'] - - if 'ionization_energy_eV' in doc['redox']: - data["ionization_energy_eV"] = doc['redox']['ionization_energy_eV'] - - except KeyError as e: - raise Exception( - "Unable to construct molecule entry from molecule document; missing " - f"attribute {e} in `doc`." - ) - - return cls( - molecule=molecule, - energy=energy, - entry_id=entry_id, - enthalpy=enthalpy, - entropy=entropy, - mol_graph=mol_graph, - data=data - ) - - @classmethod - def from_mpcule_summary_doc(d: Dict[str, Any], - solvent: str): - """ - Initialize a MoleculeEntry from a summary document in the Materials Project - molecules (MPcules) database. - - - """ - - @classmethod - def from_dict(d: Dict[str, Any]): - pass - - @classmethod - def as_dict(): - pass - - def __repr__(self): - - output = [ - f"MoleculeEntry {self.entry_id} - {self.formula}", - f"Charge = {self.charge}", - f"Spin = {self.spin_multiplicity}" - ] - - energies = [ - ("Energy", "Hartree", self.energy), - ("Enthalpy", "kcal/mol", self.enthalpy), - ("Entropy", "cal/mol.K", self.entropy), - ("Free Energy (298.15 K)", "eV", self.free_energy()), - ] - for name, unit, value in energies: - if value is None: - output.append(f"{name} = {value} {unit}") - else: - output.append(f"{name} = {value:.4f} {unit}") - - if self.ind: - output.append("Index: {}".format(self.ind)) - - if len(self.data) > 0: - output.append("Additional Data:") - for k, v in self.data.items(): - output.append(f"\t{k} = {v}") - - return "\n".join(output) - - def __str__(self): - return self.__repr__() - - def __eq__(self, other): - if type(self) == type(other): - return str(self) == str(other) - else: - return False - - -class MoleculeCalculationEntry(Entry): - pass diff --git a/pymatgen/util/tests/test_graph_hashing.py b/pymatgen/util/tests/test_graph_hashing.py new file mode 100644 index 00000000000..8b3beac4e0d --- /dev/null +++ b/pymatgen/util/tests/test_graph_hashing.py @@ -0,0 +1,88 @@ +""" +Copyright (C) 2004-2022, NetworkX Developers +Aric Hagberg +Dan Schult +Pieter Swart +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NetworkX Developers nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + + +import networkx as nx +from emmet.core.graph_hashing import ( + weisfeiler_lehman_graph_hash, + weisfeiler_lehman_subgraph_hashes +) + + +def test_graph_hash(): + G1 = nx.Graph() + G1.add_edges_from( + [ + (1, 2, {"label": "A"}), + (2, 3, {"label": "A"}), + (3, 1, {"label": "A"}), + (1, 4, {"label": "B"}), + ] + ) + G2 = nx.Graph() + G2.add_edges_from( + [ + (5, 6, {"label": "B"}), + (6, 7, {"label": "A"}), + (7, 5, {"label": "A"}), + (7, 8, {"label": "A"}), + ] + ) + + assert weisfeiler_lehman_graph_hash(G1) == weisfeiler_lehman_graph_hash(G2) + assert weisfeiler_lehman_graph_hash(G1, edge_attr="label") == 'c653d85538bcf041d88c011f4f905f10' + assert weisfeiler_lehman_graph_hash(G2, edge_attr="label") == '3dcd84af1ca855d0eff3c978d88e7ec7' + + +def test_subgraph_hashes(): + G1 = nx.Graph() + G1.add_edges_from( + [ + (1, 2), (2, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 7) + ] + ) + G2 = nx.Graph() + G2.add_edges_from( + [ + (1, 3), (2, 3), (1, 6), (1, 5), (4, 6) + ] + ) + + g1_hashes = weisfeiler_lehman_subgraph_hashes(G1, iterations=3, digest_size=8) + g2_hashes = weisfeiler_lehman_subgraph_hashes(G2, iterations=3, digest_size=8) + + assert g1_hashes[1] == ['a93b64973cfc8897', 'db1b43ae35a1878f', '57872a7d2059c1c0'] + assert g2_hashes[5] == ['a93b64973cfc8897', 'db1b43ae35a1878f', '1716d2a4012fa4bc'] \ No newline at end of file From e4f2e57f816ccb23cd38bb5941f286c69c059980 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 19 Apr 2023 07:35:29 -0700 Subject: [PATCH 10/26] Small tweaks to trajectory tests --- pymatgen/core/tests/test_trajectory.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index aa68fe00eaa..14b7556c323 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -331,7 +331,7 @@ def setUp(self): last_mol = out.data["molecule_from_last_geometry"] species = last_mol.species coords = out.data["geometries"] - + self.molecules = list() for c in coords: mol = Molecule(species, c, charge=last_mol.charge, spin_multiplicity=last_mol.spin_multiplicity) @@ -435,12 +435,12 @@ def test_extend(self): traj.species, [[[-1.46958173, -0.47370158, -0.03391061], [-0.79757102, 0.48588802, 0.94508206], - [0.50256405, 0.8947604 , 0.47698504], + [0.50256405, 0.8947604, 0.47698504], [1.56101382, 0.13356272, 0.79931048], - [1.43897567, -0.8642765 , 1.56363034], + [1.43897567, -0.8642765, 1.56363034], [2.66882238, 0.48431336, 0.30635727], [-2.72606146, -0.81552889, 0.39696593], - [3.307822 , -1.01132269, 1.26654957], + [3.307822, -1.01132269, 1.26654957], [-0.81092724, -1.35590014, -0.1458541], [-1.48634516, 0.02121279, -1.02465009], [-0.71212347, 0.03008471, 1.93272477], From ef6874ef3bbd67b106c64ed342f9990f3d1ad360 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Apr 2023 14:49:41 +0000 Subject: [PATCH 11/26] pre-commit auto-fixes --- pymatgen/core/tests/test_trajectory.py | 59 ++++++++++++----------- pymatgen/core/trajectory.py | 12 +---- pymatgen/util/graph_hashing.py | 13 ++--- pymatgen/util/tests/test_graph_hashing.py | 27 +++-------- 4 files changed, 46 insertions(+), 65 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index 14b7556c323..a472d10aa2f 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -6,12 +6,11 @@ import numpy as np from pymatgen.core.lattice import Lattice -from pymatgen.core.structure import Structure, Molecule -from pymatgen.core.trajectory import Trajectory, MoleculeOptimizeTrajectory +from pymatgen.core.structure import Molecule, Structure +from pymatgen.core.trajectory import MoleculeOptimizeTrajectory, Trajectory +from pymatgen.io.qchem.outputs import QCOutput from pymatgen.io.vasp.inputs import Poscar from pymatgen.io.vasp.outputs import Xdatcar -from pymatgen.io.qchem.outputs import QCOutput - from pymatgen.util.testing import PymatgenTest @@ -332,7 +331,7 @@ def setUp(self): species = last_mol.species coords = out.data["geometries"] - self.molecules = list() + self.molecules = [] for c in coords: mol = Molecule(species, c, charge=last_mol.charge, spin_multiplicity=last_mol.spin_multiplicity) self.molecules.append(mol) @@ -348,9 +347,11 @@ def _check_traj_equality(self, traj_1, traj_2): def _get_species_and_coords(self): species = ["C", "O"] coords = np.asarray( - [[[1.5709474478, -0.16099953, 0.0], [1.9291378639, -1.2161950538, 0.0]], - [[1.5688628148, -0.1548583957, 0.0], [1.9312224969, -1.2223361881, 0.0]], - [[1.5690858055, -0.1555153055, 0.0], [1.9309995062, -1.2216792783, 0.0]]] + [ + [[1.5709474478, -0.16099953, 0.0], [1.9291378639, -1.2161950538, 0.0]], + [[1.5688628148, -0.1548583957, 0.0], [1.9312224969, -1.2223361881, 0.0]], + [[1.5690858055, -0.1555153055, 0.0], [1.9309995062, -1.2216792783, 0.0]], + ] ) return species, coords, 0, 1 @@ -433,22 +434,26 @@ def test_extend(self): # Case of compatible trajectories compatible_traj = MoleculeOptimizeTrajectory( traj.species, - [[[-1.46958173, -0.47370158, -0.03391061], - [-0.79757102, 0.48588802, 0.94508206], - [0.50256405, 0.8947604, 0.47698504], - [1.56101382, 0.13356272, 0.79931048], - [1.43897567, -0.8642765, 1.56363034], - [2.66882238, 0.48431336, 0.30635727], - [-2.72606146, -0.81552889, 0.39696593], - [3.307822, -1.01132269, 1.26654957], - [-0.81092724, -1.35590014, -0.1458541], - [-1.48634516, 0.02121279, -1.02465009], - [-0.71212347, 0.03008471, 1.93272477], - [-1.37888759, 1.40819443, 1.02143913], - [-4.79241099, 0.80275103, -0.39852432], - [-4.28509927, -1.03484764, 0.86348452]]], + [ + [ + [-1.46958173, -0.47370158, -0.03391061], + [-0.79757102, 0.48588802, 0.94508206], + [0.50256405, 0.8947604, 0.47698504], + [1.56101382, 0.13356272, 0.79931048], + [1.43897567, -0.8642765, 1.56363034], + [2.66882238, 0.48431336, 0.30635727], + [-2.72606146, -0.81552889, 0.39696593], + [3.307822, -1.01132269, 1.26654957], + [-0.81092724, -1.35590014, -0.1458541], + [-1.48634516, 0.02121279, -1.02465009], + [-0.71212347, 0.03008471, 1.93272477], + [-1.37888759, 1.40819443, 1.02143913], + [-4.79241099, 0.80275103, -0.39852432], + [-4.28509927, -1.03484764, 0.86348452], + ] + ], 0, - 2 + 2, ) traj.extend(compatible_traj) @@ -473,11 +478,11 @@ def test_extend_site_props(self): num_frames = len(coords) props_1 = { - "test": [[True, True, True], [False, False, False]], - } + "test": [[True, True, True], [False, False, False]], + } props_2 = { - "test": [[False, False, False], [False, False, False]], - } + "test": [[False, False, False], [False, False, False]], + } props_3 = [ { "test": [[True, True, True], [False, False, False]], diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 5940dddac15..fd488d19ae0 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -15,15 +15,7 @@ from monty.io import zopen from monty.json import MSONable -from pymatgen.core.structure import ( - Composition, - DummySpecies, - Element, - Lattice, - Species, - Structure, - Molecule -) +from pymatgen.core.structure import Composition, DummySpecies, Element, Lattice, Molecule, Species, Structure from pymatgen.io.vasp.outputs import Vasprun, Xdatcar __author__ = "Eric Sivonxay, Shyam Dwaraknath, Mingjian Wen, Evan Spotte-Smith" @@ -989,4 +981,4 @@ def _get_site_props(self, frames: int | list[int]) -> SitePropsType | None: if isinstance(frames, list): return [self.site_properties[i] for i in frames] raise ValueError("Unexpected frames type.") - raise ValueError("Unexpected site_properties type.") \ No newline at end of file + raise ValueError("Unexpected site_properties type.") diff --git a/pymatgen/util/graph_hashing.py b/pymatgen/util/graph_hashing.py index 08246d820e9..7212342a16a 100644 --- a/pymatgen/util/graph_hashing.py +++ b/pymatgen/util/graph_hashing.py @@ -40,6 +40,7 @@ For now, only Weisfeiler-Lehman hashing is implemented. """ +from __future__ import annotations from collections import Counter, defaultdict from hashlib import blake2b @@ -70,9 +71,7 @@ def _neighborhood_aggregate(G, node, node_labels, edge_attr=None): return node_labels[node] + "".join(sorted(label_list)) -def weisfeiler_lehman_graph_hash( - G, edge_attr=None, node_attr=None, iterations=3, digest_size=16 -): +def weisfeiler_lehman_graph_hash(G, edge_attr=None, node_attr=None, iterations=3, digest_size=16): """Return Weisfeiler Lehman (WL) graph hash. The function iteratively aggregates and hashes neighbourhoods of each node. @@ -153,9 +152,7 @@ def weisfeiler_lehman_step(G, labels, edge_attr=None): return _hash_label(str(tuple(subgraph_hash_counts)), digest_size) -def weisfeiler_lehman_subgraph_hashes( - G, edge_attr=None, node_attr=None, iterations=3, digest_size=16 -): +def weisfeiler_lehman_subgraph_hashes(G, edge_attr=None, node_attr=None, iterations=3, digest_size=16): """ Return a dictionary of subgraph hashes by node. @@ -259,8 +256,6 @@ def weisfeiler_lehman_step(G, labels, node_subgraph_hashes, edge_attr=None): node_subgraph_hashes = defaultdict(list) for _ in range(iterations): - node_labels = weisfeiler_lehman_step( - G, node_labels, node_subgraph_hashes, edge_attr - ) + node_labels = weisfeiler_lehman_step(G, node_labels, node_subgraph_hashes, edge_attr) return dict(node_subgraph_hashes) diff --git a/pymatgen/util/tests/test_graph_hashing.py b/pymatgen/util/tests/test_graph_hashing.py index 8b3beac4e0d..9ed9d95a4a3 100644 --- a/pymatgen/util/tests/test_graph_hashing.py +++ b/pymatgen/util/tests/test_graph_hashing.py @@ -33,13 +33,10 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ - +from __future__ import annotations import networkx as nx -from emmet.core.graph_hashing import ( - weisfeiler_lehman_graph_hash, - weisfeiler_lehman_subgraph_hashes -) +from emmet.core.graph_hashing import weisfeiler_lehman_graph_hash, weisfeiler_lehman_subgraph_hashes def test_graph_hash(): @@ -63,26 +60,18 @@ def test_graph_hash(): ) assert weisfeiler_lehman_graph_hash(G1) == weisfeiler_lehman_graph_hash(G2) - assert weisfeiler_lehman_graph_hash(G1, edge_attr="label") == 'c653d85538bcf041d88c011f4f905f10' - assert weisfeiler_lehman_graph_hash(G2, edge_attr="label") == '3dcd84af1ca855d0eff3c978d88e7ec7' + assert weisfeiler_lehman_graph_hash(G1, edge_attr="label") == "c653d85538bcf041d88c011f4f905f10" + assert weisfeiler_lehman_graph_hash(G2, edge_attr="label") == "3dcd84af1ca855d0eff3c978d88e7ec7" def test_subgraph_hashes(): G1 = nx.Graph() - G1.add_edges_from( - [ - (1, 2), (2, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 7) - ] - ) + G1.add_edges_from([(1, 2), (2, 3), (2, 4), (3, 5), (4, 6), (5, 7), (6, 7)]) G2 = nx.Graph() - G2.add_edges_from( - [ - (1, 3), (2, 3), (1, 6), (1, 5), (4, 6) - ] - ) + G2.add_edges_from([(1, 3), (2, 3), (1, 6), (1, 5), (4, 6)]) g1_hashes = weisfeiler_lehman_subgraph_hashes(G1, iterations=3, digest_size=8) g2_hashes = weisfeiler_lehman_subgraph_hashes(G2, iterations=3, digest_size=8) - assert g1_hashes[1] == ['a93b64973cfc8897', 'db1b43ae35a1878f', '57872a7d2059c1c0'] - assert g2_hashes[5] == ['a93b64973cfc8897', 'db1b43ae35a1878f', '1716d2a4012fa4bc'] \ No newline at end of file + assert g1_hashes[1] == ["a93b64973cfc8897", "db1b43ae35a1878f", "57872a7d2059c1c0"] + assert g2_hashes[5] == ["a93b64973cfc8897", "db1b43ae35a1878f", "1716d2a4012fa4bc"] From cac6a257f5f74e37ac58cfea3308c3b266fcf602 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 19 Apr 2023 08:12:01 -0700 Subject: [PATCH 12/26] mypy is a cruelty to both programmers and programming languages --- pymatgen/core/tests/test_trajectory.py | 4 ++-- pymatgen/core/trajectory.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index 14b7556c323..be13c791e48 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -334,10 +334,10 @@ def setUp(self): self.molecules = list() for c in coords: - mol = Molecule(species, c, charge=last_mol.charge, spin_multiplicity=last_mol.spin_multiplicity) + mol = Molecule(species, c, charge=int(last_mol.charge), spin_multiplicity=int(last_mol.spin_multiplicity)) self.molecules.append(mol) - self.traj = MoleculeOptimizeTrajectory(species, coords, last_mol.charge, last_mol.spin_multiplicity) + self.traj = MoleculeOptimizeTrajectory(species, coords, int(last_mol.charge), int(last_mol.spin_multiplicity)) def _check_traj_equality(self, traj_1, traj_2): if traj_1.species != traj_2.species: diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 5940dddac15..6e18a6b1aaf 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -606,7 +606,7 @@ class MoleculeOptimizeTrajectory(MSONable): def __init__( self, species: list[str | Element | Species | DummySpecies | Composition], - coords: list[list[Vector3D]] | np.ndarray, + coords: list[list[Vector3D]] | np.ndarray | list[np.ndarray], charge: int, spin_multiplicity: int, *, From 1dc02f41a11d27781e50c88a0b5b086dbab823a3 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 19 Apr 2023 08:18:39 -0700 Subject: [PATCH 13/26] Ah, yes. Let's just staple type-checking to a language built around duck-typing. That'll go great --- pymatgen/core/trajectory.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 21e7604df96..19c70561794 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -599,8 +599,8 @@ def __init__( self, species: list[str | Element | Species | DummySpecies | Composition], coords: list[list[Vector3D]] | np.ndarray | list[np.ndarray], - charge: int, - spin_multiplicity: int, + charge: int | float, + spin_multiplicity: int | float, *, site_properties: SitePropsType | None = None, frame_properties: list[dict] | None = None, @@ -663,8 +663,8 @@ def __init__( self.species = species self.coords = np.asarray(coords) - self.charge = charge - self.spin_multiplicity = spin_multiplicity + self.charge = int(charge) + self.spin_multiplicity = int(spin_multiplicity) self.time_step = time_step self._check_site_props(site_properties) From a8ca931aade58435f51cfdbe3d22f97b998973aa Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Wed, 19 Apr 2023 14:00:57 -0700 Subject: [PATCH 14/26] fix common typo MSONAble->MSONable --- CHANGES.rst | 2 +- docs_rst/change_log.rst | 2 +- pymatgen/alchemy/filters.py | 2 +- .../chemenv/coordination_environments/chemenv_strategies.py | 6 +++--- pymatgen/analysis/chemenv/utils/graph_utils.py | 2 +- pymatgen/analysis/diffraction/tests/test_xrd.py | 2 +- pymatgen/analysis/molecule_matcher.py | 4 ++-- .../structure_prediction/substitution_probability.py | 2 +- pymatgen/apps/battery/insertion_battery.py | 2 +- pymatgen/apps/borg/hive.py | 2 +- pymatgen/core/interface.py | 2 +- pymatgen/core/operations.py | 2 +- pymatgen/core/periodic_table.py | 2 +- pymatgen/core/surface.py | 2 +- pymatgen/core/tensors.py | 2 +- pymatgen/core/trajectory.py | 5 ++--- pymatgen/entries/computed_entries.py | 4 ++-- pymatgen/io/qchem/outputs.py | 2 +- pymatgen/io/shengbte.py | 2 +- pymatgen/io/vasp/outputs.py | 2 +- pymatgen/io/vasp/sets.py | 2 +- pymatgen/symmetry/structure.py | 2 +- 22 files changed, 27 insertions(+), 28 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index ad106f38322..b3280deac4f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -686,7 +686,7 @@ v2020.7.16 v2020.7.14 ---------- -* EwaldSummation is now MSONAble (@lbluque). +* EwaldSummation is now MSONable (@lbluque). * Fix for QChem freq parsing (@samblau) * Much improved linting and workflows. diff --git a/docs_rst/change_log.rst b/docs_rst/change_log.rst index ad106f38322..b3280deac4f 100644 --- a/docs_rst/change_log.rst +++ b/docs_rst/change_log.rst @@ -686,7 +686,7 @@ v2020.7.16 v2020.7.14 ---------- -* EwaldSummation is now MSONAble (@lbluque). +* EwaldSummation is now MSONable (@lbluque). * Fix for QChem freq parsing (@samblau) * Much improved linting and workflows. diff --git a/pymatgen/alchemy/filters.py b/pymatgen/alchemy/filters.py index 079e644207c..94d3bf52634 100644 --- a/pymatgen/alchemy/filters.py +++ b/pymatgen/alchemy/filters.py @@ -96,7 +96,7 @@ def __repr__(self): def as_dict(self): """ - Returns: MSONAble dict + Returns: MSONable dict """ return { "@module": type(self).__module__, diff --git a/pymatgen/analysis/chemenv/coordination_environments/chemenv_strategies.py b/pymatgen/analysis/chemenv/coordination_environments/chemenv_strategies.py index 8a22593c87a..0bdcd911935 100644 --- a/pymatgen/analysis/chemenv/coordination_environments/chemenv_strategies.py +++ b/pymatgen/analysis/chemenv/coordination_environments/chemenv_strategies.py @@ -77,7 +77,7 @@ def __new__(cls, myfloat): return flt def as_dict(self): - """MSONAble dict""" + """MSONable dict""" return { "@module": type(self).__module__, "@class": type(self).__name__, @@ -109,7 +109,7 @@ def __new__(cls, myfloat): return flt def as_dict(self): - """MSONAble dict""" + """MSONable dict""" return { "@module": type(self).__module__, "@class": type(self).__name__, @@ -1300,7 +1300,7 @@ def __eq__(self, other: object) -> bool: return self.aa == other.aa # type: ignore def as_dict(self): - """MSONAble dict""" + """MSONable dict""" return { "@module": type(self).__module__, "@class": type(self).__name__, diff --git a/pymatgen/analysis/chemenv/utils/graph_utils.py b/pymatgen/analysis/chemenv/utils/graph_utils.py index 52463bda4d7..b33bed74c18 100644 --- a/pymatgen/analysis/chemenv/utils/graph_utils.py +++ b/pymatgen/analysis/chemenv/utils/graph_utils.py @@ -296,7 +296,7 @@ def from_edges(cls, edges, edges_are_ordered=True): def as_dict(self): """ - :return: MSONAble dict + :return: MSONable dict """ d = MSONable.as_dict(self) # Transforming tuple object to a list to allow BSON and MongoDB diff --git a/pymatgen/analysis/diffraction/tests/test_xrd.py b/pymatgen/analysis/diffraction/tests/test_xrd.py index d665e62429f..464e9f20509 100644 --- a/pymatgen/analysis/diffraction/tests/test_xrd.py +++ b/pymatgen/analysis/diffraction/tests/test_xrd.py @@ -32,7 +32,7 @@ def test_get_pattern(self): struct = self.get_structure("CsCl") c = XRDCalculator() xrd = c.get_pattern(struct, two_theta_range=(0, 90)) - assert xrd.to_json() # Test MSONAble property + assert xrd.to_json() # Test MSONable property # Check the first two peaks assert xrd.x[0] == pytest.approx(21.107738329639844) assert xrd.y[0] == pytest.approx(36.483184003748946) diff --git a/pymatgen/analysis/molecule_matcher.py b/pymatgen/analysis/molecule_matcher.py index 9526ca430e4..8977c331e9d 100644 --- a/pymatgen/analysis/molecule_matcher.py +++ b/pymatgen/analysis/molecule_matcher.py @@ -218,7 +218,7 @@ def __init__(self, angle_tolerance=10.0): def as_dict(self): """ Returns: - MSONAble dict. + MSONable dict. """ return { "version": __version__, @@ -710,7 +710,7 @@ def group_molecules(self, mol_list): def as_dict(self): """ Returns: - MSONAble dict. + MSONable dict. """ return { "version": __version__, diff --git a/pymatgen/analysis/structure_prediction/substitution_probability.py b/pymatgen/analysis/structure_prediction/substitution_probability.py index 713865a4401..323b7375381 100644 --- a/pymatgen/analysis/structure_prediction/substitution_probability.py +++ b/pymatgen/analysis/structure_prediction/substitution_probability.py @@ -154,7 +154,7 @@ def cond_prob_list(self, l1, l2): def as_dict(self): """ - Returns: MSONAble dict + Returns: MSONable dict """ return { "name": type(self).__name__, diff --git a/pymatgen/apps/battery/insertion_battery.py b/pymatgen/apps/battery/insertion_battery.py index 10be2040a8a..e6e60825003 100644 --- a/pymatgen/apps/battery/insertion_battery.py +++ b/pymatgen/apps/battery/insertion_battery.py @@ -389,7 +389,7 @@ def from_dict_legacy(cls, d): def as_dict_legacy(self): """ - Returns: MSONAble dict + Returns: MSONable dict """ return { "@module": type(self).__module__, diff --git a/pymatgen/apps/borg/hive.py b/pymatgen/apps/borg/hive.py index 51c106b82c3..9dda3f527a1 100644 --- a/pymatgen/apps/borg/hive.py +++ b/pymatgen/apps/borg/hive.py @@ -291,7 +291,7 @@ def __str__(self): def as_dict(self): """ - Returns: MSONAble dict + Returns: MSONable dict """ return { "init_args": {"inc_structure": self._inc_structure}, diff --git a/pymatgen/core/interface.py b/pymatgen/core/interface.py index 783f1241554..2004e0c1459 100644 --- a/pymatgen/core/interface.py +++ b/pymatgen/core/interface.py @@ -300,7 +300,7 @@ def __update_c(self, new_c: float) -> None: def as_dict(self): """ - :return: MSONAble dict + :return: MSONable dict """ d = super().as_dict() d["in_plane_offset"] = self.in_plane_offset.tolist() diff --git a/pymatgen/core/operations.py b/pymatgen/core/operations.py index 69ee5e75cb6..f55679baa3d 100644 --- a/pymatgen/core/operations.py +++ b/pymatgen/core/operations.py @@ -427,7 +427,7 @@ def rotoreflection(axis: ArrayLike, angle: float, origin: ArrayLike = (0, 0, 0)) def as_dict(self) -> dict[str, Any]: """ - :return: MSONAble dict. + :return: MSONable dict. """ return { "@module": type(self).__module__, diff --git a/pymatgen/core/periodic_table.py b/pymatgen/core/periodic_table.py index 2f4e0ff53ad..e912104d5ec 100644 --- a/pymatgen/core/periodic_table.py +++ b/pymatgen/core/periodic_table.py @@ -1510,7 +1510,7 @@ def from_string(species_string: str) -> DummySpecies: def as_dict(self) -> dict: """ - :return: MSONAble dict representation. + :return: MSONable dict representation. """ d = { "@module": type(self).__module__, diff --git a/pymatgen/core/surface.py b/pymatgen/core/surface.py index f109a559194..ba488c6b9a0 100644 --- a/pymatgen/core/surface.py +++ b/pymatgen/core/surface.py @@ -495,7 +495,7 @@ def to_s(x): def as_dict(self): """ - :return: MSONAble dict + :return: MSONable dict """ d = super().as_dict() d["@module"] = type(self).__module__ diff --git a/pymatgen/core/tensors.py b/pymatgen/core/tensors.py index e57f6bc52bb..a07d71575e2 100644 --- a/pymatgen/core/tensors.py +++ b/pymatgen/core/tensors.py @@ -686,7 +686,7 @@ def as_dict(self, voigt: bool = False) -> dict: @classmethod def from_dict(cls, d) -> Tensor: - """Instantiate Tensors from dicts (using MSONAble API). + """Instantiate Tensors from dicts (using MSONable API). Returns: Tensor: hydrated tensor object diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 19c70561794..b2a189127d1 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -380,7 +380,7 @@ def write_Xdatcar( def as_dict(self) -> dict: """ - Return the trajectory as a MSONAble dict. + Return the trajectory as a MSONable dict. """ return { "@module": type(self).__module__, @@ -649,7 +649,6 @@ def __init__( `coords_are_displacement=True`. Defaults to the first index of `coords` when `coords_are_displacement=False`. """ - if coords_are_displacement: if base_positions is None: warnings.warn( @@ -836,7 +835,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | MoleculeOpt def as_dict(self) -> dict: """ - Return the trajectory as a MSONAble dict. + Return the trajectory as a MSONable dict. """ return { "@module": type(self).__module__, diff --git a/pymatgen/entries/computed_entries.py b/pymatgen/entries/computed_entries.py index 4235079a818..b7ce8030ba2 100644 --- a/pymatgen/entries/computed_entries.py +++ b/pymatgen/entries/computed_entries.py @@ -645,7 +645,7 @@ def structure(self) -> Structure: def as_dict(self) -> dict: """ - :return: MSONAble dict. + :return: MSONable dict. """ d = super().as_dict() d["structure"] = self.structure.as_dict() @@ -986,7 +986,7 @@ def from_entries(cls, entries, temp=300, gibbs_model="SISSO") -> list[GibbsCompu def as_dict(self) -> dict: """ - :return: MSONAble dict. + :return: MSONable dict. """ d = super().as_dict() d["formation_enthalpy_per_atom"] = self.formation_enthalpy_per_atom diff --git a/pymatgen/io/qchem/outputs.py b/pymatgen/io/qchem/outputs.py index 1e75b43877a..3f15ae1383e 100644 --- a/pymatgen/io/qchem/outputs.py +++ b/pymatgen/io/qchem/outputs.py @@ -2138,7 +2138,7 @@ def _check_completion_errors(self): def as_dict(self): """ Returns: - MSONAble dict. + MSONable dict. """ d = {} d["data"] = self.data diff --git a/pymatgen/io/shengbte.py b/pymatgen/io/shengbte.py index f30aa40ee9a..18839c72774 100644 --- a/pymatgen/io/shengbte.py +++ b/pymatgen/io/shengbte.py @@ -269,7 +269,7 @@ def get_structure(self) -> Structure: def as_dict(self): """ - Returns: MSONAble dict + Returns: MSONable dict """ return dict(self) diff --git a/pymatgen/io/vasp/outputs.py b/pymatgen/io/vasp/outputs.py index 5b56e1f24f5..b85b47b0cb3 100644 --- a/pymatgen/io/vasp/outputs.py +++ b/pymatgen/io/vasp/outputs.py @@ -3318,7 +3318,7 @@ def read_avg_core_poten(self): def as_dict(self): """ - :return: MSONAble dict. + :return: MSONable dict. """ d = { "@module": type(self).__module__, diff --git a/pymatgen/io/vasp/sets.py b/pymatgen/io/vasp/sets.py index 3e42d2bf747..51b9fb02f76 100644 --- a/pymatgen/io/vasp/sets.py +++ b/pymatgen/io/vasp/sets.py @@ -2231,7 +2231,7 @@ def kpoints(self): def as_dict(self, verbosity=2): """ :param verbosity: Verbosity of dict. E.g., whether to include Structure. - :return: MSONAble dict + :return: MSONable dict """ d = MSONable.as_dict(self) if verbosity == 1: diff --git a/pymatgen/symmetry/structure.py b/pymatgen/symmetry/structure.py index 9b16f1553aa..873979611fc 100644 --- a/pymatgen/symmetry/structure.py +++ b/pymatgen/symmetry/structure.py @@ -132,7 +132,7 @@ def to_s(x): def as_dict(self): """ - :return: MSONAble dict + :return: MSONable dict """ structure = Structure.from_sites(self.sites) return { From 625fa1295227044ebf996251a52bcae7deeaf7f5 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Wed, 19 Apr 2023 14:01:16 -0700 Subject: [PATCH 15/26] tweak PR template --- .github/PULL_REQUEST_TEMPLATE.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d5cbb42625c..7b164c1532e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -24,11 +24,11 @@ Before a pull request can be merged, the following items must be checked: - [ ] Tests have been added for any new functionality or bug fixes. - [ ] All linting and tests pass. -Note that the CI system will run all the above checks. But it will be much more efficient if you already fix most errors prior to submitting the PR. We highly recommended installing `pre-commit` hooks. Simply Run +Our CI will run all the above checks but it might be more efficient if you already fix most errors before submitting the PR. We highly recommended installing `pre-commit` hooks. Simply Run ```sh pip install -U pre-commit pre-commit install ``` -in the repo's root directory. Afterwards linters will run before every commit and abort if any issues pop up. +in the repo's root directory. Once `pre-commit` has installed `git` hooks, our linters will run before every commit and abort if issues pop up. From ff3946738f31f455769a7eef5a38667a751df1a0 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Wed, 19 Apr 2023 14:02:02 -0700 Subject: [PATCH 16/26] fix supported types list --- pymatgen/core/trajectory.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index b2a189127d1..f7dccac4eb8 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -267,7 +267,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory Return: Subset of trajectory """ - # Convert to position mode if not ready + # Convert to position mode if not already self.to_positions() # For integer input, return the structure at that frame @@ -318,7 +318,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory base_positions=self.base_positions, ) - supported = [int, slice, list or np.ndarray] + supported = [int, slice, list, np.ndarray] raise ValueError(f"Expect the type of frames be one of {supported}; {type(frames)}.") def write_Xdatcar( @@ -782,7 +782,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | MoleculeOpt Return: Subset of trajectory """ - # Convert to position mode if not ready + # Convert to position mode if not already self.to_positions() # For integer input, return the structure at that frame @@ -830,7 +830,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | MoleculeOpt base_positions=self.base_positions, ) - supported = [int, slice, list or np.ndarray] + supported = [int, slice, list, np.ndarray] raise ValueError(f"Expect the type of frames be one of {supported}; {type(frames)}.") def as_dict(self) -> dict: From 652fe454031b9f8c7504d285baedc1e390ed20e5 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Wed, 19 Apr 2023 14:02:17 -0700 Subject: [PATCH 17/26] fix from_molecules() return type --- pymatgen/core/trajectory.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index f7dccac4eb8..13ad9012c9e 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -852,11 +852,7 @@ def as_dict(self) -> dict: } @classmethod - def from_molecules( - cls, - molecules: list[Molecule], - **kwargs, - ) -> Trajectory: + def from_molecules(cls, molecules: list[Molecule], **kwargs) -> MoleculeOptimizeTrajectory: """ Create trajectory from a list of molecules. @@ -864,11 +860,11 @@ def from_molecules( Args: molecules: pymatgen Molecules objects. + **kwargs: Passed to the class constructor. Returns: A trajectory from the structures. """ - species = molecules[0].species coords = [mol.cart_coords for mol in molecules] site_properties = [mol.site_properties for mol in molecules] From 4994b451f112446863724686a7b6ccdf7906180e Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 19 Apr 2023 18:59:55 -0700 Subject: [PATCH 18/26] Requested change --- pymatgen/core/tests/test_trajectory.py | 532 ++++++++---------- pymatgen/core/trajectory.py | 714 ++++++++----------------- 2 files changed, 457 insertions(+), 789 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index 25013d3625b..73551bebd0b 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -7,7 +7,7 @@ from pymatgen.core.lattice import Lattice from pymatgen.core.structure import Molecule, Structure -from pymatgen.core.trajectory import MoleculeOptimizeTrajectory, Trajectory +from pymatgen.core.trajectory import Trajectory from pymatgen.io.qchem.outputs import QCOutput from pymatgen.io.vasp.inputs import Poscar from pymatgen.io.vasp.outputs import Xdatcar @@ -20,9 +20,28 @@ def setUp(self): self.traj = Trajectory.from_file(os.path.join(PymatgenTest.TEST_FILES_DIR, "Traj_XDATCAR")) self.structures = xdatcar.structures + out = QCOutput(os.path.join(PymatgenTest.TEST_FILES_DIR, "molecules", "new_qchem_files", "ts.out")) + last_mol = out.data["molecule_from_last_geometry"] + species = last_mol.species + coords = out.data["geometries"] + + self.molecules = list() + for c in coords: + mol = Molecule(species, c, charge=int(last_mol.charge), spin_multiplicity=int(last_mol.spin_multiplicity)) + self.molecules.append(mol) + + self.traj_mols = Trajectory( + use_molecule=True, + species=species, + coords=coords, + charge=int(last_mol.charge), + spin_multiplicity=int(last_mol.spin_multiplicity) + ) + def _check_traj_equality(self, traj_1, traj_2): - if not np.allclose(traj_1.lattice, traj_2.lattice): - return False + if not traj_1.use_molecule: + if not np.allclose(traj_1.lattice, traj_2.lattice): + return False if traj_1.species != traj_2.species: return False @@ -32,17 +51,29 @@ def _check_traj_equality(self, traj_1, traj_2): def _get_lattice_species_and_coords(self): lattice = ((1, 0, 0), (0, 1, 0), (0, 0, 1)) species = ["Si", "Si"] - frac_coords = np.asarray( + coords = np.asarray( [ [[0, 0, 0], [0.5, 0.5, 0.5]], [[0.1, 0.1, 0.1], [0.6, 0.6, 0.6]], [[0.2, 0.2, 0.2], [0.7, 0.7, 0.7]], ] ) - return lattice, species, frac_coords + return lattice, species, coords + + def _get_species_and_coords(self): + species = ["C", "O"] + coords = np.asarray( + [ + [[1.5709474478, -0.16099953, 0.0], [1.9291378639, -1.2161950538, 0.0]], + [[1.5688628148, -0.1548583957, 0.0], [1.9312224969, -1.2223361881, 0.0]], + [[1.5690858055, -0.1555153055, 0.0], [1.9309995062, -1.2216792783, 0.0]], + ] + ) + return species, coords, 0, 1 def test_single_index_slice(self): assert all(self.traj[i] == self.structures[i] for i in range(0, len(self.structures), 19)) + assert all([self.traj_mols[i] == self.molecules[i] for i in range(0, len(self.molecules))]) def test_slice(self): sliced_traj = self.traj[2:99:3] @@ -61,6 +92,22 @@ def test_slice(self): else: raise AssertionError + sliced_traj = self.traj_mols[0:2] + sliced_traj_from_mols = Trajectory.from_molecules(self.molecules[0:2]) + + if len(sliced_traj) == len(sliced_traj_from_mols): + assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) + else: + raise AssertionError + + sliced_traj = self.traj_mols[:-2] + sliced_traj_from_mols = Trajectory.from_molecules(self.molecules[:-2]) + + if len(sliced_traj) == len(sliced_traj_from_mols): + assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) + else: + raise AssertionError + def test_list_slice(self): sliced_traj = self.traj[[10, 30, 70]] sliced_traj_from_structs = Trajectory.from_structures([self.structures[i] for i in [10, 30, 70]]) @@ -70,6 +117,14 @@ def test_list_slice(self): else: raise AssertionError + sliced_traj = self.traj_mols[[1, 3]] + sliced_traj_from_mols = Trajectory.from_molecules([self.molecules[i] for i in [1, 3]]) + + if len(sliced_traj) == len(sliced_traj_from_mols): + assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) + else: + raise AssertionError + def test_conversion(self): # Convert to displacements and back, and then check structures. self.traj.to_displacements() @@ -77,8 +132,13 @@ def test_conversion(self): assert all(struct == self.structures[i] for i, struct in enumerate(self.traj)) + self.traj_mols.to_displacements() + self.traj_mols.to_positions() + + assert all([mol == self.molecules[i] for i, mol in enumerate(self.traj_mols)]) + def test_site_properties(self): - lattice, species, frac_coords = self._get_lattice_species_and_coords() + lattice, species, coords = self._get_lattice_species_and_coords() props = [ { @@ -94,7 +154,36 @@ def test_site_properties(self): "magmom": [5, 5], }, ] - traj = Trajectory(lattice, species, frac_coords, site_properties=props) + traj = Trajectory(use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=props) + + # compare the overall site properties list + assert traj.site_properties == props + + # compare the site properties after slicing + assert traj[0].site_properties == props[0] + assert traj[1:].site_properties == props[1:] + + species, coords, charge, spin = self._get_species_and_coords() + + props = [ + { + "test": [[True, True, True], [False, False, False]], + }, + { + "test": [[False, False, False], [False, False, False]], + }, + { + "test": [[True, True, True], [False, False, False]], + }, + ] + traj = Trajectory( + use_molecule=True, + species=species, + coords=coords, + charge=charge, + spin_multiplicity=spin, + site_properties=props + ) # compare the overall site properties list assert traj.site_properties == props @@ -104,11 +193,31 @@ def test_site_properties(self): assert traj[1:].site_properties == props[1:] def test_frame_properties(self): - lattice, species, frac_coords = self._get_lattice_species_and_coords() + lattice, species, coords = self._get_lattice_species_and_coords() props = [{"energy_per_atom": e} for e in [-3.0001, -3.0971, -3.0465]] - traj = Trajectory(lattice, species, frac_coords, frame_properties=props) + traj = Trajectory(use_molecule=False, lattice=lattice, species=species, coords=coords, frame_properties=props) + + # compare the overall site properties + assert traj.frame_properties == props + + # compare the site properties after slicing + expected = props[1:] + assert traj[1:].frame_properties == expected + + species, coords, charge, spin = self._get_species_and_coords() + + props = [{"SCF_energy_in_the_final_basis_set": e} for e in [-113.3256885788, -113.3260019471, -113.326006415]] + + traj = Trajectory( + use_molecule=True, + species=species, + coords=coords, + charge=charge, + spin_multiplicity=spin, + frame_properties=props + ) # compare the overall site properties assert traj.frame_properties == props @@ -140,9 +249,60 @@ def test_extend(self): assert compatible_success and incompatible_test_success + traj = copy.deepcopy(self.traj_mols) + + # Case of compatible trajectories + compatible_traj = Trajectory( + use_molecule=True, + species=traj.species, + coords=[ + [ + [-1.46958173, -0.47370158, -0.03391061], + [-0.79757102, 0.48588802, 0.94508206], + [0.50256405, 0.8947604, 0.47698504], + [1.56101382, 0.13356272, 0.79931048], + [1.43897567, -0.8642765, 1.56363034], + [2.66882238, 0.48431336, 0.30635727], + [-2.72606146, -0.81552889, 0.39696593], + [3.307822, -1.01132269, 1.26654957], + [-0.81092724, -1.35590014, -0.1458541], + [-1.48634516, 0.02121279, -1.02465009], + [-0.71212347, 0.03008471, 1.93272477], + [-1.37888759, 1.40819443, 1.02143913], + [-4.79241099, 0.80275103, -0.39852432], + [-4.28509927, -1.03484764, 0.86348452], + ] + ], + charge=0, + spin_multiplicity=2, + ) + traj.extend(compatible_traj) + + assert len(traj) == 5 + assert traj[-2] == traj[-1] + + # Case of incompatible trajectories + species, coords, charge, spin = self._get_species_and_coords() + + traj = copy.deepcopy(self.traj) + incompatible_traj = Trajectory( + use_molecule=True, + species=species, + coords=coords, + charge=charge, + spin_multiplicity=spin + ) + incompatible_test_success = False + try: + traj.extend(incompatible_traj) + except Exception: + incompatible_test_success = True + + assert incompatible_test_success + def test_extend_site_props(self): - lattice, species, frac_coords = self._get_lattice_species_and_coords() - num_frames = len(frac_coords) + lattice, species, coords = self._get_lattice_species_and_coords() + num_frames = len(coords) props_1 = { "selective_dynamics": [[False, False, False], [False, False, False]], @@ -167,10 +327,37 @@ def test_extend_site_props(self): }, ] - traj_1 = Trajectory(lattice, species, frac_coords, site_properties=props_1) - traj_2 = Trajectory(lattice, species, frac_coords, site_properties=props_2) - traj_3 = Trajectory(lattice, species, frac_coords, site_properties=props_3) - traj_4 = Trajectory(lattice, species, frac_coords, site_properties=None) + traj_1 = Trajectory( + use_molecule=False, + lattice=lattice, + species=species, + coords=coords, + site_properties=props_1 + ) + + traj_2 = Trajectory( + use_molecule=False, + lattice=lattice, + species=species, + coords=coords, + site_properties=props_2 + ) + + traj_3 = Trajectory( + use_molecule=False, + lattice=lattice, + species=species, + coords=coords, + site_properties=props_3 + ) + + traj_4 = Trajectory( + use_molecule=False, + lattice=lattice, + species=species, + coords=coords, + site_properties=None + ) # const & const (both constant and the same site properties) traj_combined = copy.deepcopy(traj_1) @@ -233,7 +420,7 @@ def test_extend_site_props(self): assert traj_combined.site_properties == expected_site_props def test_extend_frame_props(self): - lattice, species, frac_coords = self._get_lattice_species_and_coords() + lattice, species, coords = self._get_lattice_species_and_coords() energy_1 = [-3, -3.9, -4.1] energy_2 = [-4.2, -4.25, -4.3] @@ -241,14 +428,32 @@ def test_extend_frame_props(self): # energy only properties props_1 = [{"energy": e} for e in energy_1] - traj_1 = Trajectory(lattice, species, frac_coords, frame_properties=props_1) + traj_1 = Trajectory( + use_molecule=False, + lattice=lattice, + species=species, + coords=coords, + frame_properties=props_1 + ) # energy and pressure properties props_2 = [{"energy": e, "pressure": p} for e, p in zip(energy_2, pressure_2)] - traj_2 = Trajectory(lattice, species, frac_coords, frame_properties=props_2) + traj_2 = Trajectory( + use_molecule=False, + lattice=lattice, + species=species, + coords=coords, + frame_properties=props_2 + ) # no properties - traj_3 = Trajectory(lattice, species, frac_coords, frame_properties=None) + traj_3 = Trajectory( + use_molecule=False, + lattice=lattice, + species=species, + coords=coords, + frame_properties=None + ) # test combining two with different properties traj_combined = copy.deepcopy(traj_1) @@ -259,7 +464,7 @@ def test_extend_frame_props(self): # test combining two where one has properties and the other does not traj_combined = copy.deepcopy(traj_1) traj_combined.extend(traj_3) - expected_props = props_1 + [None] * len(frac_coords) + expected_props = props_1 + [None] * len(coords) assert traj_combined.frame_properties == expected_props # test combining two both of which have no properties @@ -269,6 +474,7 @@ def test_extend_frame_props(self): def test_length(self): assert len(self.traj) == len(self.structures) + assert len(self.traj_mols) == len(self.molecules) def test_displacements(self): poscar = Poscar.from_file(os.path.join(PymatgenTest.TEST_FILES_DIR, "POSCAR")) @@ -284,7 +490,7 @@ def test_displacements(self): traj = Trajectory.from_structures(structures, constant_lattice=True) traj.to_displacements() - assert np.allclose(traj.frac_coords, displacements) + assert np.allclose(traj.coords, displacements) def test_variable_lattice(self): structure = self.structures[0] @@ -315,6 +521,10 @@ def test_to_from_dict(self): traj = Trajectory.from_dict(d) assert isinstance(traj, Trajectory) + d = self.traj_mols.as_dict() + traj = Trajectory.from_dict(d) + assert isinstance(traj, Trajectory) + def test_xdatcar_write(self): self.traj.write_Xdatcar(filename="traj_test_XDATCAR") @@ -324,286 +534,6 @@ def test_xdatcar_write(self): os.remove("traj_test_XDATCAR") -class MoleculeOptimizeTrajectoryTest(PymatgenTest): - def setUp(self): - out = QCOutput(os.path.join(PymatgenTest.TEST_FILES_DIR, "molecules", "new_qchem_files", "ts.out")) - last_mol = out.data["molecule_from_last_geometry"] - species = last_mol.species - coords = out.data["geometries"] - - self.molecules = [] - for c in coords: - mol = Molecule(species, c, charge=int(last_mol.charge), spin_multiplicity=int(last_mol.spin_multiplicity)) - self.molecules.append(mol) - - self.traj = MoleculeOptimizeTrajectory(species, coords, int(last_mol.charge), int(last_mol.spin_multiplicity)) - - def _check_traj_equality(self, traj_1, traj_2): - if traj_1.species != traj_2.species: - return False - - return all(i == j for i, j in zip(self.traj, traj_2)) - - def _get_species_and_coords(self): - species = ["C", "O"] - coords = np.asarray( - [ - [[1.5709474478, -0.16099953, 0.0], [1.9291378639, -1.2161950538, 0.0]], - [[1.5688628148, -0.1548583957, 0.0], [1.9312224969, -1.2223361881, 0.0]], - [[1.5690858055, -0.1555153055, 0.0], [1.9309995062, -1.2216792783, 0.0]], - ] - ) - return species, coords, 0, 1 - - def test_single_index_slice(self): - assert all([self.traj[i] == self.molecules[i] for i in range(0, len(self.molecules))]) - - def test_slice(self): - sliced_traj = self.traj[0:2] - sliced_traj_from_mols = MoleculeOptimizeTrajectory.from_molecules(self.molecules[0:2]) - - if len(sliced_traj) == len(sliced_traj_from_mols): - assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) - else: - raise AssertionError - - sliced_traj = self.traj[:-2] - sliced_traj_from_mols = MoleculeOptimizeTrajectory.from_molecules(self.molecules[:-2]) - - if len(sliced_traj) == len(sliced_traj_from_mols): - assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) - else: - raise AssertionError - - def test_list_slice(self): - sliced_traj = self.traj[[1, 3]] - sliced_traj_from_mols = MoleculeOptimizeTrajectory.from_molecules([self.molecules[i] for i in [1, 3]]) - - if len(sliced_traj) == len(sliced_traj_from_mols): - assert all([sliced_traj[i] == sliced_traj_from_mols[i] for i in range(len(sliced_traj))]) - else: - raise AssertionError - - def test_conversion(self): - # Convert to displacements and back, and then check structures. - self.traj.to_displacements() - self.traj.to_positions() - - assert all([mol == self.molecules[i] for i, mol in enumerate(self.traj)]) - - def test_site_properties(self): - species, coords, charge, spin = self._get_species_and_coords() - - props = [ - { - "test": [[True, True, True], [False, False, False]], - }, - { - "test": [[False, False, False], [False, False, False]], - }, - { - "test": [[True, True, True], [False, False, False]], - }, - ] - traj = MoleculeOptimizeTrajectory(species, coords, charge, spin, site_properties=props) - - # compare the overall site properties list - assert traj.site_properties == props - - # compare the site properties after slicing - assert traj[0].site_properties == props[0] - assert traj[1:].site_properties == props[1:] - - def test_frame_properties(self): - species, coords, charge, spin = self._get_species_and_coords() - - props = [{"SCF_energy_in_the_final_basis_set": e} for e in [-113.3256885788, -113.3260019471, -113.326006415]] - - traj = MoleculeOptimizeTrajectory(species, coords, charge, spin, frame_properties=props) - - # compare the overall site properties - assert traj.frame_properties == props - - # compare the site properties after slicing - expected = props[1:] - assert traj[1:].frame_properties == expected - - def test_extend(self): - traj = copy.deepcopy(self.traj) - - # Case of compatible trajectories - compatible_traj = MoleculeOptimizeTrajectory( - traj.species, - [ - [ - [-1.46958173, -0.47370158, -0.03391061], - [-0.79757102, 0.48588802, 0.94508206], - [0.50256405, 0.8947604, 0.47698504], - [1.56101382, 0.13356272, 0.79931048], - [1.43897567, -0.8642765, 1.56363034], - [2.66882238, 0.48431336, 0.30635727], - [-2.72606146, -0.81552889, 0.39696593], - [3.307822, -1.01132269, 1.26654957], - [-0.81092724, -1.35590014, -0.1458541], - [-1.48634516, 0.02121279, -1.02465009], - [-0.71212347, 0.03008471, 1.93272477], - [-1.37888759, 1.40819443, 1.02143913], - [-4.79241099, 0.80275103, -0.39852432], - [-4.28509927, -1.03484764, 0.86348452], - ] - ], - 0, - 2, - ) - traj.extend(compatible_traj) - - assert len(traj) == 5 - assert traj[-2] == traj[-1] - - # Case of incompatible trajectories - species, coords, charge, spin = self._get_species_and_coords() - - traj = copy.deepcopy(self.traj) - incompatible_traj = MoleculeOptimizeTrajectory(species, coords, charge, spin) - incompatible_test_success = False - try: - traj.extend(incompatible_traj) - except Exception: - incompatible_test_success = True - - assert incompatible_test_success - - def test_extend_site_props(self): - species, coords, charge, spin = self._get_species_and_coords() - num_frames = len(coords) - - props_1 = { - "test": [[True, True, True], [False, False, False]], - } - props_2 = { - "test": [[False, False, False], [False, False, False]], - } - props_3 = [ - { - "test": [[True, True, True], [False, False, False]], - }, - { - "test": [[False, False, False], [False, False, False]], - }, - { - "test": [[True, True, True], [False, False, False]], - }, - ] - - traj_1 = MoleculeOptimizeTrajectory(species, coords, charge, spin, site_properties=props_1) - traj_2 = MoleculeOptimizeTrajectory(species, coords, charge, spin, site_properties=props_2) - traj_3 = MoleculeOptimizeTrajectory(species, coords, charge, spin, site_properties=props_3) - traj_4 = MoleculeOptimizeTrajectory(species, coords, charge, spin) - - # const & const (both constant and the same site properties) - traj_combined = copy.deepcopy(traj_1) - traj_combined.extend(traj_1) - expected_site_props = props_1 - assert traj_combined.site_properties == expected_site_props - - # const & const (both constant but different site properties) - traj_combined = copy.deepcopy(traj_1) - traj_combined.extend(traj_2) - expected_site_props = [props_1] * num_frames + [props_2] * num_frames - assert traj_combined.site_properties == expected_site_props - - # const & changing - traj_combined = copy.deepcopy(traj_1) - traj_combined.extend(traj_3) - expected_site_props = [props_1] * num_frames + props_3 - assert traj_combined.site_properties == expected_site_props - - # const & none - traj_combined = copy.deepcopy(traj_1) - traj_combined.extend(traj_4) - expected_site_props = [props_1] * num_frames + [None] * num_frames - assert traj_combined.site_properties == expected_site_props - - # changing & const - traj_combined = copy.deepcopy(traj_3) - traj_combined.extend(traj_1) - expected_site_props = props_3 + [props_1] * num_frames - assert traj_combined.site_properties == expected_site_props - - # changing & changing - traj_combined = copy.deepcopy(traj_3) - traj_combined.extend(traj_3) - expected_site_props = props_3 + props_3 - assert traj_combined.site_properties == expected_site_props - - # changing & none - traj_combined = copy.deepcopy(traj_3) - traj_combined.extend(traj_4) - expected_site_props = props_3 + [None] * num_frames - assert traj_combined.site_properties == expected_site_props - - # none & const - traj_combined = copy.deepcopy(traj_4) - traj_combined.extend(traj_1) - expected_site_props = [None] * num_frames + [props_1] * num_frames - assert traj_combined.site_properties == expected_site_props - - # none & changing - traj_combined = copy.deepcopy(traj_4) - traj_combined.extend(traj_3) - expected_site_props = [None] * num_frames + props_3 - assert traj_combined.site_properties == expected_site_props - - # none & none - traj_combined = copy.deepcopy(traj_4) - traj_combined.extend(traj_4) - expected_site_props = None - assert traj_combined.site_properties == expected_site_props - - def test_extend_frame_props(self): - species, coords, charge, spin = self._get_species_and_coords() - - energy_1 = [-3, -3.9, -4.1] - energy_2 = [-4.2, -4.25, -4.3] - enthalpy_2 = [1.0, 1.25, 1.3] - - # energy only properties - props_1 = [{"energy": e} for e in energy_1] - traj_1 = MoleculeOptimizeTrajectory(species, coords, charge, spin, frame_properties=props_1) - - # energy and pressure properties - props_2 = [{"energy": e, "pressure": p} for e, p in zip(energy_2, enthalpy_2)] - traj_2 = MoleculeOptimizeTrajectory(species, coords, charge, spin, frame_properties=props_2) - - # no properties - traj_3 = MoleculeOptimizeTrajectory(species, coords, charge, spin, frame_properties=None) - - # test combining two with different properties - traj_combined = copy.deepcopy(traj_1) - traj_combined.extend(traj_2) - expected_props = props_1 + props_2 - assert traj_combined.frame_properties == expected_props - - # test combining two where one has properties and the other does not - traj_combined = copy.deepcopy(traj_1) - traj_combined.extend(traj_3) - expected_props = props_1 + [None] * len(coords) - assert traj_combined.frame_properties == expected_props - - # test combining two both of which have no properties - traj_combined = copy.deepcopy(traj_3) - traj_combined.extend(traj_3) - assert traj_combined.frame_properties is None - - def test_length(self): - assert len(self.traj) == len(self.molecules) - - def test_to_from_dict(self): - d = self.traj.as_dict() - traj = MoleculeOptimizeTrajectory.from_dict(d) - assert isinstance(traj, MoleculeOptimizeTrajectory) - - if __name__ == "__main__": import unittest diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 19c70561794..e5563a2c5b7 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -9,7 +9,7 @@ import warnings from fnmatch import fnmatch from pathlib import Path -from typing import Any, Dict, List, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union import numpy as np from monty.io import zopen @@ -29,7 +29,7 @@ class Trajectory(MSONable): """ - Trajectory of a relaxation or molecular dynamics simulation. + Trajectory of a geometry optimization or molecular dynamics simulation. Provides basic functions such as slicing trajectory, combining trajectories, and obtaining displacements. @@ -37,9 +37,12 @@ class Trajectory(MSONable): def __init__( self, - lattice: Lattice | Matrix3D | list[Lattice] | list[Matrix3D] | np.ndarray, species: list[str | Element | Species | DummySpecies | Composition], - frac_coords: list[list[Vector3D]] | np.ndarray, + coords: list[list[Vector3D]] | np.ndarray | list[np.ndarray], + use_molecule: bool = True, + charge: Optional[int | float] = None, + spin_multiplicity: Optional[int | float] = None, + lattice: Optional[Lattice | Matrix3D | list[Lattice] | list[Matrix3D] | np.ndarray] = None, *, site_properties: SitePropsType | None = None, frame_properties: list[dict] | None = None, @@ -53,13 +56,6 @@ def __init__( number of frames in the trajectory. Args: - lattice: shape (3, 3) or (M, 3, 3). Lattice of the structures in the - trajectory; should be used together with `constant_lattice`. - If `constant_lattice=True`, this should be a single lattice that is - common for all structures in the trajectory (e.g. in an NVT run). - If `constant_lattice=False`, this should be a list of lattices, - each for one structure in the trajectory (e.g. in an NPT run or a - relaxation that allows changing the cell size). species: shape (N,). List of species on each site. Can take in flexible input, including: i. A sequence of element / species specified either as string @@ -68,7 +64,21 @@ def __init__( ii. List of dict of elements/species and occupancies, e.g., [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of disordered structures. - frac_coords: shape (M, N, 3). fractional coordinates of the sites. + coords: shape (M, N, 3). fractional coordinates of the sites. + use_molecule: bool (default True). Is this trajectory based on Molecules + or based on Structures? + charge: int or float. Charge of the system. This is only used for Molecule-based + trajectories (use_molecule = True). + spin_multiplicity: int or float. Spin multiplicity of the system. This is only + used for Molecule-based trajectories (use_molecule = True). + lattice: shape (3, 3) or (M, 3, 3). Lattice of the structures in the + trajectory; should be used together with `constant_lattice`. + If `constant_lattice=True`, this should be a single lattice that is + common for all structures in the trajectory (e.g. in an NVT run). + If `constant_lattice=False`, this should be a list of lattices, + each for one structure in the trajectory (e.g. in an NPT run or a + relaxation that allows changing the cell size). This is only used for + Structure-based trajectories (use_molecule=False). site_properties: Properties associated with the sites. This should be a list of `M` dicts for a single dict. If a list of dicts, each provides the site properties for a frame. Each value in a dict should be a @@ -84,35 +94,59 @@ def __init__( providing the properties for a frame. For example, for a trajectory with `M=2`, the `frame_properties` can be [{'energy':1.0}, {'energy':2.0}]. constant_lattice: Whether the lattice changes during the simulation. - Should be used together with `lattice`. See usage there. + Should be used together with `lattice`. See usage there. This is only + used for Structure-based trajectories (use_molecule=False). time_step: Time step of MD simulation in femto-seconds. Should be `None` - for relaxation trajectory. - coords_are_displacement: Whether `frac_coords` are given in displacements - (True) or positions (False). Note, if this is `True`, `frac_coords` + for a trajectory representing a geometry optimization. + coords_are_displacement: Whether `coords` are given in displacements + (True) or positions (False). Note, if this is `True`, `coords` of a frame (say i) should be relative to the previous frame (i.e. i-1), but not relative to the `base_position`. base_positions: shape (N, 3). The starting positions of all atoms in the trajectory. Used to reconstruct positions when converting from displacements to positions. Only needs to be specified if `coords_are_displacement=True`. Defaults to the first index of - `frac_coords` when `coords_are_displacement=False`. - """ - if isinstance(lattice, Lattice): - lattice = lattice.matrix - elif isinstance(lattice, list) and isinstance(lattice[0], Lattice): - lattice = [x.matrix for x in lattice] # type: ignore - lattice = np.asarray(lattice) - - if not constant_lattice and lattice.shape == (3, 3): - self.lattice = np.tile(lattice, (len(frac_coords), 1, 1)) - warnings.warn( - "Get `constant_lattice=False`, but only get a single `lattice`. " - "Use this single `lattice` as the lattice for all frames." - ) + `coords` when `coords_are_displacement=False`. + """ + + self.use_molecule = use_molecule + + # First, sanity check that the necessary inputs have been provided + if self.use_molecule: + if charge is None: + raise ValueError("`charge` must be provided for a Molecule-based Trajectory!") + + self.charge = int(charge) + if spin_multiplicity is None: + self.spin_multiplicity = None + else: + self.spin_multiplicity = spin_multiplicity + + self.lattice = None + self.constant_lattice = None else: - self.lattice = lattice + if lattice is None: + raise ValueError("`lattice` must be provided for a Structure-based Trajectory!") - self.constant_lattice = constant_lattice + if isinstance(lattice, Lattice): + lattice = lattice.matrix + elif isinstance(lattice, list) and isinstance(lattice[0], Lattice): + lattice = [x.matrix for x in lattice] # type: ignore + lattice = np.asarray(lattice) + + if not constant_lattice and lattice.shape == (3, 3): + self.lattice = np.tile(lattice, (len(coords), 1, 1)) + warnings.warn( + "Get `constant_lattice=False`, but only get a single `lattice`. " + "Use this single `lattice` as the lattice for all frames." + ) + else: + self.lattice = lattice + + self.constant_lattice = constant_lattice + + self.charge = None + self.spin_multiplicity = None if coords_are_displacement: if base_positions is None: @@ -122,11 +156,11 @@ def __init__( ) self.base_positions = base_positions else: - self.base_positions = frac_coords[0] # type: ignore[assignment] + self.base_positions = coords[0] # type: ignore[assignment] self.coords_are_displacement = coords_are_displacement self.species = species - self.frac_coords = np.asarray(frac_coords) + self.coords = np.asarray(coords) self.time_step = time_step self._check_site_props(site_properties) @@ -145,28 +179,48 @@ def get_structure(self, i: int) -> Structure: Returns: A pymatgen Structure object. """ + if self.use_molecule: + raise TypeError("Cannot return `Structure` for `Molecule`-based" + "`Trajectory`! Use `get_molecule` instead!") + + return self[i] + + def get_molecule(self, i: int) -> Molecule: + """ + Get molecule at specified index. + + Args: + i: Index of structure. + + Returns: + A pymatgen Molecule object. + """ + if not self.use_molecule: + raise TypeError("Cannot return `Molecule` for `Structure`-based" + "`Trajectory`! Use `get_structure` instead!") + return self[i] def to_positions(self): """ Convert displacements between consecutive frames into positions. - `base_positions` and `frac_coords` should both be in fractional coords or + `base_positions` and `coords` should both be in fractional coords or absolute coords. This is the opposite operation of `to_displacements()`. """ if self.coords_are_displacement: - cumulative_displacements = np.cumsum(self.frac_coords, axis=0) + cumulative_displacements = np.cumsum(self.coords, axis=0) positions = self.base_positions + cumulative_displacements - self.frac_coords = positions + self.coords = positions self.coords_are_displacement = False def to_displacements(self): """ Converts positions of trajectory into displacements between consecutive frames. - `base_positions` and `frac_coords` should both be in fractional coords. Does + `base_positions` and `coords` should both be in fractional coords. Does not work for absolute coords because the atoms are to be wrapped into the simulation box. @@ -174,20 +228,21 @@ def to_displacements(self): """ if not self.coords_are_displacement: displacements = np.subtract( - self.frac_coords, - np.roll(self.frac_coords, 1, axis=0), + self.coords, + np.roll(self.coords, 1, axis=0), ) - displacements[0] = np.zeros(np.shape(self.frac_coords[0])) + displacements[0] = np.zeros(np.shape(self.coords[0])) - # Deal with PBC. - # For example - If in one frame an atom has fractional coordinates of - # [0, 0, 0.98] and in the next its coordinates are [0, 0, 0.01], this atom - # will have moved 0.03*c, but if we only subtract the positions, we would - # get a displacement vector of [0, 0, -0.97]. Therefore, we can correct for - # this by adding or subtracting 1 from the value. - displacements = [np.subtract(d, np.around(d)) for d in displacements] + if not self.use_molecule: + # Deal with PBC. + # For example - If in one frame an atom has fractional coordinates of + # [0, 0, 0.98] and in the next its coordinates are [0, 0, 0.01], this atom + # will have moved 0.03*c, but if we only subtract the positions, we would + # get a displacement vector of [0, 0, -0.97]. Therefore, we can correct for + # this by adding or subtracting 1 from the value. + displacements = [np.subtract(d, np.around(d)) for d in displacements] - self.frac_coords = displacements + self.coords = displacements self.coords_are_displacement = True def extend(self, trajectory: Trajectory): @@ -199,6 +254,14 @@ def extend(self, trajectory: Trajectory): Args: trajectory: Trajectory to append. """ + + # Cannot combine Molecule-based and Structure-based Trajectories + if self.use_molecule != trajectory.use_molecule: + raise ValueError( + "Cannot combine `Molecule`- and `Structure`-based `Trajectory`. " + "objects." + ) + if self.time_step != trajectory.time_step: raise ValueError( "Cannot extend trajectory. Time steps of the trajectories are " @@ -229,16 +292,17 @@ def extend(self, trajectory: Trajectory): len(trajectory), ) - self.lattice, self.constant_lattice = self._combine_lattice( - self.lattice, - trajectory.lattice, - len(self), - len(trajectory), - ) + if not self.use_molecule: + self.lattice, self.constant_lattice = self._combine_lattice( + self.lattice, + trajectory.lattice, + len(self), + len(trajectory), + ) # Note, this should be after the other self._combine... method calls, since # len(self) is used there. - self.frac_coords = np.concatenate((self.frac_coords, trajectory.frac_coords)) + self.coords = np.concatenate((self.coords, trajectory.coords)) def __iter__(self): """ @@ -251,14 +315,14 @@ def __len__(self): """ Number of frames in the trajectory. """ - return len(self.frac_coords) + return len(self.coords) - def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory: + def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | Trajectory: """ Get a subset of the trajectory. The output depends on the type of the input `frames`. If an int is given, return - a pymatgen Structure at the specified frame. If a list or a slice, return a new + a pymatgen Molecule or Structure at the specified frame. If a list or a slice, return a new trajectory with a subset of frames. Args: @@ -275,15 +339,25 @@ def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory if frames >= len(self): raise IndexError(f"Frame index {frames} out of range.") - lattice = self.lattice if self.constant_lattice else self.lattice[frames] + if self.use_molecule: + return Molecule( + self.species, + self.coords[frames], + charge=self.charge, + spin_multiplicity=self.spin_multiplicity, + site_properties=self._get_site_props(frames), # type: ignore + ) - return Structure( - Lattice(lattice), - self.species, - self.frac_coords[frames], - site_properties=self._get_site_props(frames), # type: ignore - to_unit_cell=True, - ) + else: + lattice = self.lattice if self.constant_lattice else self.lattice[frames] + + return Structure( + Lattice(lattice), + self.species, + self.coords[frames], + site_properties=self._get_site_props(frames), # type: ignore + to_unit_cell=True, + ) # For slice input, return a trajectory if isinstance(frames, (slice, list, np.ndarray)): @@ -298,25 +372,41 @@ def __getitem__(self, frames: int | slice | list[int]) -> Structure | Trajectory bad_frames = [i for i in frames if i > len(self)] raise IndexError(f"Frame index {bad_frames} out of range.") - lattice = self.lattice if self.constant_lattice else self.lattice[selected] - frac_coords = self.frac_coords[selected] - + coords = self.coords[selected] if self.frame_properties is not None: frame_properties = [self.frame_properties[i] for i in selected] else: frame_properties = None - return Trajectory( - lattice, - self.species, - frac_coords, - site_properties=self._get_site_props(selected), - frame_properties=frame_properties, - constant_lattice=self.constant_lattice, - time_step=self.time_step, - coords_are_displacement=False, - base_positions=self.base_positions, - ) + if self.use_molecule: + return Trajectory( + species=self.species, + coords=coords, + use_molecule=True, + charge=self.charge, + spin_multiplicity=self.spin_multiplicity, + site_properties=self._get_site_props(selected), + frame_properties=frame_properties, + time_step=self.time_step, + coords_are_displacement=False, + base_positions=self.base_positions, + ) + + else: + lattice = self.lattice if self.constant_lattice else self.lattice[selected] + + return Trajectory( + species=self.species, + coords=coords, + use_molecule=False, + lattice=lattice, + site_properties=self._get_site_props(selected), + frame_properties=frame_properties, + constant_lattice=self.constant_lattice, + time_step=self.time_step, + coords_are_displacement=False, + base_positions=self.base_positions + ) supported = [int, slice, list or np.ndarray] raise ValueError(f"Expect the type of frames be one of {supported}; {type(frames)}.") @@ -340,6 +430,9 @@ def write_Xdatcar( system: Description of system (e.g. 2D MoS2). significant_figures: Significant figures in the output file. """ + if self.use_molecule: + raise TypeError("`write_Xdatcar` can only be used with `Structure`-based `Trajectory` objects!") + # Ensure trajectory is in position form self.to_positions() @@ -353,7 +446,7 @@ def write_Xdatcar( syms = [site.specie.symbol for site in self[0]] n_atoms = [len(tuple(a[1])) for a in itertools.groupby(syms)] - for si, frac_coords in enumerate(self.frac_coords): + for si, coords in enumerate(self.coords): # Only print out the info block if if si == 0 or not self.constant_lattice: lines.extend([system, "1.0"]) @@ -368,8 +461,8 @@ def write_Xdatcar( lines.append(f"Direct configuration= {si + 1}") - for frac_coord, specie in zip(frac_coords, self.species): - coords = frac_coord + for coord, specie in zip(coords, self.species): + coords = coord line = f'{" ".join(format_str.format(c) for c in coords)} {specie}' lines.append(line) @@ -382,12 +475,21 @@ def as_dict(self) -> dict: """ Return the trajectory as a MSONAble dict. """ + + if self.lattice is not None: + lat = self.lattice.tolist() + else: + lat = None + return { "@module": type(self).__module__, "@class": type(self).__name__, - "lattice": self.lattice.tolist(), "species": self.species, - "frac_coords": self.frac_coords.tolist(), + "coords": self.coords.tolist(), + "use_molecule": self.use_molecule, + "charge": self.charge, + "spin_multiplicity": self.spin_multiplicity, + "lattice": lat, "site_properties": self.site_properties, "frame_properties": self.frame_properties, "constant_lattice": self.constant_lattice, @@ -422,18 +524,51 @@ def from_structures( lattice = np.array([structure.lattice.matrix for structure in structures]) species = structures[0].species - frac_coords = [structure.frac_coords for structure in structures] + coords = [structure.frac_coords for structure in structures] site_properties = [structure.site_properties for structure in structures] return cls( - lattice, - species, # type: ignore - frac_coords, + species=species, # type: ignore + coords=coords, + use_molecule=False, + lattice=lattice, site_properties=site_properties, # type: ignore constant_lattice=constant_lattice, **kwargs, ) + @classmethod + def from_molecules( + cls, + molecules: list[Molecule], + **kwargs, + ) -> Trajectory: + """ + Create trajectory from a list of molecules. + + Note: Assumes no atoms removed during simulation. + + Args: + molecules: pymatgen Molecules objects. + + Returns: + A trajectory from the structures. + """ + + species = molecules[0].species + coords = [mol.cart_coords for mol in molecules] + site_properties = [mol.site_properties for mol in molecules] + + return cls( + species=species, # type: ignore + coords=coords, + use_molecule=True, + charge=int(molecules[0].charge), + spin_multiplicity=int(molecules[0].spin_multiplicity), + site_properties=site_properties, # type: ignore + **kwargs, + ) + @classmethod def from_file( cls, @@ -537,403 +672,6 @@ def _combine_frame_props(prop1: list[dict] | None, prop2: list[dict] | None, len return list(prop1) + [None] * len2 # type: ignore return list(prop1) + list(prop2) # type:ignore - def _check_site_props(self, site_props: SitePropsType | None): - """ - Check data shape of site properties. - """ - if site_props is None: - return - - if isinstance(site_props, dict): - site_props = [site_props] - else: - assert len(site_props) == len( - self - ), f"Size of the site properties {len(site_props)} does not equal to the number of frames {len(self)}." - - num_sites = len(self.frac_coords[0]) - for d in site_props: - for k, v in d.items(): - assert len(v) == num_sites, ( - f"Size of site property {k} {len(v)}) does not equal to the " - f"number of sites in the structure {num_sites}." - ) - - def _check_frame_props(self, frame_props: list[dict] | None): - """ - Check data shape of site properties. - """ - if frame_props is None: - return - - assert len(frame_props) == len( - self - ), f"Size of the frame properties {len(frame_props)} does not equal to the number of frames {len(self)}." - - def _get_site_props(self, frames: int | list[int]) -> SitePropsType | None: - """ - Slice site properties. - """ - if self.site_properties is None: - return None - if isinstance(self.site_properties, dict): - return self.site_properties - if isinstance(self.site_properties, list): - if isinstance(frames, int): - return self.site_properties[frames] - if isinstance(frames, list): - return [self.site_properties[i] for i in frames] - raise ValueError("Unexpected frames type.") - raise ValueError("Unexpected site_properties type.") - - -class MoleculeOptimizeTrajectory(MSONable): - """ - Trajectory of a geometry optimization for a system without a lattice (namely, a Molecule object). - - Provides basic functions such as slicing trajectory, combining trajectories, and - obtaining displacements. - """ - - def __init__( - self, - species: list[str | Element | Species | DummySpecies | Composition], - coords: list[list[Vector3D]] | np.ndarray | list[np.ndarray], - charge: int | float, - spin_multiplicity: int | float, - *, - site_properties: SitePropsType | None = None, - frame_properties: list[dict] | None = None, - constant_lattice: bool = True, - time_step: int | float | None = None, - coords_are_displacement: bool = False, - base_positions: list[list[Vector3D]] | np.ndarray | None = None, - ): - """ - In below, `N` denotes the number of sites in the Molecule, and `M` denotes the - number of frames in the trajectory. - - Args: - species: shape (N,). List of species on each site. Can take in flexible - input, including: - i. A sequence of element / species specified either as string - symbols, e.g. ["Li", "Fe2+", "P", ...] or atomic numbers, - e.g., (3, 56, ...) or actual Element or Species objects. - ii. List of dict of elements/species and occupancies, e.g., - [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of - disordered structures. - coords: shape (M, N, 3). coordinates of the sites. - site_properties: Properties associated with the sites. This should be a - list of `M` dicts for a single dict. If a list of dicts, each provides - the site properties for a frame. Each value in a dict should be a - sequence of length `N`, giving the properties of the `N` sites. - For example, for a trajectory with `M=2` and `N=4`, the - `site_properties` can be: [{"magmom":[5,5,5,5]}, {"magmom":[5,5,5,5]}]. - If a single dict, the site properties in the dict apply to all frames - in the trajectory. For example, for a trajectory with `M=2` and `N=4`, - {"magmom":[2,2,2,2]} means that, through the entire trajectory, - the magmom are kept constant at 2 for all four atoms. - frame_properties: Properties associated with the structure (e.g. total - energy). This should be a sequence of `M` dicts, with each dict - providing the properties for a frame. For example, for a trajectory with - `M=2`, the `frame_properties` can be [{'energy':1.0}, {'energy':2.0}]. - time_step: Time step of MD simulation in femto-seconds. Should be `None` - for a geometry optimization. - coords_are_displacement: Whether `coords` are given in displacements - (True) or positions (False). Note, if this is `True`, `coords` - of a frame (say i) should be relative to the previous frame (i.e. - i-1), but not relative to the `base_position`. - base_positions: shape (N, 3). The starting positions of all atoms in the - trajectory. Used to reconstruct positions when converting from - displacements to positions. Only needs to be specified if - `coords_are_displacement=True`. Defaults to the first index of - `coords` when `coords_are_displacement=False`. - """ - - if coords_are_displacement: - if base_positions is None: - warnings.warn( - "Without providing an array of starting positions, the positions " - "for each time step will not be available." - ) - self.base_positions = base_positions - else: - self.base_positions = coords[0] # type: ignore[assignment] - self.coords_are_displacement = coords_are_displacement - - self.species = species - self.coords = np.asarray(coords) - self.charge = int(charge) - self.spin_multiplicity = int(spin_multiplicity) - self.time_step = time_step - - self._check_site_props(site_properties) - self.site_properties = site_properties - - self._check_frame_props(frame_properties) - self.frame_properties = frame_properties - - def get_molecule(self, i: int) -> Molecule: - """ - Get molecule at specified index. - - Args: - i: Index of structure. - - Returns: - A pymatgen Molecule object. - """ - return self[i] - - def to_positions(self): - """ - Convert displacements between consecutive frames into positions. - - This is the opposite operation of `to_displacements()`. - """ - if self.coords_are_displacement: - cumulative_displacements = np.cumsum(self.coords, axis=0) - positions = self.base_positions + cumulative_displacements - self.coords = positions - self.coords_are_displacement = False - - def to_displacements(self): - """ - Converts positions of trajectory into displacements between consecutive frames. - - This is the opposite operation of `to_positions()`. - """ - if not self.coords_are_displacement: - displacements = np.subtract( - self.coords, - np.roll(self.coords, 1, axis=0), - ) - displacements[0] = np.zeros(np.shape(self.coords[0])) - - self.coords = displacements - self.coords_are_displacement = True - - def extend(self, trajectory: MoleculeOptimizeTrajectory): - """ - Append a trajectory to the current one. - - The coords and all other properties are combined. - - Args: - trajectory: MoleculeOptimizeTrajectory to append. - """ - if self.time_step != trajectory.time_step: - raise ValueError( - "Cannot extend trajectory. Time steps of the trajectories are " - f"incompatible: {self.time_step} and {trajectory.time_step}." - ) - - if self.species != trajectory.species: - raise ValueError( - "Cannot extend trajectory. Species in the trajectories are " - f"incompatible: {self.species} and {trajectory.species}." - ) - - # Ensure both trajectories are in positions before combining - self.to_positions() - trajectory.to_positions() - - self.site_properties = self._combine_site_props( - self.site_properties, - trajectory.site_properties, - len(self), - len(trajectory), - ) - - self.frame_properties = self._combine_frame_props( - self.frame_properties, - trajectory.frame_properties, - len(self), - len(trajectory), - ) - - # Note, this should be after the other self._combine... method calls, since - # len(self) is used there. - self.coords = np.concatenate((self.coords, trajectory.coords)) - - def __iter__(self): - """ - Iterator of the trajectory, yielding a pymatgen structure for each frame. - """ - for i in range(len(self)): - yield self[i] - - def __len__(self): - """ - Number of frames in the trajectory. - """ - return len(self.coords) - - def __getitem__(self, frames: int | slice | list[int]) -> Molecule | MoleculeOptimizeTrajectory: - """ - Get a subset of the trajectory. - - The output depends on the type of the input `frames`. If an int is given, return - a pymatgen Molecule at the specified frame. If a list or a slice, return a new - trajectory with a subset of frames. - - Args: - frames: Indices of the trajectory to return. - - Return: - Subset of trajectory - """ - # Convert to position mode if not ready - self.to_positions() - - # For integer input, return the structure at that frame - if isinstance(frames, int): - if frames >= len(self): - raise IndexError(f"Frame index {frames} out of range.") - - return Molecule( - self.species, - self.coords[frames], - charge=self.charge, - spin_multiplicity=self.spin_multiplicity, - site_properties=self._get_site_props(frames), # type: ignore - ) - - # For slice input, return a trajectory - if isinstance(frames, (slice, list, np.ndarray)): - if isinstance(frames, slice): - start, stop, step = frames.indices(len(self)) - selected = list(range(start, stop, step)) - else: - # Get rid of frames that exceed trajectory length - selected = [i for i in frames if i < len(self)] - - if len(selected) < len(frames): - bad_frames = [i for i in frames if i > len(self)] - raise IndexError(f"Frame index {bad_frames} out of range.") - - coords = self.coords[selected] - - if self.frame_properties is not None: - frame_properties = [self.frame_properties[i] for i in selected] - else: - frame_properties = None - - return MoleculeOptimizeTrajectory( - self.species, - coords, - self.charge, - self.spin_multiplicity, - site_properties=self._get_site_props(selected), - frame_properties=frame_properties, - time_step=self.time_step, - coords_are_displacement=False, - base_positions=self.base_positions, - ) - - supported = [int, slice, list or np.ndarray] - raise ValueError(f"Expect the type of frames be one of {supported}; {type(frames)}.") - - def as_dict(self) -> dict: - """ - Return the trajectory as a MSONAble dict. - """ - return { - "@module": type(self).__module__, - "@class": type(self).__name__, - "species": self.species, - "coords": self.coords.tolist(), - "charge": self.charge, - "spin_multiplicity": self.spin_multiplicity, - "site_properties": self.site_properties, - "frame_properties": self.frame_properties, - "time_step": self.time_step, - "coords_are_displacement": self.coords_are_displacement, - "base_positions": self.base_positions, - } - - @classmethod - def from_molecules( - cls, - molecules: list[Molecule], - **kwargs, - ) -> Trajectory: - """ - Create trajectory from a list of molecules. - - Note: Assumes no atoms removed during simulation. - - Args: - molecules: pymatgen Molecules objects. - - Returns: - A trajectory from the structures. - """ - - species = molecules[0].species - coords = [mol.cart_coords for mol in molecules] - site_properties = [mol.site_properties for mol in molecules] - - return cls( - species, # type: ignore - coords, - molecules[0].charge, - molecules[0].spin_multiplicity, - site_properties=site_properties, # type: ignore - **kwargs, - ) - - @staticmethod - def _combine_site_props( - prop1: SitePropsType | None, prop2: SitePropsType | None, len1: int, len2: int - ) -> SitePropsType | None: - """ - Combine site properties. - - Either one of prop1 or prop2 can be None, dict, or a list of dict. All - possibilities of combining them are considered. - """ - # special cases - - if prop1 is None and prop2 is None: - return None - - if isinstance(prop1, dict) and prop1 == prop2: - return prop1 - - # general case - - assert prop1 is None or isinstance(prop1, (list, dict)) - assert prop2 is None or isinstance(prop2, (list, dict)) - - p1_candidates = { - "NoneType": [None] * len1, - "dict": [prop1] * len1, - "list": prop1, - } - p2_candidates = { - "NoneType": [None] * len2, - "dict": [prop2] * len2, - "list": prop2, - } - p1_selected: list = p1_candidates[type(prop1).__name__] # type: ignore - p2_selected: list = p2_candidates[type(prop2).__name__] # type: ignore - - return p1_selected + p2_selected - - @staticmethod - def _combine_frame_props(prop1: list[dict] | None, prop2: list[dict] | None, len1: int, len2: int) -> list | None: - """ - Combine frame properties. - """ - if prop1 is None and prop2 is None: - return None - if prop1 is None: - return [None] * len1 + list(prop2) # type: ignore - if prop2 is None: - return list(prop1) + [None] * len2 # type: ignore - return list(prop1) + list(prop2) # type:ignore - def _check_site_props(self, site_props: SitePropsType | None): """ Check data shape of site properties. @@ -953,7 +691,7 @@ def _check_site_props(self, site_props: SitePropsType | None): for k, v in d.items(): assert len(v) == num_sites, ( f"Size of site property {k} {len(v)}) does not equal to the " - f"number of sites in the molecule {num_sites}." + f"number of sites in the structure {num_sites}." ) def _check_frame_props(self, frame_props: list[dict] | None): From 74862f4de8ffa17de61f117540b9dd710872763d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Apr 2023 02:01:57 +0000 Subject: [PATCH 19/26] pre-commit auto-fixes --- pymatgen/core/tests/test_trajectory.py | 65 ++++++-------------------- pymatgen/core/trajectory.py | 30 ++++++------ 2 files changed, 27 insertions(+), 68 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index 73551bebd0b..b9009b4febc 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -25,7 +25,7 @@ def setUp(self): species = last_mol.species coords = out.data["geometries"] - self.molecules = list() + self.molecules = [] for c in coords: mol = Molecule(species, c, charge=int(last_mol.charge), spin_multiplicity=int(last_mol.spin_multiplicity)) self.molecules.append(mol) @@ -35,13 +35,12 @@ def setUp(self): species=species, coords=coords, charge=int(last_mol.charge), - spin_multiplicity=int(last_mol.spin_multiplicity) + spin_multiplicity=int(last_mol.spin_multiplicity), ) def _check_traj_equality(self, traj_1, traj_2): - if not traj_1.use_molecule: - if not np.allclose(traj_1.lattice, traj_2.lattice): - return False + if not traj_1.use_molecule and not np.allclose(traj_1.lattice, traj_2.lattice): + return False if traj_1.species != traj_2.species: return False @@ -182,7 +181,7 @@ def test_site_properties(self): coords=coords, charge=charge, spin_multiplicity=spin, - site_properties=props + site_properties=props, ) # compare the overall site properties list @@ -216,7 +215,7 @@ def test_frame_properties(self): coords=coords, charge=charge, spin_multiplicity=spin, - frame_properties=props + frame_properties=props, ) # compare the overall site properties @@ -286,11 +285,7 @@ def test_extend(self): traj = copy.deepcopy(self.traj) incompatible_traj = Trajectory( - use_molecule=True, - species=species, - coords=coords, - charge=charge, - spin_multiplicity=spin + use_molecule=True, species=species, coords=coords, charge=charge, spin_multiplicity=spin ) incompatible_test_success = False try: @@ -328,36 +323,18 @@ def test_extend_site_props(self): ] traj_1 = Trajectory( - use_molecule=False, - lattice=lattice, - species=species, - coords=coords, - site_properties=props_1 + use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=props_1 ) traj_2 = Trajectory( - use_molecule=False, - lattice=lattice, - species=species, - coords=coords, - site_properties=props_2 + use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=props_2 ) traj_3 = Trajectory( - use_molecule=False, - lattice=lattice, - species=species, - coords=coords, - site_properties=props_3 + use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=props_3 ) - traj_4 = Trajectory( - use_molecule=False, - lattice=lattice, - species=species, - coords=coords, - site_properties=None - ) + traj_4 = Trajectory(use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=None) # const & const (both constant and the same site properties) traj_combined = copy.deepcopy(traj_1) @@ -429,31 +406,17 @@ def test_extend_frame_props(self): # energy only properties props_1 = [{"energy": e} for e in energy_1] traj_1 = Trajectory( - use_molecule=False, - lattice=lattice, - species=species, - coords=coords, - frame_properties=props_1 + use_molecule=False, lattice=lattice, species=species, coords=coords, frame_properties=props_1 ) # energy and pressure properties props_2 = [{"energy": e, "pressure": p} for e, p in zip(energy_2, pressure_2)] traj_2 = Trajectory( - use_molecule=False, - lattice=lattice, - species=species, - coords=coords, - frame_properties=props_2 + use_molecule=False, lattice=lattice, species=species, coords=coords, frame_properties=props_2 ) # no properties - traj_3 = Trajectory( - use_molecule=False, - lattice=lattice, - species=species, - coords=coords, - frame_properties=None - ) + traj_3 = Trajectory(use_molecule=False, lattice=lattice, species=species, coords=coords, frame_properties=None) # test combining two with different properties traj_combined = copy.deepcopy(traj_1) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 4b12e46d6a2..d1f1697adbd 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -9,7 +9,7 @@ import warnings from fnmatch import fnmatch from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Sequence, Tuple, Union import numpy as np from monty.io import zopen @@ -40,9 +40,9 @@ def __init__( species: list[str | Element | Species | DummySpecies | Composition], coords: list[list[Vector3D]] | np.ndarray | list[np.ndarray], use_molecule: bool = True, - charge: Optional[int | float] = None, - spin_multiplicity: Optional[int | float] = None, - lattice: Optional[Lattice | Matrix3D | list[Lattice] | list[Matrix3D] | np.ndarray] = None, + charge: int | float | None = None, + spin_multiplicity: int | float | None = None, + lattice: Lattice | Matrix3D | list[Lattice] | list[Matrix3D] | np.ndarray | None = None, *, site_properties: SitePropsType | None = None, frame_properties: list[dict] | None = None, @@ -180,8 +180,9 @@ def get_structure(self, i: int) -> Structure: A pymatgen Structure object. """ if self.use_molecule: - raise TypeError("Cannot return `Structure` for `Molecule`-based" - "`Trajectory`! Use `get_molecule` instead!") + raise TypeError( + "Cannot return `Structure` for `Molecule`-based" "`Trajectory`! Use `get_molecule` instead!" + ) return self[i] @@ -196,8 +197,9 @@ def get_molecule(self, i: int) -> Molecule: A pymatgen Molecule object. """ if not self.use_molecule: - raise TypeError("Cannot return `Molecule` for `Structure`-based" - "`Trajectory`! Use `get_structure` instead!") + raise TypeError( + "Cannot return `Molecule` for `Structure`-based" "`Trajectory`! Use `get_structure` instead!" + ) return self[i] @@ -257,10 +259,7 @@ def extend(self, trajectory: Trajectory): # Cannot combine Molecule-based and Structure-based Trajectories if self.use_molecule != trajectory.use_molecule: - raise ValueError( - "Cannot combine `Molecule`- and `Structure`-based `Trajectory`. " - "objects." - ) + raise ValueError("Cannot combine `Molecule`- and `Structure`-based `Trajectory`. " "objects.") if self.time_step != trajectory.time_step: raise ValueError( @@ -405,7 +404,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | constant_lattice=self.constant_lattice, time_step=self.time_step, coords_are_displacement=False, - base_positions=self.base_positions + base_positions=self.base_positions, ) supported = [int, slice, list or np.ndarray] @@ -476,10 +475,7 @@ def as_dict(self) -> dict: Return the trajectory as a MSONable dict. """ - if self.lattice is not None: - lat = self.lattice.tolist() - else: - lat = None + lat = self.lattice.tolist() if self.lattice is not None else None return { "@module": type(self).__module__, From f73693b668cda1dbcf39e672651cb04faac9909b Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 19 Apr 2023 19:48:57 -0700 Subject: [PATCH 20/26] Trying to make mypy happy, like a crying child --- pymatgen/core/trajectory.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index d1f1697adbd..0c7897c3a97 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -46,7 +46,7 @@ def __init__( *, site_properties: SitePropsType | None = None, frame_properties: list[dict] | None = None, - constant_lattice: bool = True, + constant_lattice: Optional[bool] = True, time_step: int | float | None = None, coords_are_displacement: bool = False, base_positions: list[list[Vector3D]] | np.ndarray | None = None, @@ -111,6 +111,11 @@ def __init__( self.use_molecule = use_molecule + self.charge = None + self.spin_multiplicity = None + self.lattice = None + self.constant_lattice = None + # First, sanity check that the necessary inputs have been provided if self.use_molecule: if charge is None: @@ -121,9 +126,6 @@ def __init__( self.spin_multiplicity = None else: self.spin_multiplicity = spin_multiplicity - - self.lattice = None - self.constant_lattice = None else: if lattice is None: raise ValueError("`lattice` must be provided for a Structure-based Trajectory!") @@ -145,9 +147,6 @@ def __init__( self.constant_lattice = constant_lattice - self.charge = None - self.spin_multiplicity = None - if coords_are_displacement: if base_positions is None: warnings.warn( @@ -291,7 +290,7 @@ def extend(self, trajectory: Trajectory): len(trajectory), ) - if not self.use_molecule: + if not self.use_molecule and self.lattice is not None and trajectory.lattice is not None: self.lattice, self.constant_lattice = self._combine_lattice( self.lattice, trajectory.lattice, @@ -339,11 +338,15 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | raise IndexError(f"Frame index {frames} out of range.") if self.use_molecule: + if self.charge is not None: + charge = int(self.charge) + if self.spin_multiplicity is not None: + spin = int(self.spin_multiplicity) return Molecule( self.species, self.coords[frames], - charge=self.charge, - spin_multiplicity=self.spin_multiplicity, + charge=charge, + spin_multiplicity=spin, site_properties=self._get_site_props(frames), # type: ignore ) From 9e6a69eaa5c62ecdce885fc0e3471d789404a1b9 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 19 Apr 2023 19:53:02 -0700 Subject: [PATCH 21/26] And now ruff --- pymatgen/core/trajectory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 0c7897c3a97..d1456f1b205 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -9,7 +9,7 @@ import warnings from fnmatch import fnmatch from pathlib import Path -from typing import Any, Dict, List, Sequence, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union import numpy as np from monty.io import zopen From 9af72c25a2a4c83c5dbc5e720ac628cbaa7a430c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 20 Apr 2023 02:54:13 +0000 Subject: [PATCH 22/26] pre-commit auto-fixes --- pymatgen/core/trajectory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index d1456f1b205..03e0b31f234 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -9,7 +9,7 @@ import warnings from fnmatch import fnmatch from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Dict, List, Sequence, Tuple, Union import numpy as np from monty.io import zopen @@ -46,7 +46,7 @@ def __init__( *, site_properties: SitePropsType | None = None, frame_properties: list[dict] | None = None, - constant_lattice: Optional[bool] = True, + constant_lattice: bool | None = True, time_step: int | float | None = None, coords_are_displacement: bool = False, base_positions: list[list[Vector3D]] | np.ndarray | None = None, From 73bbeac9b90a332581fe69d0daeacf90594c0605 Mon Sep 17 00:00:00 2001 From: Evan Walter Clark Spotte-Smith Date: Wed, 19 Apr 2023 20:04:48 -0700 Subject: [PATCH 23/26] Please let it end --- pymatgen/core/trajectory.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 03e0b31f234..f4b7d11cb9a 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -337,7 +337,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | if frames >= len(self): raise IndexError(f"Frame index {frames} out of range.") - if self.use_molecule: + if self.use_molecule or self.lattice is None: if self.charge is not None: charge = int(self.charge) if self.spin_multiplicity is not None: @@ -351,7 +351,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | ) else: - lattice = self.lattice if self.constant_lattice else self.lattice[frames] + lattice = self.lattice if self.constant_lattice else self.lattice[frames] # type: ignore return Structure( Lattice(lattice), @@ -380,7 +380,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | else: frame_properties = None - if self.use_molecule: + if self.use_molecule or self.lattice is None: return Trajectory( species=self.species, coords=coords, @@ -395,7 +395,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | ) else: - lattice = self.lattice if self.constant_lattice else self.lattice[selected] + lattice = self.lattice if self.constant_lattice else self.lattice[selected] # type: ignore return Trajectory( species=self.species, @@ -453,7 +453,7 @@ def write_Xdatcar( if si == 0 or not self.constant_lattice: lines.extend([system, "1.0"]) - _lattice = self.lattice if self.constant_lattice else self.lattice[si] + _lattice = self.lattice if self.constant_lattice else self.lattice[si] # type: ignore for latt_vec in _lattice: lines.append(f'{" ".join(map(str, latt_vec))}') From 2ca75b8bc703b5f626148bf30aa77effa0b83896 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 22 Apr 2023 07:29:57 -0700 Subject: [PATCH 24/26] faff --- pymatgen/core/structure.py | 2 +- pymatgen/core/trajectory.py | 60 +++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/pymatgen/core/structure.py b/pymatgen/core/structure.py index 09be0dc2bb2..7c05322f291 100644 --- a/pymatgen/core/structure.py +++ b/pymatgen/core/structure.py @@ -1034,7 +1034,7 @@ def lattice(self) -> Lattice: @property def density(self) -> float: """ - Returns the density in units of g/cc + Returns the density in units of g/cm^3. """ m = Mass(self.composition.weight, "amu") return m.to("g") / (self.volume * Length(1, "ang").to("cm") ** 3) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index f4b7d11cb9a..477bbc4da2e 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -9,7 +9,7 @@ import warnings from fnmatch import fnmatch from pathlib import Path -from typing import Any, Dict, List, Sequence, Tuple, Union +from typing import Any, Dict, Iterator, List, Sequence, Tuple, Union import numpy as np from monty.io import zopen @@ -108,7 +108,6 @@ def __init__( `coords_are_displacement=True`. Defaults to the first index of `coords` when `coords_are_displacement=False`. """ - self.use_molecule = use_molecule self.charge = None @@ -168,12 +167,12 @@ def __init__( self._check_frame_props(frame_properties) self.frame_properties = frame_properties - def get_structure(self, i: int) -> Structure: + def get_structure(self, idx: int) -> Structure: """ Get structure at specified index. Args: - i: Index of structure. + idx: Index of structure. Returns: A pymatgen Structure object. @@ -183,14 +182,14 @@ def get_structure(self, i: int) -> Structure: "Cannot return `Structure` for `Molecule`-based" "`Trajectory`! Use `get_molecule` instead!" ) - return self[i] + return self[idx] - def get_molecule(self, i: int) -> Molecule: + def get_molecule(self, idx: int) -> Molecule: """ Get molecule at specified index. Args: - i: Index of structure. + idx: Index of molecule. Returns: A pymatgen Molecule object. @@ -200,9 +199,9 @@ def get_molecule(self, i: int) -> Molecule: "Cannot return `Molecule` for `Structure`-based" "`Trajectory`! Use `get_structure` instead!" ) - return self[i] + return self[idx] - def to_positions(self): + def to_positions(self) -> None: """ Convert displacements between consecutive frames into positions. @@ -217,7 +216,7 @@ def to_positions(self): self.coords = positions self.coords_are_displacement = False - def to_displacements(self): + def to_displacements(self) -> None: """ Converts positions of trajectory into displacements between consecutive frames. @@ -246,7 +245,7 @@ def to_displacements(self): self.coords = displacements self.coords_are_displacement = True - def extend(self, trajectory: Trajectory): + def extend(self, trajectory: Trajectory) -> None: """ Append a trajectory to the current one. @@ -255,7 +254,6 @@ def extend(self, trajectory: Trajectory): Args: trajectory: Trajectory to append. """ - # Cannot combine Molecule-based and Structure-based Trajectories if self.use_molecule != trajectory.use_molecule: raise ValueError("Cannot combine `Molecule`- and `Structure`-based `Trajectory`. " "objects.") @@ -302,14 +300,14 @@ def extend(self, trajectory: Trajectory): # len(self) is used there. self.coords = np.concatenate((self.coords, trajectory.coords)) - def __iter__(self): + def __iter__(self) -> Iterator[Structure | Molecule]: """ - Iterator of the trajectory, yielding a pymatgen structure for each frame. + Iterator of the trajectory, yielding a pymatgen Structure or Molecule for each frame. """ - for i in range(len(self)): - yield self[i] + for idx in range(len(self)): + yield self[idx] - def __len__(self): + def __len__(self) -> int: """ Number of frames in the trajectory. """ @@ -464,8 +462,7 @@ def write_Xdatcar( lines.append(f"Direct configuration= {si + 1}") for coord, specie in zip(coords, self.species): - coords = coord - line = f'{" ".join(format_str.format(c) for c in coords)} {specie}' + line = f'{" ".join(format_str.format(c) for c in coord)} {specie}' lines.append(line) xdatcar_string = "\n".join(lines) + "\n" @@ -477,7 +474,6 @@ def as_dict(self) -> dict: """ Return the trajectory as a MSONable dict. """ - lat = self.lattice.tolist() if self.lattice is not None else None return { @@ -553,7 +549,6 @@ def from_molecules( Returns: A trajectory from the structures. """ - species = molecules[0].species coords = [mol.cart_coords for mol in molecules] site_properties = [mol.site_properties for mol in molecules] @@ -671,9 +666,16 @@ def _combine_frame_props(prop1: list[dict] | None, prop2: list[dict] | None, len return list(prop1) + [None] * len2 # type: ignore return list(prop1) + list(prop2) # type:ignore - def _check_site_props(self, site_props: SitePropsType | None): + def _check_site_props(self, site_props: SitePropsType | None) -> None: """ Check data shape of site properties. + + Args: + site_props (dict | list[dict] | None): Returns immediately if None. + + Raises: + AssertionError: If the size of the site properties does not match + the number of sites in the structure. """ if site_props is None: return @@ -685,15 +687,15 @@ def _check_site_props(self, site_props: SitePropsType | None): self ), f"Size of the site properties {len(site_props)} does not equal to the number of frames {len(self)}." - num_sites = len(self.coords[0]) - for d in site_props: - for k, v in d.items(): - assert len(v) == num_sites, ( - f"Size of site property {k} {len(v)}) does not equal to the " - f"number of sites in the structure {num_sites}." + n_sites = len(self.coords[0]) + for dct in site_props: + for key, val in dct.items(): + assert len(val) == n_sites, ( + f"Size of site property {key} {len(val)}) does not equal to the " + f"number of sites in the structure {n_sites}." ) - def _check_frame_props(self, frame_props: list[dict] | None): + def _check_frame_props(self, frame_props: list[dict] | None) -> None: """ Check data shape of site properties. """ From 4c9e24c24caa99ac43e898722164ab7026e16575 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 22 Apr 2023 07:31:20 -0700 Subject: [PATCH 25/26] remove use_molecule kwarg can we rely on heuristic if lattice is None, we're handling molecules? --- pymatgen/core/tests/test_trajectory.py | 38 ++++++------------ pymatgen/core/trajectory.py | 53 ++++++++++++-------------- 2 files changed, 35 insertions(+), 56 deletions(-) diff --git a/pymatgen/core/tests/test_trajectory.py b/pymatgen/core/tests/test_trajectory.py index b9009b4febc..ed924b436a5 100644 --- a/pymatgen/core/tests/test_trajectory.py +++ b/pymatgen/core/tests/test_trajectory.py @@ -31,7 +31,6 @@ def setUp(self): self.molecules.append(mol) self.traj_mols = Trajectory( - use_molecule=True, species=species, coords=coords, charge=int(last_mol.charge), @@ -39,7 +38,7 @@ def setUp(self): ) def _check_traj_equality(self, traj_1, traj_2): - if not traj_1.use_molecule and not np.allclose(traj_1.lattice, traj_2.lattice): + if traj_1.lattice is not None and not np.allclose(traj_1.lattice, traj_2.lattice): return False if traj_1.species != traj_2.species: @@ -153,7 +152,7 @@ def test_site_properties(self): "magmom": [5, 5], }, ] - traj = Trajectory(use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=props) + traj = Trajectory(lattice=lattice, species=species, coords=coords, site_properties=props) # compare the overall site properties list assert traj.site_properties == props @@ -176,7 +175,6 @@ def test_site_properties(self): }, ] traj = Trajectory( - use_molecule=True, species=species, coords=coords, charge=charge, @@ -196,7 +194,7 @@ def test_frame_properties(self): props = [{"energy_per_atom": e} for e in [-3.0001, -3.0971, -3.0465]] - traj = Trajectory(use_molecule=False, lattice=lattice, species=species, coords=coords, frame_properties=props) + traj = Trajectory(lattice=lattice, species=species, coords=coords, frame_properties=props) # compare the overall site properties assert traj.frame_properties == props @@ -210,7 +208,6 @@ def test_frame_properties(self): props = [{"SCF_energy_in_the_final_basis_set": e} for e in [-113.3256885788, -113.3260019471, -113.326006415]] traj = Trajectory( - use_molecule=True, species=species, coords=coords, charge=charge, @@ -252,7 +249,6 @@ def test_extend(self): # Case of compatible trajectories compatible_traj = Trajectory( - use_molecule=True, species=traj.species, coords=[ [ @@ -284,9 +280,7 @@ def test_extend(self): species, coords, charge, spin = self._get_species_and_coords() traj = copy.deepcopy(self.traj) - incompatible_traj = Trajectory( - use_molecule=True, species=species, coords=coords, charge=charge, spin_multiplicity=spin - ) + incompatible_traj = Trajectory(species=species, coords=coords, charge=charge, spin_multiplicity=spin) incompatible_test_success = False try: traj.extend(incompatible_traj) @@ -322,19 +316,13 @@ def test_extend_site_props(self): }, ] - traj_1 = Trajectory( - use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=props_1 - ) + traj_1 = Trajectory(lattice=lattice, species=species, coords=coords, site_properties=props_1) - traj_2 = Trajectory( - use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=props_2 - ) + traj_2 = Trajectory(lattice=lattice, species=species, coords=coords, site_properties=props_2) - traj_3 = Trajectory( - use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=props_3 - ) + traj_3 = Trajectory(lattice=lattice, species=species, coords=coords, site_properties=props_3) - traj_4 = Trajectory(use_molecule=False, lattice=lattice, species=species, coords=coords, site_properties=None) + traj_4 = Trajectory(lattice=lattice, species=species, coords=coords, site_properties=None) # const & const (both constant and the same site properties) traj_combined = copy.deepcopy(traj_1) @@ -405,18 +393,14 @@ def test_extend_frame_props(self): # energy only properties props_1 = [{"energy": e} for e in energy_1] - traj_1 = Trajectory( - use_molecule=False, lattice=lattice, species=species, coords=coords, frame_properties=props_1 - ) + traj_1 = Trajectory(lattice=lattice, species=species, coords=coords, frame_properties=props_1) # energy and pressure properties props_2 = [{"energy": e, "pressure": p} for e, p in zip(energy_2, pressure_2)] - traj_2 = Trajectory( - use_molecule=False, lattice=lattice, species=species, coords=coords, frame_properties=props_2 - ) + traj_2 = Trajectory(lattice=lattice, species=species, coords=coords, frame_properties=props_2) # no properties - traj_3 = Trajectory(use_molecule=False, lattice=lattice, species=species, coords=coords, frame_properties=None) + traj_3 = Trajectory(lattice=lattice, species=species, coords=coords, frame_properties=None) # test combining two with different properties traj_combined = copy.deepcopy(traj_1) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 477bbc4da2e..920774a846d 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -39,7 +39,6 @@ def __init__( self, species: list[str | Element | Species | DummySpecies | Composition], coords: list[list[Vector3D]] | np.ndarray | list[np.ndarray], - use_molecule: bool = True, charge: int | float | None = None, spin_multiplicity: int | float | None = None, lattice: Lattice | Matrix3D | list[Lattice] | list[Matrix3D] | np.ndarray | None = None, @@ -65,12 +64,10 @@ def __init__( [{"Fe" : 0.5, "Mn":0.5}, ...]. This allows the setup of disordered structures. coords: shape (M, N, 3). fractional coordinates of the sites. - use_molecule: bool (default True). Is this trajectory based on Molecules - or based on Structures? charge: int or float. Charge of the system. This is only used for Molecule-based - trajectories (use_molecule = True). + trajectories. spin_multiplicity: int or float. Spin multiplicity of the system. This is only - used for Molecule-based trajectories (use_molecule = True). + used for Molecule-based trajectories. lattice: shape (3, 3) or (M, 3, 3). Lattice of the structures in the trajectory; should be used together with `constant_lattice`. If `constant_lattice=True`, this should be a single lattice that is @@ -78,7 +75,7 @@ def __init__( If `constant_lattice=False`, this should be a list of lattices, each for one structure in the trajectory (e.g. in an NPT run or a relaxation that allows changing the cell size). This is only used for - Structure-based trajectories (use_molecule=False). + Structure-based trajectories. site_properties: Properties associated with the sites. This should be a list of `M` dicts for a single dict. If a list of dicts, each provides the site properties for a frame. Each value in a dict should be a @@ -95,7 +92,7 @@ def __init__( `M=2`, the `frame_properties` can be [{'energy':1.0}, {'energy':2.0}]. constant_lattice: Whether the lattice changes during the simulation. Should be used together with `lattice`. See usage there. This is only - used for Structure-based trajectories (use_molecule=False). + used for Structure-based trajectories. time_step: Time step of MD simulation in femto-seconds. Should be `None` for a trajectory representing a geometry optimization. coords_are_displacement: Whether `coords` are given in displacements @@ -108,15 +105,13 @@ def __init__( `coords_are_displacement=True`. Defaults to the first index of `coords` when `coords_are_displacement=False`. """ - self.use_molecule = use_molecule - self.charge = None self.spin_multiplicity = None self.lattice = None self.constant_lattice = None # First, sanity check that the necessary inputs have been provided - if self.use_molecule: + if lattice is None: if charge is None: raise ValueError("`charge` must be provided for a Molecule-based Trajectory!") @@ -126,9 +121,6 @@ def __init__( else: self.spin_multiplicity = spin_multiplicity else: - if lattice is None: - raise ValueError("`lattice` must be provided for a Structure-based Trajectory!") - if isinstance(lattice, Lattice): lattice = lattice.matrix elif isinstance(lattice, list) and isinstance(lattice[0], Lattice): @@ -177,12 +169,13 @@ def get_structure(self, idx: int) -> Structure: Returns: A pymatgen Structure object. """ - if self.use_molecule: + struct = self[idx] + if isinstance(struct, Molecule): raise TypeError( "Cannot return `Structure` for `Molecule`-based" "`Trajectory`! Use `get_molecule` instead!" ) - return self[idx] + return struct def get_molecule(self, idx: int) -> Molecule: """ @@ -194,12 +187,13 @@ def get_molecule(self, idx: int) -> Molecule: Returns: A pymatgen Molecule object. """ - if not self.use_molecule: + mol = self[idx] + if isinstance(mol, Structure): raise TypeError( "Cannot return `Molecule` for `Structure`-based" "`Trajectory`! Use `get_structure` instead!" ) - return self[idx] + return mol def to_positions(self) -> None: """ @@ -233,7 +227,8 @@ def to_displacements(self) -> None: ) displacements[0] = np.zeros(np.shape(self.coords[0])) - if not self.use_molecule: + # check if the trajectory is periodic + if self.lattice is not None: # Deal with PBC. # For example - If in one frame an atom has fractional coordinates of # [0, 0, 0.98] and in the next its coordinates are [0, 0, 0.01], this atom @@ -254,8 +249,13 @@ def extend(self, trajectory: Trajectory) -> None: Args: trajectory: Trajectory to append. """ - # Cannot combine Molecule-based and Structure-based Trajectories - if self.use_molecule != trajectory.use_molecule: + # Cannot combine Molecule and Structure Trajectories + if ( + self.lattice is None # is molecules + and trajectory.lattice is not None # is structures + or self.lattice is not None # is structures + and trajectory.lattice is None # is molecules + ): raise ValueError("Cannot combine `Molecule`- and `Structure`-based `Trajectory`. " "objects.") if self.time_step != trajectory.time_step: @@ -288,7 +288,7 @@ def extend(self, trajectory: Trajectory) -> None: len(trajectory), ) - if not self.use_molecule and self.lattice is not None and trajectory.lattice is not None: + if self.lattice is not None and trajectory.lattice is not None: self.lattice, self.constant_lattice = self._combine_lattice( self.lattice, trajectory.lattice, @@ -335,7 +335,7 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | if frames >= len(self): raise IndexError(f"Frame index {frames} out of range.") - if self.use_molecule or self.lattice is None: + if self.lattice is None: if self.charge is not None: charge = int(self.charge) if self.spin_multiplicity is not None: @@ -378,11 +378,10 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | else: frame_properties = None - if self.use_molecule or self.lattice is None: + if self.lattice is None: return Trajectory( species=self.species, coords=coords, - use_molecule=True, charge=self.charge, spin_multiplicity=self.spin_multiplicity, site_properties=self._get_site_props(selected), @@ -398,7 +397,6 @@ def __getitem__(self, frames: int | slice | list[int]) -> Molecule | Structure | return Trajectory( species=self.species, coords=coords, - use_molecule=False, lattice=lattice, site_properties=self._get_site_props(selected), frame_properties=frame_properties, @@ -430,7 +428,7 @@ def write_Xdatcar( system: Description of system (e.g. 2D MoS2). significant_figures: Significant figures in the output file. """ - if self.use_molecule: + if self.lattice is None: raise TypeError("`write_Xdatcar` can only be used with `Structure`-based `Trajectory` objects!") # Ensure trajectory is in position form @@ -481,7 +479,6 @@ def as_dict(self) -> dict: "@class": type(self).__name__, "species": self.species, "coords": self.coords.tolist(), - "use_molecule": self.use_molecule, "charge": self.charge, "spin_multiplicity": self.spin_multiplicity, "lattice": lat, @@ -525,7 +522,6 @@ def from_structures( return cls( species=species, # type: ignore coords=coords, - use_molecule=False, lattice=lattice, site_properties=site_properties, # type: ignore constant_lattice=constant_lattice, @@ -556,7 +552,6 @@ def from_molecules( return cls( species=species, # type: ignore coords=coords, - use_molecule=True, charge=int(molecules[0].charge), spin_multiplicity=int(molecules[0].spin_multiplicity), site_properties=site_properties, # type: ignore From 176eb473fd7ab4e8ead62786fdcaa0a8257938a1 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 22 Apr 2023 07:44:25 -0700 Subject: [PATCH 26/26] fix mypy, refactor to_positions() and to_displacements() --- pymatgen/core/trajectory.py | 47 ++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/pymatgen/core/trajectory.py b/pymatgen/core/trajectory.py index 920774a846d..602368046bf 100644 --- a/pymatgen/core/trajectory.py +++ b/pymatgen/core/trajectory.py @@ -204,11 +204,12 @@ def to_positions(self) -> None: This is the opposite operation of `to_displacements()`. """ - if self.coords_are_displacement: - cumulative_displacements = np.cumsum(self.coords, axis=0) - positions = self.base_positions + cumulative_displacements - self.coords = positions - self.coords_are_displacement = False + if not self.coords_are_displacement: + return + cumulative_displacements = np.cumsum(self.coords, axis=0) + positions = self.base_positions + cumulative_displacements + self.coords = positions + self.coords_are_displacement = False def to_displacements(self) -> None: """ @@ -220,25 +221,23 @@ def to_displacements(self) -> None: This is the opposite operation of `to_positions()`. """ - if not self.coords_are_displacement: - displacements = np.subtract( - self.coords, - np.roll(self.coords, 1, axis=0), - ) - displacements[0] = np.zeros(np.shape(self.coords[0])) - - # check if the trajectory is periodic - if self.lattice is not None: - # Deal with PBC. - # For example - If in one frame an atom has fractional coordinates of - # [0, 0, 0.98] and in the next its coordinates are [0, 0, 0.01], this atom - # will have moved 0.03*c, but if we only subtract the positions, we would - # get a displacement vector of [0, 0, -0.97]. Therefore, we can correct for - # this by adding or subtracting 1 from the value. - displacements = [np.subtract(d, np.around(d)) for d in displacements] - - self.coords = displacements - self.coords_are_displacement = True + if self.coords_are_displacement: + return + displacements = np.subtract(self.coords, np.roll(self.coords, 1, axis=0)) + displacements[0] = np.zeros(np.shape(self.coords[0])) + + # check if the trajectory is periodic + if self.lattice is not None: + # Deal with PBC. + # For example - If in one frame an atom has fractional coordinates of + # [0, 0, 0.98] and in the next its coordinates are [0, 0, 0.01], this atom + # will have moved 0.03*c, but if we only subtract the positions, we would + # get a displacement vector of [0, 0, -0.97]. Therefore, we can correct for + # this by adding or subtracting 1 from the value. + displacements = np.subtract(displacements, np.around(displacements)) + + self.coords = displacements + self.coords_are_displacement = True def extend(self, trajectory: Trajectory) -> None: """