Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Neighbour analysis for the entire trajectory #251

Merged
merged 38 commits into from
Jul 19, 2021
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
f7648be
:zap: Neighbour analysis for the entire trajectory
sudarsan-surendralal Jun 27, 2021
f8f2a95
pep8
sudarsan-surendralal Jun 27, 2021
bb8b781
:bug: Set the correct quantity
sudarsan-surendralal Jun 27, 2021
84c82ac
Reduce lines
sudarsan-surendralal Jun 27, 2021
2308729
Make the attributes available as properties
sudarsan-surendralal Jun 27, 2021
b6456c1
Move the functions to a new class `NeighborTraj`
sudarsan-surendralal Jun 27, 2021
aa5f24f
Implement functions
sudarsan-surendralal Jun 27, 2021
02611d1
Important fixes
sudarsan-surendralal Jun 28, 2021
6bc829b
Fix and data type conversion!
sudarsan-surendralal Jun 28, 2021
4e1cb35
Introducing tests
sudarsan-surendralal Jun 28, 2021
cf219f8
Improving docstrings
sudarsan-surendralal Jun 28, 2021
3341f0d
pep8
sudarsan-surendralal Jun 28, 2021
cc0ec61
More functions in the job class!
sudarsan-surendralal Jun 28, 2021
80cb83e
Updating tests
sudarsan-surendralal Jun 28, 2021
9614ef8
Add option to store and retrieve from HDF5 files
sudarsan-surendralal Jun 30, 2021
7462d71
Merge remote-tracking branch 'origin/master' into neighbors_traj
sudarsan-surendralal Jul 2, 2021
f349cf7
Derive class from DataContainer
sudarsan-surendralal Jul 2, 2021
13c5978
Use HasStructure for _get_neighbors instead of re-creating structures…
pmrv Jul 6, 2021
2e12c1e
Update pyiron_atomistics/atomistics/structure/neighbors.py
sudarsan-surendralal Jul 6, 2021
5f407a2
Merge remote-tracking branch 'origin/master' into neighbors_traj
sudarsan-surendralal Jul 7, 2021
3d9792e
Set table name and rely on base class for writing to hdf
sudarsan-surendralal Jul 7, 2021
e5cfc43
Define index variable
pmrv Jul 8, 2021
b278379
Fix typo
pmrv Jul 8, 2021
d9cc947
Merge remote-tracking branch 'origin/nj_has' into neighbors_traj
sudarsan-surendralal Jul 9, 2021
9acaa44
Implement `__getitem__` to clice trajectories
sudarsan-surendralal Jul 14, 2021
7c00690
Disable loading checks!
sudarsan-surendralal Jul 14, 2021
02413e0
cleanup!
sudarsan-surendralal Jul 15, 2021
ecad9c0
Update pyiron_atomistics/atomistics/structure/neighbors.py
sudarsan-surendralal Jul 16, 2021
4aa7856
Update pyiron_atomistics/atomistics/structure/neighbors.py
sudarsan-surendralal Jul 16, 2021
7f23b96
minor changes
sudarsan-surendralal Jul 16, 2021
ed5e4f5
Merge branch 'neighbors_traj' of https://github.com/pyiron/pyiron_ato…
sudarsan-surendralal Jul 16, 2021
c6b9fed
:bug: Fix constructor
sudarsan-surendralal Jul 19, 2021
ae725e5
Restoring some checks
sudarsan-surendralal Jul 19, 2021
1b74ecc
Refactoring class name and attributes (Marvin's suggestion)
sudarsan-surendralal Jul 19, 2021
62e39a4
Remove unused import
sudarsan-surendralal Jul 19, 2021
6d51b09
Merge remote-tracking branch 'origin/master' into neighbors_traj
sudarsan-surendralal Jul 19, 2021
853b376
Merge remote-tracking branch 'origin/master' into neighbors_traj
sudarsan-surendralal Jul 19, 2021
2d507db
Merging new neighbor function changes
sudarsan-surendralal Jul 19, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 86 additions & 4 deletions pyiron_atomistics/atomistics/job/atomistic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@

import copy
import warnings
from functools import wraps

import numpy as np
from ase.io import write as ase_write

from pyiron_atomistics.atomistics.structure.atoms import Atoms
from pyiron_atomistics.atomistics.structure.neighbors import NeighborsTraj
from pyiron_atomistics.atomistics.structure.has_structure import HasStructure
from pyiron_base import GenericParameters, GenericMaster, GenericJob as GenericJobCore, deprecate

Expand Down Expand Up @@ -464,6 +462,8 @@ def trajectory(
snapshot_indices=None, overwrite_positions=None, overwrite_cells=None
):
"""
Returns a `Trajectory` instance containing the necessary information to describe the evolution of the atomic
structure during the atomistic simulation.

Args:
stride (int): The trajectories are generated with every 'stride' steps
Expand All @@ -477,7 +477,7 @@ def trajectory(
have the same length of `overwrite_positions`

Returns:
pyiron.atomistics.job.atomistic.Trajectory: Trajectory instance
pyiron_atomistics.atomistics.job.atomistic.Trajectory: Trajectory instance

"""
cells = self.output.cells
Expand Down Expand Up @@ -598,6 +598,43 @@ def write_traj(
**kwargs
)

def get_neighbors_snapshots(self, snapshot_indices=None, num_neighbors=12, **kwargs):
pmrv marked this conversation as resolved.
Show resolved Hide resolved
"""
Get the neighbors only for the required snapshots from the trajectory

Args:
snapshot_indices (list/numpy.ndarray): Snapshots for which the the neighbors need to be computed
(eg. [1, 5, 10,..., 100]
num_neighbors (int): The cutoff for the number of neighbors
**kwargs (dict): Additional arguments to be passed to the `get_neighbors()` routine
(eg. cutoff_radius, norm_order , etc.)

Returns:
pyiron_atomistics.atomistics.structure.neighbors.NeighborsTraj: `NeighborsTraj` instances
containing the neighbor information.
"""
return self.trajectory().get_neighbors_snapshots(snapshot_indices=snapshot_indices,
num_neighbors=num_neighbors, **kwargs)

def get_neighbors(self, start=0, stop=-1, stride=1, num_neighbors=12, **kwargs):
"""
Get the neighbors for a given section of the trajectory

Args:
start (int): Start point of the slice of the trajectory to be sampled
stop (int): End point of of the slice of the trajectory to be sampled
stride (int): Samples the snapshots evert `stride` steps
num_neighbors (int): The cutoff for the number of neighbors
**kwargs (dict): Additional arguments to be passed to the `get_neighbors()` routine
(eg. cutoff_radius, norm_order , etc.)

Returns:
pyiron_atomistics.atomistics.structure.neighbors.NeighborsTraj: `NeighborsTraj` instances
containing the neighbor information.
"""
return self.trajectory().get_neighbors(start=start, stop=stop, stride=stride,
num_neighbors=num_neighbors, **kwargs)

# Compatibility functions
@deprecate("Use get_structure()")
def get_final_structure(self):
Expand Down Expand Up @@ -744,6 +781,50 @@ def __getitem__(self, item):
def __len__(self):
return len(self._positions)

def get_neighbors_snapshots(self, snapshot_indices=None, num_neighbors=12, **kwargs):
"""
Get the neighbors only for the required snapshots from the trajectory

Args:
snapshot_indices (list/numpy.ndarray): Snapshots for which the the neighbors need to be computed
(eg. [1, 5, 10,..., 100]
num_neighbors (int): The cutoff for the number of neighbors
**kwargs (dict): Additional arguments to be passed to the `get_neighbors()` routine
(eg. cutoff_radius, norm_order , etc.)

Returns:
pyiron_atomistics.atomistics.structure.neighbors.NeighborsTraj: `NeighborsTraj` instances
containing the neighbor information.
"""
if snapshot_indices is None:
snapshot_indices = np.arange(len(self), dtype=int)

n_obj = NeighborsTraj(init_structure=self._structure,
positions=self._positions[snapshot_indices],
cells=self._cells[snapshot_indices],
num_neighbors=num_neighbors, **kwargs)
n_obj.compute_neighbors()
return n_obj

def get_neighbors(self, start=0, stop=-1, stride=1, num_neighbors=12, **kwargs):
"""
Get the neighbors for a given section of the trajectory

Args:
start (int): Start point of the slice of the trajectory to be sampled
stop (int): End point of of the slice of the trajectory to be sampled
stride (int): Samples the snapshots evert `stride` steps
num_neighbors (int): The cutoff for the number of neighbors
**kwargs (dict): Additional arguments to be passed to the `get_neighbors()` routine
(eg. cutoff_radius, norm_order , etc.)

Returns:
pyiron_atomistics.atomistics.structure.neighbors.NeighborsTraj: `NeighborsTraj` instances
containing the neighbor information.
"""
snapshot_indices = np.arange(len(self))[start:stop:stride]
return self.get_neighbors_snapshots(snapshot_indices=snapshot_indices, num_neighbors=num_neighbors, **kwargs)


class GenericInput(GenericParameters):
def __init__(self, input_file_name=None, table_name="generic"):
Expand Down Expand Up @@ -889,3 +970,4 @@ def __dir__(self):
return hdf5_path.list_nodes()
else:
return []

144 changes: 144 additions & 0 deletions pyiron_atomistics/atomistics/structure/neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1047,3 +1047,147 @@ def get_cluster(dist_vec, ind_vec, prec=prec):
ind_shell.append(ia_shells_dict)
return ind_shell


class NeighborsTraj:

"""
This class generates the neighbors for a given atomistic trajectory. The assumption here is that the trajectory is
canonical (no change in the number and types of species throughout the trajectory). The resulting indices,
distances, and vectors are stored as numpy arrays.

"""

sudarsan-surendralal marked this conversation as resolved.
Show resolved Hide resolved
def __init__(self, init_structure=None, positions=None, cells=None, num_neighbors=12, **kwargs):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it'd be extremely useful for this to take any object that implements HasStructure instead of passing all of these things separately. Calling get_structure every step might be a bit slower, but since you're doing essentially what AtomisticGenericJob.get_structure does anyway, it shouldn't be that much.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it turns out much slower, then I'd say we add a method roughly like this

@classmethod
def from_structures(cls, has_structure):
  structs = list(has_structure.iter_structures())
  return cls(structs[0], [s.positions for s in structs], [s.cell for s in structs])

to NeighborTraj to achieve the same generality, but offer a small escape hatch for cases where we know get_structure is too slow.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it turns out much slower, then I'd say we add a method roughly like this

@classmethod
def from_structures(cls, has_structure):
  structs = list(has_structure.iter_structures())
  return cls(structs[0], [s.positions for s in structs], [s.cell for s in structs])

to NeighborTraj to achieve the same generality, but offer a small escape hatch for cases where we know get_structure is too slow.

I think this is a great way to generalize this for any derivative of HasStructure. However, I don't think building the positions and cells from the individual structures is not that efficient.

So how about something like this:

def __init__(self, has_structure=None, init_struct=None, positions=None, cells=None):
    if has_structure is None:
        if positions is None or init_struct is None:
            raise ValueError()

and then

def _get_neighbors_hs(has_struct, num_neighbors=20, **kwargs):
    [struct.get_neighbors() for struct in has_struct.iter_structures()]

which gets called instead of _get_neighbors

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should simply give it a try to use get_structure() everywhere and see if it really is slower. Checking Trajectory._get_structure and your code in _get_neighbors() looks essentially the same to me, so I don't expect a big hit.

If you don't want to do it, I can give it a go tonight.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also __init__ needs to accept an init argument as the first argument and pass it to super().__init__(), otherwise recursive loading from HDF will not work.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should simply give it a try to use get_structure() everywhere and see if it really is slower. Checking Trajectory._get_structure and your code in _get_neighbors() looks essentially the same to me, so I don't expect a big hit.

If you don't want to do it, I can give it a go tonight.

Sure I'll merge your PR into this one and see what happens!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also __init__ needs to accept an init argument as the first argument and pass it to super().__init__(), otherwise recursive loading from HDF will not work.

Not sure what you mean by this. Could you clarify?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also __init__ needs to accept an init argument as the first argument and pass it to super().__init__(), otherwise recursive loading from HDF will not work.

Not sure what you mean by this. Could you clarify?

When you derive from DataContainer the new __init__ method must be compatible to the original one. One of the ways DataContainer is instantiated is by passing a collection to initialize it, like so

d = DataContainer({'a': 1, 'b': 3, 2: 42})
assert d[1] == 3

Because this is used by the DataContainer internally when loading from hdf, subclasses also need to offer this. There's a little bit more explanation here in the attention box.

"""

Args:
init_structure (pyiron_atomistics.atomistics.structure.atoms.Atoms): Any given structure of the trajectory
positions (numpy.ndarray): The cartesian positions of the trajectories
cells (numpy.ndarray/None): The varying cell shapes
num_neighbors (int): The cutoff for the number of neighbors
**kwargs (dict): Additional arguments to be passed to the `get_neighbors()` routine
(eg. cutoff_radius, norm_order , etc.)
"""
self._init_structure = init_structure
self._neighbor_indices = None
self._neighbor_distances = None
self._neighbor_vectors = None
self._positions = positions
self._cells = cells
self._num_neighbors = num_neighbors
self._get_neighbors_kwargs = kwargs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For __init__ to be compatible a change like this would be necessary. When init is given the other parameters are not set, because presumably they are set in the init read from HDF5. If you want to be complete, you can check that init really contains all the attributes that __init__ normally sets and set those manually that are not in `init.

Suggested change
def __init__(self, init_structure=None, positions=None, cells=None, num_neighbors=12, **kwargs):
"""
Args:
init_structure (pyiron_atomistics.atomistics.structure.atoms.Atoms): Any given structure of the trajectory
positions (numpy.ndarray): The cartesian positions of the trajectories
cells (numpy.ndarray/None): The varying cell shapes
num_neighbors (int): The cutoff for the number of neighbors
**kwargs (dict): Additional arguments to be passed to the `get_neighbors()` routine
(eg. cutoff_radius, norm_order , etc.)
"""
self._init_structure = init_structure
self._neighbor_indices = None
self._neighbor_distances = None
self._neighbor_vectors = None
self._positions = positions
self._cells = cells
self._num_neighbors = num_neighbors
self._get_neighbors_kwargs = kwargs
def __init__(self, init=None, init_structure=None, positions=None, cells=None, num_neighbors=12, **kwargs):
"""
Args:
init_structure (pyiron_atomistics.atomistics.structure.atoms.Atoms): Any given structure of the trajectory
positions (numpy.ndarray): The cartesian positions of the trajectories
cells (numpy.ndarray/None): The varying cell shapes
num_neighbors (int): The cutoff for the number of neighbors
**kwargs (dict): Additional arguments to be passed to the `get_neighbors()` routine
(eg. cutoff_radius, norm_order , etc.)
"""
super().__init__(init=init)
if init is None:
self._init_structure = init_structure
self._neighbor_indices = None
self._neighbor_distances = None
self._neighbor_vectors = None
self._positions = positions
self._cells = cells
self._num_neighbors = num_neighbors
self._get_neighbors_kwargs = kwargs


@property
def neighbor_indices(self):
"""
Neighbour indices (excluding itself) of each atom computed using the get_neighbors_traj() method

Returns:

numpy.ndarray: An array of dimension N_steps / stride x N_atoms x N_neighbors

"""
return self._neighbor_indices

@property
def neighbor_distances(self):
"""
Neighbour distances (excluding itself) of each atom computed using the get_neighbors_traj() method

Returns:

numpy.ndarray: An array of dimension N_steps / stride x N_atoms x N_neighbors

"""
return self._neighbor_distances

@property
def neighbor_vectors(self):
"""
Neighbour vectors (excluding itself) of each atom computed using the get_neighbors_traj() method

Returns:

numpy.ndarray: An array of dimension N_steps / stride x N_atoms x N_neighbors x 3

"""
return self._neighbor_vectors

@property
def num_neighbors(self):
"""
The maximum number of neighbors to be computed

Returns:

int: The max number of neighbors
"""
return self._num_neighbors

def compute_neighbors(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the only purpose of this class is to call this method, can we just call it in __init__ automatically (when it is not loaded from HDF)?

"""
Compute the neighbors across the trajectory
"""
self._neighbor_indices, self._neighbor_distances, self._neighbor_vectors = \
_get_neighbors(self._init_structure,
positions=self._positions,
cells=self._cells,
num_neighbors=self._num_neighbors, **self._get_neighbors_kwargs)

def to_hdf(self, hdf, group_name="neighbors_traj"):
sudarsan-surendralal marked this conversation as resolved.
Show resolved Hide resolved
"""
Store this `NeighborTraj` instance in a .h5 file.

Args:
hdf (pyiron_base.generic.hdfio.FileHDFio): HDF5 group or file
group_name (str): Name of the HDF5 group

"""
with hdf.open(group_name) as h_g:
h_g["TYPE"] = str(type(self))
h_g["neighbor_vectors"] = self._neighbor_vectors
h_g["neighbor_distances"] = self._neighbor_distances
h_g["neighbor_indices"] = self._neighbor_indices
h_g["positions"] = self._positions
h_g["cells"] = self._cells
h_g["num_neighbors"] = self._num_neighbors
h_g["get_neighbors_kwargs"] = self._get_neighbors_kwargs
self._init_structure.to_hdf(h_g, group_name="structure")

def from_hdf(self, hdf, group_name="neighbors_traj"):
"""
Recreate the `NeighborTraj` instance stored in a .h5 file.

Args:
hdf (pyiron_base.generic.hdfio.FileHDFio): HDF5 group or file
group_name (str): Name of the HDF5 group

"""
with hdf.open(group_name) as h_g:
self._neighbor_vectors = h_g["neighbor_vectors"]
self._neighbor_distances = h_g["neighbor_distances"]
self._neighbor_indices = h_g["neighbor_indices"]
self._positions = h_g["positions"]
self._cells = h_g["cells"]
self._num_neighbors = h_g["num_neighbors"]
self._get_neighbors_kwargs = h_g["get_neighbors_kwargs"]
self._init_structure = h_g["structure"].to_object()


def _get_neighbors(struct, positions, cells=None, num_neighbors=20, **kwargs):
sudarsan-surendralal marked this conversation as resolved.
Show resolved Hide resolved
[n_steps, n_atoms, _] = positions.shape
indices, distances = [np.zeros((n_steps, n_atoms, num_neighbors)) for _ in range(2)]
vecs = np.zeros((n_steps, n_atoms, num_neighbors, 3))
struct = struct.copy()
for t, pos in enumerate(positions):
struct.positions = pos
# Do cell lookup only if its shape changes
if cells is not None:
struct.cell = cells[t]
# Change the `allow_ragged` based on the changes in get_neighbors()
neigh = struct.get_neighbors(num_neighbors=num_neighbors, allow_ragged=False, **kwargs)
indices[t, :, :] = neigh.indices
distances[t, :, :] = neigh.distances
vecs[t, :, :, :] = neigh.vecs
indices = np.array(indices, dtype=int)
return indices, distances, vecs
20 changes: 19 additions & 1 deletion tests/lammps/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,11 +404,29 @@ def test_dump_parser_water(self):
snapshot_indices=snap_indices)._positions,
orig_pos[snap_indices][:, atom_indices, :]))
with self.job_water_dump.project_hdf5.open("output/generic") as h_out:
h_out["cells"] = np.array([np.zeros((3, 3))] * len(h_out["positions"]))
h_out["cells"] = np.repeat([np.array(water.cell)], len(h_out["positions"]), axis=0)
self.assertTrue(np.array_equal(
self.job_water_dump.trajectory(atom_indices=atom_indices,
snapshot_indices=snap_indices)._positions,
orig_pos[snap_indices][:, atom_indices, :]))
neigh_traj_obj = self.job_water_dump.get_neighbors()
self.assertTrue(np.allclose(np.linalg.norm(neigh_traj_obj.neighbor_vectors, axis=-1),
neigh_traj_obj.neighbor_distances))
h_indices = self.job_water_dump.structure.select_index("H")
o_indices = self.job_water_dump.structure.select_index("O")
self.assertLessEqual(neigh_traj_obj.neighbor_distances[:, o_indices, :2].max(), 1.2)
self.assertGreaterEqual(neigh_traj_obj.neighbor_distances[:, o_indices, :2].min(), 0.8)
self.assertTrue(np.alltrue([np.in1d(np.unique(ind_mat.flatten()), h_indices) for ind_mat in
neigh_traj_obj.neighbor_indices[:, o_indices, :2]]))
neigh_traj_obj_snaps = self.job_water_dump.get_neighbors_snapshots(snapshot_indices=[2, 3, 4])
self.assertTrue(np.allclose(neigh_traj_obj.neighbor_vectors[2:], neigh_traj_obj_snaps.neighbor_vectors))
neigh_traj_obj.to_hdf(self.job_water_dump.project_hdf5)
neigh_traj_obj_loaded = self.job_water_dump["neighbors_traj"].to_object()
self.assertEqual(neigh_traj_obj._init_structure, neigh_traj_obj_loaded._init_structure)
self.assertEqual(neigh_traj_obj._num_neighbors, neigh_traj_obj_loaded._num_neighbors)
self.assertTrue(np.allclose(neigh_traj_obj.neighbor_indices, neigh_traj_obj_loaded.neighbor_indices))
self.assertTrue(np.allclose(neigh_traj_obj.neighbor_distances, neigh_traj_obj_loaded.neighbor_distances))
self.assertTrue(np.allclose(neigh_traj_obj.neighbor_vectors, neigh_traj_obj_loaded.neighbor_vectors))

def test_dump_parser(self):
structure = Atoms(
Expand Down