From 0d2d3b61abd22268a356ca8ea2390db4fe382ca1 Mon Sep 17 00:00:00 2001
From: Aliaksandr Yakutovich <yakutovicha@gmail.com>
Date: Wed, 31 Jan 2024 11:18:31 +0100
Subject: [PATCH 1/9] Fix: retrieve the last structure even if a calculation
 has failed (#205)

I also modified one of the tests to ensure this behaviour is checked.
---
 aiida_cp2k/parsers/__init__.py                | 60 +++++++++----------
 aiida_cp2k/utils/parser.py                    |  7 +--
 .../single_calculations/example_restart.py    | 33 +++++-----
 3 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/aiida_cp2k/parsers/__init__.py b/aiida_cp2k/parsers/__init__.py
index 26e133d..0da7111 100644
--- a/aiida_cp2k/parsers/__init__.py
+++ b/aiida_cp2k/parsers/__init__.py
@@ -6,23 +6,16 @@
 ###############################################################################
 """AiiDA-CP2K output parser."""
 
-from aiida.common import exceptions
-from aiida.engine import ExitCode
-from aiida.orm import Dict
+import ase
+from aiida import common, engine, orm, parsers, plugins
 
-# +
-from aiida.parsers import Parser
-from aiida.plugins import DataFactory
+from .. import utils
 
-from aiida_cp2k import utils
+StructureData = plugins.DataFactory("core.structure")
+BandsData = plugins.DataFactory("core.array.bands")
 
-# -
 
-StructureData = DataFactory("core.structure")
-BandsData = DataFactory("core.array.bands")
-
-
-class Cp2kBaseParser(Parser):
+class Cp2kBaseParser(parsers.Parser):
     """Basic AiiDA parser for the output of CP2K."""
 
     def parse(self, **kwargs):
@@ -30,23 +23,25 @@ def parse(self, **kwargs):
 
         try:
             _ = self.retrieved
-        except exceptions.NotExistent:
+        except common.NotExistent:
             return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER
 
         exit_code = self._parse_stdout()
-        if exit_code is not None:
-            return exit_code
 
+        # Even though the simpulation might have failed, we still want to parse the output structure.
         try:
-            returned = self._parse_trajectory()
-            if isinstance(returned, StructureData):
-                self.out("output_structure", returned)
-            else:  # in case this is an error code
-                return returned
-        except exceptions.NotExistent:
-            pass
+            last_structure = self._parse_final_structure()
+            if isinstance(last_structure, StructureData):
+                self.out("output_structure", last_structure)
+        except common.NotExistent:
+            last_structure = None
+            self.logger.warning("No Restart file found in the retrieved folder.")
 
-        return ExitCode(0)
+        if exit_code is not None:
+            return exit_code
+        if isinstance(last_structure, engine.ExitCode):
+            return last_structure
+        return engine.ExitCode(0)
 
     def _parse_stdout(self):
         """Basic CP2K output file parser."""
@@ -63,19 +58,16 @@ def _parse_stdout(self):
 
         # Parse the standard output.
         result_dict = utils.parse_cp2k_output(output_string)
-        self.out("output_parameters", Dict(dict=result_dict))
+        self.out("output_parameters", orm.Dict(dict=result_dict))
         return None
 
-    def _parse_trajectory(self):
+    def _parse_final_structure(self):
         """CP2K trajectory parser."""
-
-        from ase import Atoms
-
         fname = self.node.process_class._DEFAULT_RESTART_FILE_NAME
 
         # Check if the restart file is present.
         if fname not in self.retrieved.base.repository.list_object_names():
-            raise exceptions.NotExistent(
+            raise common.NotExistent(
                 "No restart file available, so the output trajectory can't be extracted"
             )
 
@@ -85,7 +77,9 @@ def _parse_trajectory(self):
         except OSError:
             return self.exit_codes.ERROR_OUTPUT_STDOUT_READ
 
-        return StructureData(ase=Atoms(**utils.parse_cp2k_trajectory(output_string)))
+        return StructureData(
+            ase=ase.Atoms(**utils.parse_cp2k_trajectory(output_string))
+        )
 
     def _check_stdout_for_errors(self, output_string):
         """This function checks the CP2K output file for some basic errors."""
@@ -169,7 +163,7 @@ def _parse_stdout(self):
             )
             self.out("output_bands", bnds)
 
-        self.out("output_parameters", Dict(dict=result_dict))
+        self.out("output_parameters", orm.Dict(dict=result_dict))
         return None
 
 
@@ -208,5 +202,5 @@ def _parse_stdout(self):
         except KeyError:
             pass
 
-        self.out("output_parameters", Dict(dict=result_dict))
+        self.out("output_parameters", orm.Dict(dict=result_dict))
         return None
diff --git a/aiida_cp2k/utils/parser.py b/aiida_cp2k/utils/parser.py
index 774f4f1..aca942b 100644
--- a/aiida_cp2k/utils/parser.py
+++ b/aiida_cp2k/utils/parser.py
@@ -9,6 +9,8 @@
 import math
 import re
 
+import numpy as np
+
 
 def parse_cp2k_output(fstring):
     """Parse CP2K output into a dictionary."""
@@ -323,8 +325,6 @@ def _parse_bands_cp2k_greater_81(lines, line_n):
 def _parse_bands(lines, n_start, cp2k_version):
     """Parse band structure from the CP2K output."""
 
-    import numpy as np
-
     kpoints = []
     labels = []
     bands_s1 = []
@@ -377,9 +377,6 @@ def _parse_bands(lines, n_start, cp2k_version):
 
 def parse_cp2k_trajectory(content):
     """CP2K trajectory parser."""
-
-    import numpy as np
-
     # Parse coordinate section
     match = re.search(r"\n\s*&COORD\n(.*?)\n\s*&END COORD\n", content, re.DOTALL)
     coord_lines = [line.strip().split() for line in match.group(1).splitlines()]
diff --git a/examples/single_calculations/example_restart.py b/examples/single_calculations/example_restart.py
index 2c0d049..6b9d3cd 100644
--- a/examples/single_calculations/example_restart.py
+++ b/examples/single_calculations/example_restart.py
@@ -13,12 +13,9 @@
 
 import ase.io
 import click
-from aiida.common import NotExistent
-from aiida.engine import run, run_get_node
-from aiida.orm import Dict, SinglefileData, load_code
-from aiida.plugins import DataFactory
+from aiida import common, engine, orm, plugins
 
-StructureData = DataFactory("core.structure")
+StructureData = plugins.DataFactory("core.structure")
 
 
 def example_restart(cp2k_code):
@@ -34,17 +31,17 @@ def example_restart(cp2k_code):
     )
 
     # Basis set.
-    basis_file = SinglefileData(
+    basis_file = orm.SinglefileData(
         file=os.path.join(thisdir, "..", "files", "BASIS_MOLOPT")
     )
 
     # Pseudopotentials.
-    pseudo_file = SinglefileData(
+    pseudo_file = orm.SinglefileData(
         file=os.path.join(thisdir, "..", "files", "GTH_POTENTIALS")
     )
 
     # CP2K input.
-    params1 = Dict(
+    params1 = orm.Dict(
         {
             "GLOBAL": {
                 "RUN_TYPE": "GEO_OPT",
@@ -100,7 +97,6 @@ def example_restart(cp2k_code):
         }
     )
 
-    # ------------------------------------------------------------------------------
     # Construct process builder.
     builder = cp2k_code.get_builder()
 
@@ -119,7 +115,7 @@ def example_restart(cp2k_code):
     builder.metadata.options.max_wallclock_seconds = 1 * 2 * 60
 
     print("Submitted calculation 1.")
-    calc1_outputs, calc1 = run_get_node(builder)
+    calc1_outputs, calc1 = engine.run_get_node(builder)
 
     # Check walltime exceeded.
     if calc1.exit_status == 400:
@@ -128,7 +124,10 @@ def example_restart(cp2k_code):
         print("FAIL, walltime wasn't exceeded as it should.")
         sys.exit(1)
 
-    # ------------------------------------------------------------------------------
+    print(calc1_outputs)
+    assert "output_structure" in calc1_outputs, "The output_structure is missing."
+    print("OK, output_structure is present, even though the calculation has failed.")
+
     # Set up and start the second calculation.
 
     # Parameters.
@@ -139,7 +138,7 @@ def example_restart(cp2k_code):
     params2["FORCE_EVAL"]["DFT"]["RESTART_FILE_NAME"] = restart_wfn_fn
     params2["FORCE_EVAL"]["DFT"]["SCF"]["SCF_GUESS"] = "RESTART"
     params2["EXT_RESTART"] = {"RESTART_FILE_NAME": "./parent_calc/aiida-1.restart"}
-    params2 = Dict(params2)
+    params2 = orm.Dict(params2)
 
     # Structure.
     atoms2 = ase.io.read(os.path.join(thisdir, "..", "files", "h2o.xyz"))
@@ -152,7 +151,7 @@ def example_restart(cp2k_code):
     builder.parent_calc_folder = calc1_outputs["remote_folder"]
 
     print("Submitted calculation 2.")
-    calc2 = run(builder)
+    calc2 = engine.run(builder)
 
     # Check energy.
     expected_energy = -17.1566455959
@@ -161,7 +160,9 @@ def example_restart(cp2k_code):
 
     # Ensure that this warning originates from overwritting coordinates.
     output = calc2["retrieved"].base.repository.get_object_content("aiida.out")
-    assert re.search("WARNING .* :: Overwriting coordinates", output)
+    assert re.search(
+        "WARNING .* :: Overwriting coordinates", output
+    ), "No warning about overwritting coordinates."
 
 
 @click.command("cli")
@@ -169,8 +170,8 @@ def example_restart(cp2k_code):
 def cli(codelabel):
     """Click interface."""
     try:
-        code = load_code(codelabel)
-    except NotExistent:
+        code = orm.load_code(codelabel)
+    except common.NotExistent:
         print(f"The code '{codelabel}' does not exist.")
         sys.exit(1)
     example_restart(code)

From 01564a7f8648da615bfea64bacc34478e5280b2d Mon Sep 17 00:00:00 2001
From: Aliaksandr Yakutovich <yakutovicha@gmail.com>
Date: Thu, 8 Feb 2024 16:59:25 +0100
Subject: [PATCH 2/9] Implement trajectory parser, provide example of MD
 simulations (#206)

In this PR, I implemented an initial version of the trajectory parser. CP2K produces
several "trajectories": coordinates, forces, velocities, etc. Currently, only a few of
them (coordinates, cells, forces) are parsed into a single AiiDA output, `output_trajectory`.
More quantities can be added to the trajectory parser later if needed.

Also, this PR includes a new example of an MD simulation of a water molecule.
---
 aiida_cp2k/calculations/__init__.py           |  24 +++
 aiida_cp2k/parsers/__init__.py                |  89 +++++++++-
 examples/single_calculations/example_mm_md.py | 153 ++++++++++++++++++
 3 files changed, 263 insertions(+), 3 deletions(-)
 create mode 100644 examples/single_calculations/example_mm_md.py

diff --git a/aiida_cp2k/calculations/__init__.py b/aiida_cp2k/calculations/__init__.py
index e495158..5dea1bf 100644
--- a/aiida_cp2k/calculations/__init__.py
+++ b/aiida_cp2k/calculations/__init__.py
@@ -40,6 +40,9 @@ class Cp2kCalculation(CalcJob):
     _DEFAULT_PROJECT_NAME = "aiida"
     _DEFAULT_RESTART_FILE_NAME = _DEFAULT_PROJECT_NAME + "-1.restart"
     _DEFAULT_TRAJECT_FILE_NAME = _DEFAULT_PROJECT_NAME + "-pos-1.dcd"
+    _DEFAULT_TRAJECT_XYZ_FILE_NAME = _DEFAULT_PROJECT_NAME + "-pos-1.xyz"
+    _DEFAULT_TRAJECT_FORCES_FILE_NAME = _DEFAULT_PROJECT_NAME + "-frc-1.xyz"
+    _DEFAULT_TRAJECT_CELL_FILE_NAME = _DEFAULT_PROJECT_NAME + "-1.cell"
     _DEFAULT_PARENT_CALC_FLDR_NAME = "parent_calc/"
     _DEFAULT_COORDS_FILE_NAME = "aiida.coords.xyz"
     _DEFAULT_PARSER = "cp2k_base_parser"
@@ -162,6 +165,24 @@ def define(cls, spec):
             "ERROR_STRUCTURE_PARSE",
             message="The output structure could not be parsed.",
         )
+        spec.exit_code(
+            321,
+            "ERROR_COORDINATES_TRAJECTORY_READ",
+            message="The coordinates trajectory file could not be read.",
+        )
+
+        spec.exit_code(
+            323,
+            "ERROR_FORCES_TRAJECTORY_READ",
+            message="The forces trajectory file could not be read.",
+        )
+
+        spec.exit_code(
+            325,
+            "ERROR_CELLS_TRAJECTORY_READ",
+            message="The cells trajectory file could not be read.",
+        )
+
         spec.exit_code(
             350,
             "ERROR_UNEXPECTED_PARSER_EXCEPTION",
@@ -329,6 +350,9 @@ def prepare_for_submission(self, folder):
             self._DEFAULT_OUTPUT_FILE,
             self._DEFAULT_RESTART_FILE_NAME,
             self._DEFAULT_TRAJECT_FILE_NAME,
+            self._DEFAULT_TRAJECT_XYZ_FILE_NAME,
+            self._DEFAULT_TRAJECT_FORCES_FILE_NAME,
+            self._DEFAULT_TRAJECT_CELL_FILE_NAME,
         ]
         calcinfo.retrieve_list += settings.pop("additional_retrieve_list", [])
 
diff --git a/aiida_cp2k/parsers/__init__.py b/aiida_cp2k/parsers/__init__.py
index 0da7111..8b7bc18 100644
--- a/aiida_cp2k/parsers/__init__.py
+++ b/aiida_cp2k/parsers/__init__.py
@@ -7,6 +7,7 @@
 """AiiDA-CP2K output parser."""
 
 import ase
+import numpy as np
 from aiida import common, engine, orm, parsers, plugins
 
 from .. import utils
@@ -29,18 +30,30 @@ def parse(self, **kwargs):
         exit_code = self._parse_stdout()
 
         # Even though the simpulation might have failed, we still want to parse the output structure.
+        last_structure = None
         try:
             last_structure = self._parse_final_structure()
             if isinstance(last_structure, StructureData):
                 self.out("output_structure", last_structure)
         except common.NotExistent:
-            last_structure = None
-            self.logger.warning("No Restart file found in the retrieved folder.")
+            self.logger.warning("No restart file found in the retrieved folder.")
+
+        trajectory = None
+        try:
+            if last_structure is not None:
+                trajectory = self._parse_trajectory(last_structure)
+                if isinstance(trajectory, orm.TrajectoryData):
+                    self.out("output_trajectory", trajectory)
+        except common.NotExistent:
+            self.logger.warning("No trajectory file found in the retrieved folder.")
 
         if exit_code is not None:
             return exit_code
         if isinstance(last_structure, engine.ExitCode):
             return last_structure
+        if isinstance(trajectory, engine.ExitCode):
+            return trajectory
+
         return engine.ExitCode(0)
 
     def _parse_stdout(self):
@@ -108,10 +121,80 @@ def _read_stdout(self):
         try:
             output_string = self.retrieved.base.repository.get_object_content(fname)
         except OSError:
-            return self.exit_codes.ERROR_OUTPUT_STDOUT_READ, None
+            return self.exit_codes.ERROR_OUTPUT_READ, None
 
         return None, output_string
 
+    def _parse_trajectory(self, structure):
+        """CP2K trajectory parser."""
+
+        symbols = [str(site.kind_name) for site in structure.sites]
+
+        # Handle the positions trajectory
+        xyz_traj_fname = self.node.process_class._DEFAULT_TRAJECT_XYZ_FILE_NAME
+
+        # Read the trajectory file.
+        try:
+            output_xyz_pos = self.retrieved.base.repository.get_object_content(
+                xyz_traj_fname
+            )
+        except OSError:
+            return self.exit_codes.ERROR_COORDINATES_TRAJECTORY_READ
+
+        from cp2k_output_tools.trajectories.xyz import parse
+
+        positions_traj = []
+        stepids_traj = []
+        for frame in parse(output_xyz_pos):
+            _, positions = zip(*frame["atoms"])
+            positions_traj.append(positions)
+            stepids_traj.append(int(frame["comment"].split()[2][:-1]))
+        positions_traj = np.array(positions_traj)
+        stepids_traj = np.array(stepids_traj)
+
+        cell_traj = None
+        cell_traj_fname = self.node.process_class._DEFAULT_TRAJECT_CELL_FILE_NAME
+        try:
+            if cell_traj_fname in self.retrieved.base.repository.list_object_names():
+                output_cell_pos = self.retrieved.base.repository.get_object_content(
+                    cell_traj_fname
+                )
+                cell_traj = np.array(
+                    [
+                        np.fromstring(line, sep=" ")[2:-1].reshape(3, 3)
+                        for line in output_cell_pos.splitlines()[1:]
+                    ]
+                )
+        except OSError:
+            return self.exit_codes.ERROR_CELLS_TRAJECTORY_READ
+
+        forces_traj = None
+        forces_traj_fname = self.node.process_class._DEFAULT_TRAJECT_FORCES_FILE_NAME
+        try:
+            if forces_traj_fname in self.retrieved.base.repository.list_object_names():
+                output_forces = self.retrieved.base.repository.get_object_content(
+                    forces_traj_fname
+                )
+                forces_traj = []
+                for frame in parse(output_forces):
+                    _, forces = zip(*frame["atoms"])
+                    forces_traj.append(forces)
+                forces_traj = np.array(forces_traj)
+        except OSError:
+            return self.exit_codes.ERROR_FORCES_TRAJECTORY_READ
+
+        trajectory = orm.TrajectoryData()
+        trajectory.set_trajectory(
+            stepids=stepids_traj,
+            cells=cell_traj,
+            symbols=symbols,
+            positions=positions_traj,
+        )
+        if forces_traj is not None:
+            trajectory.set_array("forces", forces_traj)
+
+        return trajectory
+
 
 class Cp2kAdvancedParser(Cp2kBaseParser):
     """Advanced AiiDA parser class for the output of CP2K."""
diff --git a/examples/single_calculations/example_mm_md.py b/examples/single_calculations/example_mm_md.py
new file mode 100644
index 0000000..f94e757
--- /dev/null
+++ b/examples/single_calculations/example_mm_md.py
@@ -0,0 +1,153 @@
+###############################################################################
+# Copyright (c), The AiiDA-CP2K authors.                                      #
+# SPDX-License-Identifier: MIT                                                #
+# AiiDA-CP2K is hosted on GitHub at https://github.com/aiidateam/aiida-cp2k   #
+# For further information on the license, see the LICENSE.txt file.           #
+###############################################################################
+"""Run molecular dynamics calculation."""
+
+import os
+import sys
+
+import ase.io
+import click
+from aiida import common, engine, orm
+
+
+def example_mm(cp2k_code):
+    """Run molecular mechanics calculation."""
+
+    print("Testing CP2K ENERGY on H2O (MM) ...")
+
+    # Force field.
+    with open(os.path.join("/tmp", "water.pot"), "w") as f:
+        f.write(
+            """BONDS
+    H    H       0.000     1.5139
+    O    H     450.000     0.9572
+
+    ANGLES
+    H    O    H      55.000   104.5200
+
+    DIHEDRALS
+
+    IMPROPER
+
+    NONBONDED
+    H      0.000000  -0.046000     0.224500
+    O      0.000000  -0.152100     1.768200
+
+    HBOND CUTHB 0.5
+
+    END"""
+        )
+
+    water_pot = orm.SinglefileData(file=os.path.join("/tmp", "water.pot"))
+
+    thisdir = os.path.dirname(os.path.realpath(__file__))
+
+    # structure using pdb format, because it also carries topology information
+    atoms = ase.io.read(os.path.join(thisdir, "..", "files", "h2o.xyz"))
+    atoms.center(vacuum=10.0)
+    atoms.write(os.path.join("/tmp", "coords.pdb"), format="proteindatabank")
+    coords_pdb = orm.SinglefileData(file=os.path.join("/tmp", "coords.pdb"))
+
+    # Parameters.
+    # Based on cp2k/tests/Fist/regtest-1-1/water_1.inp
+    parameters = orm.Dict(
+        {
+            "FORCE_EVAL": {
+                "METHOD": "fist",
+                "STRESS_TENSOR": "analytical",
+                "MM": {
+                    "FORCEFIELD": {
+                        "PARM_FILE_NAME": "water.pot",
+                        "PARMTYPE": "CHM",
+                        "CHARGE": [
+                            {"ATOM": "O", "CHARGE": -0.8476},
+                            {"ATOM": "H", "CHARGE": 0.4238},
+                        ],
+                    },
+                    "POISSON": {
+                        "EWALD": {
+                            "EWALD_TYPE": "spme",
+                            "ALPHA": 0.44,
+                            "GMAX": 24,
+                            "O_SPLINE": 6,
+                        }
+                    },
+                },
+                "SUBSYS": {
+                    "CELL": {
+                        "ABC": "%f  %f  %f" % tuple(atoms.cell.diagonal()),
+                    },
+                    "TOPOLOGY": {
+                        "COORD_FILE_NAME": "coords.pdb",
+                        "COORD_FILE_FORMAT": "PDB",
+                    },
+                },
+            },
+            "MOTION": {
+                "CONSTRAINT": {},
+                "MD": {
+                    "THERMOSTAT": {"CSVR": {}, "TYPE": "csvr"},
+                    "BAROSTAT": {},
+                    "STEPS": 1000,
+                    "ENSEMBLE": "npt_f",
+                    "TEMPERATURE": 300.0,
+                },
+                "PRINT": {
+                    "TRAJECTORY": {"EACH": {"MD": 5}},
+                    "RESTART": {"EACH": {"MD": 5}},
+                    "RESTART_HISTORY": {"_": "OFF"},
+                    "CELL": {"EACH": {"MD": 5}},
+                    "FORCES": {"EACH": {"MD": 5}, "FORMAT": "XYZ"},
+                },
+            },
+            "GLOBAL": {
+                "CALLGRAPH": "master",
+                "CALLGRAPH_FILE_NAME": "runtime",
+                "PRINT_LEVEL": "medium",
+                "RUN_TYPE": "MD",
+            },
+        }
+    )
+
+    # Construct process builder.
+    builder = cp2k_code.get_builder()
+    builder.parameters = parameters
+    builder.code = cp2k_code
+    builder.file = {
+        "water_pot": water_pot,
+        "coords_pdb": coords_pdb,
+    }
+    builder.metadata.options.resources = {
+        "num_machines": 1,
+        "num_mpiprocs_per_machine": 1,
+    }
+    builder.metadata.options.max_wallclock_seconds = 1 * 3 * 60
+
+    print("Submitted calculation...")
+    results = engine.run(builder)
+    assert "output_trajectory" in results, "Output trajectory not found among results."
+    traj = results["output_trajectory"]
+
+    assert traj.get_cells().shape == (201, 3, 3), "Unexpected shape of cells."
+    assert traj.get_positions().shape == (201, 3, 3), "Unexpected shape of positions."
+    assert traj.get_array("forces").shape == (201, 3, 3), "Unexpected shape of forces."
+
+
+@click.command("cli")
+@click.argument("codelabel")
+def cli(codelabel):
+    """Click interface."""
+    try:
+        code = orm.load_code(codelabel)
+    except common.NotExistent:
+        print(f"The code '{codelabel}' does not exist.")
+        sys.exit(1)
+    example_mm(code)
+
+
+if __name__ == "__main__":
+    cli()

From 33fd9944f15aa78793848cb1560788e55a776f07 Mon Sep 17 00:00:00 2001
From: Carlo Pignedoli <c.pignedoli@gmail.com>
Date: Wed, 6 Mar 2024 17:28:59 +0100
Subject: [PATCH 3/9] Implement support for the `REFTRAJ` simulation (#207)

- Add an optional `trajectory` input of type `TrajectoryData` to the CP2K calculation,
which is then written out as `aiida-reftraj.xyz` and `aiida-reftraj.cell`.
- Update the restart handler so that it specifies the `EXT_RESTART` section explicitly.
- Update the restart handler to make it understand that the MD simulation produced some steps.
- Add an example of a reftraj calculation that also does a restart.

---------
Co-authored-by: Aliaksandr Yakutovich <yakutovicha@gmail.com>
---
 aiida_cp2k/calculations/__init__.py           |  71 +++++-
 aiida_cp2k/utils/__init__.py                  |   8 +-
 aiida_cp2k/utils/input_generator.py           |  19 +-
 aiida_cp2k/utils/parser.py                    |   2 +-
 aiida_cp2k/workchains/base.py                 |  42 ++--
 .../example_base_md_reftraj_restart.py        | 205 ++++++++++++++++++
 6 files changed, 320 insertions(+), 27 deletions(-)
 create mode 100644 examples/workchains/example_base_md_reftraj_restart.py

diff --git a/aiida_cp2k/calculations/__init__.py b/aiida_cp2k/calculations/__init__.py
index 5dea1bf..f6f928c 100644
--- a/aiida_cp2k/calculations/__init__.py
+++ b/aiida_cp2k/calculations/__init__.py
@@ -8,6 +8,7 @@
 
 from operator import add
 
+import numpy as np
 from aiida.common import CalcInfo, CodeInfo, InputValidationError
 from aiida.engine import CalcJob
 from aiida.orm import Dict, RemoteData, SinglefileData
@@ -25,6 +26,7 @@
 
 BandsData = DataFactory("core.array.bands")
 StructureData = DataFactory("core.structure")
+TrajectoryData = DataFactory("core.array.trajectory")
 KpointsData = DataFactory("core.array.kpoints")
 
 
@@ -44,7 +46,9 @@ class Cp2kCalculation(CalcJob):
     _DEFAULT_TRAJECT_FORCES_FILE_NAME = _DEFAULT_PROJECT_NAME + "-frc-1.xyz"
     _DEFAULT_TRAJECT_CELL_FILE_NAME = _DEFAULT_PROJECT_NAME + "-1.cell"
     _DEFAULT_PARENT_CALC_FLDR_NAME = "parent_calc/"
-    _DEFAULT_COORDS_FILE_NAME = "aiida.coords.xyz"
+    _DEFAULT_COORDS_FILE_NAME = _DEFAULT_PROJECT_NAME + ".coords.xyz"
+    _DEFAULT_INPUT_TRAJECT_XYZ_FILE_NAME = _DEFAULT_PROJECT_NAME + "-reftraj.xyz"
+    _DEFAULT_INPUT_CELL_FILE_NAME = _DEFAULT_PROJECT_NAME + "-reftraj.cell"
     _DEFAULT_PARSER = "cp2k_base_parser"
 
     @classmethod
@@ -59,6 +63,12 @@ def define(cls, spec):
             required=False,
             help="The main input structure.",
         )
+        spec.input(
+            "trajectory",
+            valid_type=TrajectoryData,
+            required=False,
+            help="Input trajectory for a REFTRAJ simulation.",
+        )
         spec.input(
             "settings",
             valid_type=Dict,
@@ -219,6 +229,12 @@ def define(cls, spec):
             required=False,
             help="The relaxed output structure.",
         )
+        spec.output(
+            "output_trajectory",
+            valid_type=TrajectoryData,
+            required=False,
+            help="The output trajectory.",
+        )
         spec.output(
             "output_bands",
             valid_type=BandsData,
@@ -270,6 +286,15 @@ def prepare_for_submission(self, folder):
                 conflicting_keys=["COORDINATE"],
             )
 
+        # Create input trajectory files
+        if "trajectory" in self.inputs:
+            self._write_trajectories(
+                self.inputs.trajectory,
+                folder,
+                self._DEFAULT_INPUT_TRAJECT_XYZ_FILE_NAME,
+                self._DEFAULT_INPUT_CELL_FILE_NAME,
+            )
+
         if "basissets" in self.inputs:
             validate_basissets(
                 inp,
@@ -388,6 +413,19 @@ def _write_structure(structure, folder, name):
         with open(folder.get_abs_path(name), mode="w", encoding="utf-8") as fobj:
             fobj.write(xyz)
 
+    @staticmethod
+    def _write_trajectories(trajectory, folder, name_pos, name_cell):
+        """Function that writes a structure and takes care of element tags."""
+
+        (xyz, cell) = _trajectory_to_xyz_and_cell(trajectory)
+        with open(folder.get_abs_path(name_pos), mode="w", encoding="utf-8") as fobj:
+            fobj.write(xyz)
+        if cell is not None:
+            with open(
+                folder.get_abs_path(name_cell), mode="w", encoding="utf-8"
+            ) as fobj:
+                fobj.write(cell)
+
 
 def kind_names(atoms):
     """Get atom kind names from ASE atoms based on tags.
@@ -402,7 +440,7 @@ def kind_names(atoms):
     return list(map(add, atoms.get_chemical_symbols(), elem_tags))
 
 
-def _atoms_to_xyz(atoms):
+def _atoms_to_xyz(atoms, infoline="No info"):
     """Converts ASE atoms to string, taking care of element tags.
 
     :param atoms: ASE Atoms instance
@@ -412,6 +450,33 @@ def _atoms_to_xyz(atoms):
     elem_coords = [
         f"{p[0]:25.16f} {p[1]:25.16f} {p[2]:25.16f}" for p in atoms.get_positions()
     ]
-    xyz = f"{len(elem_coords)}\n\n"
+    xyz = f"{len(elem_coords)}\n"
+    xyz += f"{infoline}\n"
     xyz += "\n".join(map(add, elem_symbols, elem_coords))
     return xyz
+
+
+def _trajectory_to_xyz_and_cell(trajectory):
+    """Converts postions and cell from a TrajectoryData  to string, taking care of element tags from ASE atoms.
+
+    :param atoms: ASE Atoms instance
+    :param trajectory: TrajectoryData instance
+    :returns: positions str (in xyz format) and cell str
+    """
+    cell = None
+    xyz = ""
+    stepids = trajectory.get_stepids()
+    for i, step in enumerate(stepids):
+        xyz += _atoms_to_xyz(
+            trajectory.get_step_structure(i).get_ase(),
+            infoline=f"i = {step+1} , time = {(step+1)*0.5}",  # reftraj trajectories cannot start from STEP 0
+        )
+        xyz += "\n"
+    if "cells" in trajectory.get_arraynames():
+        cell = "#   Step   Time [fs]       Ax [Angstrom]       Ay [Angstrom]       Az [Angstrom]       Bx [Angstrom]       By [Angstrom]       Bz [Angstrom]       Cx [Angstrom]       Cy [Angstrom]       Cz [Angstrom]      Volume [Angstrom^3]\n"
+        cell_vecs = [
+            f"{stepid+1} {(stepid+1)*0.5:6.3f} {cellvec[0][0]:25.16f} {cellvec[0][1]:25.16f} {cellvec[0][2]:25.16f} {cellvec[1][0]:25.16f} {cellvec[1][1]:25.16f} {cellvec[1][2]:25.16f} {cellvec[2][0]:25.16f} {cellvec[2][1]:25.16f} {cellvec[2][2]:25.16f} {np.dot(cellvec[0],np.cross(cellvec[1],cellvec[2]))}"
+            for (stepid, cellvec) in zip(stepids, trajectory.get_array("cells"))
+        ]
+        cell += "\n".join(cell_vecs)
+    return xyz, cell
diff --git a/aiida_cp2k/utils/__init__.py b/aiida_cp2k/utils/__init__.py
index 2c27439..285483d 100644
--- a/aiida_cp2k/utils/__init__.py
+++ b/aiida_cp2k/utils/__init__.py
@@ -6,7 +6,12 @@
 ###############################################################################
 """AiiDA-CP2K utils"""
 
-from .input_generator import Cp2kInput, add_ext_restart_section, add_wfn_restart_section
+from .input_generator import (
+    Cp2kInput,
+    add_ext_restart_section,
+    add_first_snapshot_in_reftraj_section,
+    add_wfn_restart_section,
+)
 from .parser import parse_cp2k_output, parse_cp2k_output_advanced, parse_cp2k_trajectory
 from .workchains import (
     HARTREE2EV,
@@ -23,6 +28,7 @@
 __all__ = [
     "Cp2kInput",
     "add_ext_restart_section",
+    "add_first_snapshot_in_reftraj_section",
     "add_wfn_restart_section",
     "parse_cp2k_output",
     "parse_cp2k_output_advanced",
diff --git a/aiida_cp2k/utils/input_generator.py b/aiida_cp2k/utils/input_generator.py
index 975e08d..1e9d1e5 100644
--- a/aiida_cp2k/utils/input_generator.py
+++ b/aiida_cp2k/utils/input_generator.py
@@ -211,5 +211,22 @@ def add_ext_restart_section(input_dict):
     """Add external restart section to the input dictionary."""
     params = input_dict.get_dict()
     # overwrite the complete EXT_RESTART section if present
-    params["EXT_RESTART"] = {"RESTART_FILE_NAME": "./parent_calc/aiida-1.restart"}
+    params["EXT_RESTART"] = {
+        "RESTART_FILE_NAME": "./parent_calc/aiida-1.restart",
+        "RESTART_DEFAULT": ".TRUE.",
+        "RESTART_COUNTERS": ".TRUE.",
+        "RESTART_POS": ".TRUE.",
+        "RESTART_VEL": ".TRUE.",
+        "RESTART_CELL": ".TRUE.",
+        "RESTART_THERMOSTAT": ".TRUE.",
+        "RESTART_CONSTRAINT": ".FALSE.",
+    }
+    return Dict(params)
+
+
+@calcfunction
+def add_first_snapshot_in_reftraj_section(input_dict, first_snapshot):
+    """Add first_snapshot in REFTRAJ section to the input dictionary."""
+    params = input_dict.get_dict()
+    params["MOTION"]["MD"]["REFTRAJ"]["FIRST_SNAPSHOT"] = first_snapshot
     return Dict(params)
diff --git a/aiida_cp2k/utils/parser.py b/aiida_cp2k/utils/parser.py
index aca942b..b9d7518 100644
--- a/aiida_cp2k/utils/parser.py
+++ b/aiida_cp2k/utils/parser.py
@@ -124,7 +124,7 @@ def parse_cp2k_output_advanced(
 
         # If a tag has been detected, now read the following line knowing what they are
         if line_is in ["eigen_spin1_au", "eigen_spin2_au"]:
-            if "------" in line:
+            if "------" in line or "*** WARNING" in line:
                 continue
             splitted_line = line.split()
             try:
diff --git a/aiida_cp2k/workchains/base.py b/aiida_cp2k/workchains/base.py
index 4a1412c..1ca625f 100644
--- a/aiida_cp2k/workchains/base.py
+++ b/aiida_cp2k/workchains/base.py
@@ -1,21 +1,13 @@
 """Base work chain to run a CP2K calculation."""
 
-from aiida.common import AttributeDict
-from aiida.engine import (
-    BaseRestartWorkChain,
-    ProcessHandlerReport,
-    process_handler,
-    while_,
-)
-from aiida.orm import Bool, Dict
-from aiida.plugins import CalculationFactory
+from aiida import common, engine, orm, plugins
 
-from ..utils import add_ext_restart_section, add_wfn_restart_section
+from .. import utils
 
-Cp2kCalculation = CalculationFactory('cp2k')
+Cp2kCalculation = plugins.CalculationFactory('cp2k')
 
 
-class Cp2kBaseWorkChain(BaseRestartWorkChain):
+class Cp2kBaseWorkChain(engine.BaseRestartWorkChain):
     """Workchain to run a CP2K calculation with automated error handling and restarts."""
 
     _process_class = Cp2kCalculation
@@ -28,7 +20,7 @@ def define(cls, spec):
 
         spec.outline(
             cls.setup,
-            while_(cls.should_run_process)(
+            engine.while_(cls.should_run_process)(
                 cls.run_process,
                 cls.inspect_process,
                 cls.overwrite_input_structure,
@@ -37,7 +29,7 @@ def define(cls, spec):
         )
 
         spec.expose_outputs(Cp2kCalculation)
-        spec.output('final_input_parameters', valid_type=Dict, required=False,
+        spec.output('final_input_parameters', valid_type=orm.Dict, required=False,
                     help='The input parameters used for the final calculation.')
         spec.exit_code(400, 'NO_RESTART_DATA', message="The calculation didn't produce any data to restart from.")
         spec.exit_code(300, 'ERROR_UNRECOVERABLE_FAILURE',
@@ -52,7 +44,7 @@ def setup(self):
         internal loop.
         """
         super().setup()
-        self.ctx.inputs = AttributeDict(self.exposed_inputs(Cp2kCalculation, 'cp2k'))
+        self.ctx.inputs = common.AttributeDict(self.exposed_inputs(Cp2kCalculation, 'cp2k'))
 
     def results(self):
         super().results()
@@ -63,7 +55,7 @@ def overwrite_input_structure(self):
         if "output_structure" in self.ctx.children[self.ctx.iteration-1].outputs:
             self.ctx.inputs.structure = self.ctx.children[self.ctx.iteration-1].outputs.output_structure
 
-    @process_handler(priority=401, exit_codes=[
+    @engine.process_handler(priority=401, exit_codes=[
         Cp2kCalculation.exit_codes.ERROR_OUT_OF_WALLTIME,
         Cp2kCalculation.exit_codes.ERROR_OUTPUT_INCOMPLETE,
     ], enabled=False)
@@ -72,7 +64,7 @@ def restart_incomplete_calculation(self, calc):
         content_string = calc.outputs.retrieved.base.repository.get_object_content(calc.base.attributes.get('output_filename'))
 
         # CP2K was updating geometry - continue with that.
-        restart_geometry_transformation = "Max. gradient              =" in content_string
+        restart_geometry_transformation = "Max. gradient              =" in content_string or "MD| Step number" in content_string
         end_inner_scf_loop = "Total energy: " in content_string
         # The message is written in the log file when the CP2K input parameter `LOG_PRINT_KEY` is set to True.
         if not (restart_geometry_transformation or end_inner_scf_loop or "Writing RESTART" in content_string):
@@ -81,18 +73,26 @@ def restart_incomplete_calculation(self, calc):
                         "Sending a signal to stop the Base work chain.")
 
             # Signaling to the base work chain that the problem could not be recovered.
-            return ProcessHandlerReport(True, self.exit_codes.NO_RESTART_DATA)
+            return engine.ProcessHandlerReport(True, self.exit_codes.NO_RESTART_DATA)
 
         self.ctx.inputs.parent_calc_folder = calc.outputs.remote_folder
         params = self.ctx.inputs.parameters
 
-        params = add_wfn_restart_section(params, Bool('kpoints' in self.ctx.inputs))
+        params = utils.add_wfn_restart_section(params, orm.Bool('kpoints' in self.ctx.inputs))
 
         if restart_geometry_transformation:
-            params = add_ext_restart_section(params)
+            # Check if we need to fix restart snapshot in REFTRAJ MD
+            first_snapshot = None
+            try:
+                first_snapshot = int(params['MOTION']['MD']['REFTRAJ']['FIRST_SNAPSHOT']) + calc.outputs.output_trajectory.get_shape('positions')[0]
+                if first_snapshot:
+                    params = utils.add_first_snapshot_in_reftraj_section(params, first_snapshot)
+            except KeyError:
+                pass
+            params = utils.add_ext_restart_section(params)
 
         self.ctx.inputs.parameters = params  # params (new or old ones) that include the necessary restart information.
         self.report(
             "The CP2K calculation wasn't completed. The restart of the calculation might be able to "
             "fix the problem.")
-        return ProcessHandlerReport(False)
+        return engine.ProcessHandlerReport(False)
diff --git a/examples/workchains/example_base_md_reftraj_restart.py b/examples/workchains/example_base_md_reftraj_restart.py
new file mode 100644
index 0000000..b0cbcfc
--- /dev/null
+++ b/examples/workchains/example_base_md_reftraj_restart.py
@@ -0,0 +1,205 @@
+###############################################################################
+# Copyright (c), The AiiDA-CP2K authors.                                      #
+# SPDX-License-Identifier: MIT                                                #
+# AiiDA-CP2K is hosted on GitHub at https://github.com/aiidateam/aiida-cp2k   #
+# For further information on the license, see the LICENSE.txt file.           #
+###############################################################################
+"""An example testing the restart calculation handler for geo_opt run in CP2K."""
+
+import os
+import random
+import sys
+
+import ase.io
+import click
+import numpy as np
+from aiida import common, engine, orm, plugins
+
+Cp2kBaseWorkChain = plugins.WorkflowFactory("cp2k.base")
+StructureData = plugins.DataFactory("core.structure")
+TrajectoryData = plugins.DataFactory("core.array.trajectory")
+
+
+def example_base(cp2k_code):
+    """Run simple DFT calculation through a workchain."""
+
+    thisdir = os.path.dirname(os.path.realpath(__file__))
+
+    print("Testing CP2K MD REFTRAJ on H2 (DFT) through a workchain...")
+
+    # Basis set.
+    basis_file = orm.SinglefileData(
+        file=os.path.join(thisdir, "..", "files", "BASIS_MOLOPT")
+    )
+
+    # Pseudopotentials.
+    pseudo_file = orm.SinglefileData(
+        file=os.path.join(thisdir, "..", "files", "GTH_POTENTIALS")
+    )
+
+    # Structure.
+    structure = StructureData(
+        ase=ase.io.read(os.path.join(thisdir, "..", "files", "h2.xyz"))
+    )
+
+    # Trajectory.
+    steps = 20
+    positions = np.array(
+        [[[2, 2, 2.73 + 0.05 * random.random()], [2, 2, 2]] for i in range(steps)]
+    )
+    cells = np.array(
+        [
+            [[4, 0, 0], [0, 4, 0], [0, 0, 4.75 + 0.05 * random.random()]]
+            for i in range(steps)
+        ]
+    )
+    symbols = ["H", "H"]
+    trajectory = TrajectoryData()
+    trajectory.set_trajectory(symbols, positions, cells=cells)
+
+    # Parameters.
+    parameters = orm.Dict(
+        {
+            "GLOBAL": {
+                "RUN_TYPE": "MD",
+                "PRINT_LEVEL": "LOW",
+                "WALLTIME": 4,
+                "PROJECT": "aiida",
+            },
+            "MOTION": {
+                "MD": {
+                    "ENSEMBLE": "REFTRAJ",
+                    "STEPS": steps,
+                    "REFTRAJ": {
+                        "FIRST_SNAPSHOT": 1,
+                        "LAST_SNAPSHOT": steps,
+                        "EVAL_FORCES": ".TRUE.",
+                        "TRAJ_FILE_NAME": "aiida-reftraj.xyz",
+                        "CELL_FILE_NAME": "aiida-reftraj.cell",
+                        "VARIABLE_VOLUME": ".TRUE.",
+                    },
+                },
+                "PRINT": {
+                    "RESTART": {
+                        "EACH": {
+                            "MD": 1,
+                        },
+                    },
+                    "FORCES": {
+                        "EACH": {
+                            "MD": 1,
+                        },
+                    },
+                    "CELL": {
+                        "EACH": {
+                            "MD": 1,
+                        },
+                    },
+                },
+            },
+            "FORCE_EVAL": {
+                "METHOD": "Quickstep",
+                "DFT": {
+                    "BASIS_SET_FILE_NAME": "BASIS_MOLOPT",
+                    "POTENTIAL_FILE_NAME": "GTH_POTENTIALS",
+                    "QS": {
+                        "EPS_DEFAULT": 1.0e-12,
+                        "WF_INTERPOLATION": "ps",
+                        "EXTRAPOLATION_ORDER": 3,
+                    },
+                    "MGRID": {
+                        "NGRIDS": 4,
+                        "CUTOFF": 280,
+                        "REL_CUTOFF": 30,
+                    },
+                    "XC": {
+                        "XC_FUNCTIONAL": {
+                            "_": "LDA",
+                        },
+                    },
+                    "POISSON": {
+                        "PERIODIC": "none",
+                        "PSOLVER": "MT",
+                    },
+                },
+                "SUBSYS": {
+                    "KIND": [
+                        {
+                            "_": "O",
+                            "BASIS_SET": "DZVP-MOLOPT-SR-GTH",
+                            "POTENTIAL": "GTH-LDA-q6",
+                        },
+                        {
+                            "_": "H",
+                            "BASIS_SET": "DZVP-MOLOPT-SR-GTH",
+                            "POTENTIAL": "GTH-LDA-q1",
+                        },
+                    ],
+                },
+            },
+        }
+    )
+
+    # Construct process builder.
+    builder = Cp2kBaseWorkChain.get_builder()
+
+    # Switch on the restart_incomplete_calculation handler, disabled by default.
+    builder.handler_overrides = orm.Dict(
+        {"restart_incomplete_calculation": {"enabled": True}}
+    )
+
+    # Input structure.
+    builder.cp2k.structure = structure
+    builder.cp2k.trajectory = trajectory
+    builder.cp2k.parameters = parameters
+    builder.cp2k.code = cp2k_code
+    builder.cp2k.file = {
+        "basis": basis_file,
+        "pseudo": pseudo_file,
+    }
+    builder.cp2k.metadata.options.resources = {
+        "num_machines": 1,
+        "num_mpiprocs_per_machine": 1,
+    }
+
+    print("Submitted calculation...")
+    outputs, calc_node = engine.run_get_node(builder)
+
+    if "EXT_RESTART" in outputs["final_input_parameters"].dict:
+        print("OK, EXT_RESTART section is present in the final_input_parameters.")
+    else:
+        print(
+            "ERROR, EXT_RESTART section is NOT present in the final_input_parameters."
+        )
+        sys.exit(1)
+    stepids = np.concatenate(
+        [
+            called.outputs.output_trajectory.get_stepids()
+            for called in calc_node.called
+            if isinstance(called, orm.CalcJobNode)
+        ]
+    )
+
+    if np.all(stepids == np.arange(1, steps + 1)):
+        print("OK, stepids are correct.")
+    else:
+        print(
+            f"ERROR, stepids are NOT correct. Expected: {np.arange(1, steps + 1)} but got:  {stepids}"
+        )
+        sys.exit(1)
+
+
+@click.command("cli")
+@click.argument("codelabel")
+def cli(codelabel):
+    """Click interface."""
+    try:
+        code = orm.load_code(codelabel)
+    except common.NotExistent:
+        print(f"The code '{codelabel}' does not exist")
+        sys.exit(1)
+    example_base(code)
+
+
+if __name__ == "__main__":
+    cli()

From 47148254912dad64dc77435a28a2aeb4900c066c Mon Sep 17 00:00:00 2001
From: Aliaksandr Yakutovich <yakutovicha@gmail.com>
Date: Wed, 13 Mar 2024 09:26:12 +0100
Subject: [PATCH 4/9] Update base docker image and README for testing (#210)

---
 .docker/cp2k-code.yml                         |  1 -
 .docker/{opt => init}/add-codes.sh            |  2 +-
 .docker/my_init.d/add-codes.sh                |  7 -----
 .docker/my_init.d/add-pgsql-bin-to-path.sh    |  8 -----
 .../dependencies.d/aiida-prepare              |  0
 .docker/s6-rc.d/cp2k-code-setup/timeout-up    |  1 +
 .docker/s6-rc.d/cp2k-code-setup/type          |  1 +
 .docker/s6-rc.d/cp2k-code-setup/up            |  7 +++++
 .docker/user/cp2k-code-setup                  |  0
 .github/workflows/ci.yml                      |  4 +--
 Dockerfile                                    | 15 +++++-----
 README.md                                     | 29 +++++++++++++++++++
 12 files changed, 48 insertions(+), 27 deletions(-)
 rename .docker/{opt => init}/add-codes.sh (66%)
 delete mode 100755 .docker/my_init.d/add-codes.sh
 delete mode 100755 .docker/my_init.d/add-pgsql-bin-to-path.sh
 create mode 100644 .docker/s6-rc.d/cp2k-code-setup/dependencies.d/aiida-prepare
 create mode 100644 .docker/s6-rc.d/cp2k-code-setup/timeout-up
 create mode 100644 .docker/s6-rc.d/cp2k-code-setup/type
 create mode 100644 .docker/s6-rc.d/cp2k-code-setup/up
 create mode 100644 .docker/user/cp2k-code-setup

diff --git a/.docker/cp2k-code.yml b/.docker/cp2k-code.yml
index eccd1b8..48cdb68 100644
--- a/.docker/cp2k-code.yml
+++ b/.docker/cp2k-code.yml
@@ -2,7 +2,6 @@ label: cp2k
 computer: localhost
 description:
 default_calc_job_plugin: cp2k
-on_computer: True
 filepath_executable: /usr/bin/cp2k
 append_text:
 prepend_text:
diff --git a/.docker/opt/add-codes.sh b/.docker/init/add-codes.sh
similarity index 66%
rename from .docker/opt/add-codes.sh
rename to .docker/init/add-codes.sh
index a4f0d53..5e19b3c 100755
--- a/.docker/opt/add-codes.sh
+++ b/.docker/init/add-codes.sh
@@ -7,4 +7,4 @@ set -x
 export SHELL=/bin/bash
 
 # Install cp2k code.
-verdi code show cp2k@localhost || verdi code create core.code.installed --config /opt/aiida-cp2k/.docker/cp2k-code.yml --non-interactive
+verdi code show cp2k@localhost || verdi code create core.code.installed --config /home/aiida/aiida-cp2k/.docker/cp2k-code.yml --non-interactive
diff --git a/.docker/my_init.d/add-codes.sh b/.docker/my_init.d/add-codes.sh
deleted file mode 100755
index 96ce569..0000000
--- a/.docker/my_init.d/add-codes.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-set -em
-
-su -c /opt/add-codes.sh aiida
-
-# Make /opt/aiida-cp2k folder editable for the $SYSTEM_USER.
-chown -R ${SYSTEM_USER}:${SYSTEM_USER} /opt/aiida-cp2k/
diff --git a/.docker/my_init.d/add-pgsql-bin-to-path.sh b/.docker/my_init.d/add-pgsql-bin-to-path.sh
deleted file mode 100755
index 4bd7168..0000000
--- a/.docker/my_init.d/add-pgsql-bin-to-path.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-set -em
-
-# The following works in non-interactive mode
-sed  -i '1i export PATH=${PATH}:/opt/conda/envs/pgsql/bin/' /home/aiida/.bashrc
-
-# The following works in interactive mode
-echo 'export PATH=${PATH}:/opt/conda/envs/pgsql/bin/' >> /home/aiida/.bashrc
diff --git a/.docker/s6-rc.d/cp2k-code-setup/dependencies.d/aiida-prepare b/.docker/s6-rc.d/cp2k-code-setup/dependencies.d/aiida-prepare
new file mode 100644
index 0000000..e69de29
diff --git a/.docker/s6-rc.d/cp2k-code-setup/timeout-up b/.docker/s6-rc.d/cp2k-code-setup/timeout-up
new file mode 100644
index 0000000..573541a
--- /dev/null
+++ b/.docker/s6-rc.d/cp2k-code-setup/timeout-up
@@ -0,0 +1 @@
+0
diff --git a/.docker/s6-rc.d/cp2k-code-setup/type b/.docker/s6-rc.d/cp2k-code-setup/type
new file mode 100644
index 0000000..bdd22a1
--- /dev/null
+++ b/.docker/s6-rc.d/cp2k-code-setup/type
@@ -0,0 +1 @@
+oneshot
diff --git a/.docker/s6-rc.d/cp2k-code-setup/up b/.docker/s6-rc.d/cp2k-code-setup/up
new file mode 100644
index 0000000..4d07d01
--- /dev/null
+++ b/.docker/s6-rc.d/cp2k-code-setup/up
@@ -0,0 +1,7 @@
+#!/command/execlineb -S0
+
+with-contenv
+
+foreground { s6-echo "Setting up CP2K code" }
+
+/etc/init/add-codes.sh
diff --git a/.docker/user/cp2k-code-setup b/.docker/user/cp2k-code-setup
new file mode 100644
index 0000000..e69de29
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5b4dc74..9a5febf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,9 +47,9 @@ jobs:
     - name: Create container from aiida_cp2k_test image and test the plugin inside
       run: |
         export DOCKERID=`docker run -d aiida_cp2k_test`
-        docker exec --tty $DOCKERID wait-for-services
+        sleep 5
         docker logs $DOCKERID
-        docker exec --tty --user aiida $DOCKERID /bin/bash -l -c 'cd /opt/aiida-cp2k/ && py.test --cov aiida_cp2k --cov-append .'
+        docker exec --tty --user aiida $DOCKERID /bin/bash -l -c 'cd /home/aiida/aiida-cp2k/ && py.test --cov aiida_cp2k --cov-append .'
 
 
   pre-commit:
diff --git a/Dockerfile b/Dockerfile
index edfff03..cc0efbd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,13 +5,13 @@
 # For further information on the license, see the LICENSE.txt file.           #
 ###############################################################################
 
-FROM aiidateam/aiida-core:2.1.2
+FROM aiidateam/aiida-core-with-services:2.5.0
 
 # To prevent the container to exit prematurely.
 ENV KILL_ALL_RPOCESSES_TIMEOUT=50
 
-WORKDIR /opt/
 
+USER root
 # Install statically linked CP2K which is a considerably newer release than Debian builtin.
 # The statically linked CP2K is a non-MPI binary, but we're running all tests with 1 MPI proc.
 RUN set -ex ; \
@@ -21,16 +21,15 @@ RUN set -ex ; \
   echo "1e6fccf901873ebe9c827f45fb29331f599772f6e6281e988d8956c7a3aa143c /usr/bin/cp2k" | sha256sum -c ; \
   chmod +x /usr/bin/cp2k
 
+USER aiida
 # Install aiida-cp2k plugin.
-COPY . aiida-cp2k
+COPY --chown="${SYSTEM_UID}:${SYSTEM_GID}" . /home/aiida/aiida-cp2k
 RUN pip install ./aiida-cp2k[dev,docs]
 
 # Install coverals.
 RUN pip install coveralls
 
 # Install the cp2k code.
-COPY .docker/opt/add-codes.sh /opt/
-COPY .docker/my_init.d/add-codes.sh /etc/my_init.d/50_add-codes.sh
-
-# Add PGSQL bin folder to PATH.
-COPY .docker/my_init.d/add-pgsql-bin-to-path.sh /etc/my_init.d/50_add-pgsql-bin-to-path.sh
+COPY .docker/init/add-codes.sh /etc/init/
+COPY .docker/s6-rc.d/cp2k-code-setup /etc/s6-overlay/s6-rc.d/cp2k-code-setup
+COPY .docker/user/cp2k-code-setup /etc/s6-overlay/s6-rc.d/user/contents.d/cp2k-code-setup
diff --git a/README.md b/README.md
index 01d6deb..37c89eb 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,8 @@ pip install -e .  # Also installs aiida, if missing (but not postgres/rabbitmq).
 
 ## For maintainers
 
+### Release
+
 To create a new release, clone the repository, install development dependencies with `pip install '.[dev]'`, and then execute `bumpver update --major/--minor/--patch`.
 This will:
 
@@ -43,6 +45,33 @@ Additional notes:
   - The release tag (e.g. a/b/rc) is determined from the last release.
     Use the `--tag` option to override the release tag.
 
+### Testing
+
+To run the tests, you need to have Docker installed in your system.
+Once this is done, you can build the Docker image with the following command:
+
+```bash
+docker build -t aiida_cp2k_test .
+```
+Then, you can launch the container:
+
+```bash
+DOKERID=`docker run -it aiida_cp2k_test`
+```
+This will remeber the container ID in the variable `DOKERID`.
+You can then run the tests with the following command:
+
+```bash
+docker exec --tty --user aiida $DOCKERID /bin/bash -l -c 'cd /home/aiida/aiida-cp2k/ && pytest'
+```
+
+To enter the container for manual testing do:
+
+```bash
+docker exec -it --user aiida $DOCKERID bash
+```
+
+
 ## License
 
 MIT

From 8874fb78bfbde7afef72445208a14c2f1d7fbdea Mon Sep 17 00:00:00 2001
From: Aliaksandr Yakutovich <yakutovicha@gmail.com>
Date: Wed, 13 Mar 2024 16:54:46 +0100
Subject: [PATCH 5/9] Implement output trajectory merge in Cp2kBaseWorkChain
 (#209)

* Implement output trajectory merge in Cp2kBaseWorkChain
* Add a unit test for the merge_trajectory_data function.
* Add an example of the MD restart.
* Modify the reftraj example to facilitate the check of trajectories.
---------

Co-authored-by: Carlo Antonio Pignedoli <c.pignedoli@gmail.com>
---
 Dockerfile                                    |   1 +
 README.md                                     |   2 +-
 aiida_cp2k/parsers/__init__.py                |  10 +-
 aiida_cp2k/utils/__init__.py                  |   6 +
 aiida_cp2k/utils/datatype_helpers.py          | 110 +++++++++---
 aiida_cp2k/workchains/base.py                 |  20 +++
 .../example_base_md_reftraj_restart.py        |  39 ++++-
 .../workchains/example_base_md_restart.py     | 158 ++++++++++++++++++
 test/test_datatype_helpers.py                 |  60 +++++++
 9 files changed, 373 insertions(+), 33 deletions(-)
 create mode 100644 examples/workchains/example_base_md_restart.py
 create mode 100644 test/test_datatype_helpers.py

diff --git a/Dockerfile b/Dockerfile
index cc0efbd..e245149 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,6 +5,7 @@
 # For further information on the license, see the LICENSE.txt file.           #
 ###############################################################################
 
+
 FROM aiidateam/aiida-core-with-services:2.5.0
 
 # To prevent the container to exit prematurely.
diff --git a/README.md b/README.md
index 37c89eb..498bf8a 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ docker build -t aiida_cp2k_test .
 Then, you can launch the container:
 
 ```bash
-DOKERID=`docker run -it aiida_cp2k_test`
+DOCKERID=`docker run -d aiida_cp2k_test`
 ```
 This will remeber the container ID in the variable `DOKERID`.
 You can then run the tests with the following command:
diff --git a/aiida_cp2k/parsers/__init__.py b/aiida_cp2k/parsers/__init__.py
index 8b7bc18..440231b 100644
--- a/aiida_cp2k/parsers/__init__.py
+++ b/aiida_cp2k/parsers/__init__.py
@@ -145,12 +145,19 @@ def _parse_trajectory(self, structure):
 
         positions_traj = []
         stepids_traj = []
+        energies_traj = []
         for frame in parse(output_xyz_pos):
             _, positions = zip(*frame["atoms"])
             positions_traj.append(positions)
-            stepids_traj.append(int(frame["comment"].split()[2][:-1]))
+            comment_split = frame["comment"].split(",")
+            stepids_traj.append(int(comment_split[0].split()[-1]))
+            energy_index = next(
+                (i for i, s in enumerate(comment_split) if "E =" in s), None
+            )
+            energies_traj.append(float(comment_split[energy_index].split()[-1]))
         positions_traj = np.array(positions_traj)
         stepids_traj = np.array(stepids_traj)
+        energies_traj = np.array(energies_traj)
 
         cell_traj = None
         cell_traj_fname = self.node.process_class._DEFAULT_TRAJECT_CELL_FILE_NAME
@@ -190,6 +197,7 @@ def _parse_trajectory(self, structure):
             symbols=symbols,
             positions=positions_traj,
         )
+        trajectory.set_array("energies", energies_traj)
         if forces_traj is not None:
             trajectory.set_array("forces", forces_traj)
 
diff --git a/aiida_cp2k/utils/__init__.py b/aiida_cp2k/utils/__init__.py
index 285483d..fc9d772 100644
--- a/aiida_cp2k/utils/__init__.py
+++ b/aiida_cp2k/utils/__init__.py
@@ -6,6 +6,10 @@
 ###############################################################################
 """AiiDA-CP2K utils"""
 
+from .datatype_helpers import (
+    merge_trajectory_data_non_unique,
+    merge_trajectory_data_unique,
+)
 from .input_generator import (
     Cp2kInput,
     add_ext_restart_section,
@@ -42,4 +46,6 @@
     "merge_Dict",
     "ot_has_small_bandgap",
     "resize_unit_cell",
+    "merge_trajectory_data_unique",
+    "merge_trajectory_data_non_unique",
 ]
diff --git a/aiida_cp2k/utils/datatype_helpers.py b/aiida_cp2k/utils/datatype_helpers.py
index 8c12e9e..041bad6 100644
--- a/aiida_cp2k/utils/datatype_helpers.py
+++ b/aiida_cp2k/utils/datatype_helpers.py
@@ -9,8 +9,8 @@
 import re
 from collections.abc import Sequence
 
-from aiida.common import InputValidationError
-from aiida.plugins import DataFactory
+import numpy as np
+from aiida import common, engine, orm, plugins
 
 
 def _unpack(adict):
@@ -50,7 +50,9 @@ def _kind_element_from_kind_section(section):
     try:
         kind = section["_"]
     except KeyError:
-        raise InputValidationError("No default parameter '_' found in KIND section.")
+        raise common.InputValidationError(
+            "No default parameter '_' found in KIND section."
+        )
 
     try:
         element = section["ELEMENT"]
@@ -60,7 +62,7 @@ def _kind_element_from_kind_section(section):
         try:
             element = match["sym"]
         except TypeError:
-            raise InputValidationError(
+            raise common.InputValidationError(
                 f"Unable to figure out atomic symbol from KIND '{kind}'."
             )
 
@@ -125,7 +127,7 @@ def _write_gdt(inp, entries, folder, key, fname):
 def validate_basissets_namespace(basissets, _):
     """A input_namespace validator to ensure passed down basis sets have the correct type."""
     return _validate_gdt_namespace(
-        basissets, DataFactory("gaussian.basisset"), "basis set"
+        basissets, plugins.DataFactory("gaussian.basisset"), "basis set"
     )
 
 
@@ -176,7 +178,7 @@ def validate_basissets(inp, basissets, structure):
                 bsets = [(t, b) for t, s, b in basissets if s == element]
 
             if not bsets:
-                raise InputValidationError(
+                raise common.InputValidationError(
                     f"No basis set found for kind {kind} or element {element}"
                     f" in basissets input namespace and not explicitly set."
                 )
@@ -203,7 +205,7 @@ def validate_basissets(inp, basissets, structure):
                     bsets = [(t, b) for t, s, b in basissets if s == element]
 
                 if not bsets:
-                    raise InputValidationError(
+                    raise common.InputValidationError(
                         f"'BASIS_SET {bstype} {bsname}' for element {element} (from kind {kind})"
                         " not found in basissets input namespace"
                     )
@@ -213,7 +215,7 @@ def validate_basissets(inp, basissets, structure):
                         basissets_used.add(bset)
                         break
                 else:
-                    raise InputValidationError(
+                    raise common.InputValidationError(
                         f"'BASIS_SET {bstype} {bsname}' for element {element} (from kind {kind})"
                         " not found in basissets input namespace"
                     )
@@ -222,14 +224,14 @@ def validate_basissets(inp, basissets, structure):
     if not structure and any(
         bset not in basissets_used for bset in basissets_specified
     ):
-        raise InputValidationError(
+        raise common.InputValidationError(
             "No explicit structure given and basis sets not referenced in input"
         )
 
     if isinstance(inp["FORCE_EVAL"], Sequence) and any(
         kind.name not in explicit_kinds for kind in structure.kinds
     ):
-        raise InputValidationError(
+        raise common.InputValidationError(
             "Automated BASIS_SET keyword creation is not yet supported with multiple FORCE_EVALs."
             " Please explicitly reference a BASIS_SET for each KIND."
         )
@@ -250,13 +252,13 @@ def validate_basissets(inp, basissets, structure):
             bsets = [(t, b) for t, s, b in basissets if s == kind.symbol]
 
         if not bsets:
-            raise InputValidationError(
+            raise common.InputValidationError(
                 f"No basis set found in the given basissets for kind '{kind.name}' of your structure."
             )
 
         for _, bset in bsets:
             if bset.element != kind.symbol:
-                raise InputValidationError(
+                raise common.InputValidationError(
                     f"Basis set '{bset.name}' for '{bset.element}' specified"
                     f" for kind '{kind.name}' (of '{kind.symbol}')."
                 )
@@ -274,7 +276,7 @@ def validate_basissets(inp, basissets, structure):
 
     for bset in basissets_specified:
         if bset not in basissets_used:
-            raise InputValidationError(
+            raise common.InputValidationError(
                 f"Basis set '{bset.name}' ('{bset.element}') specified in the basissets"
                 f" input namespace but not referenced by either input or structure."
             )
@@ -287,7 +289,9 @@ def write_basissets(inp, basissets, folder):
 
 def validate_pseudos_namespace(pseudos, _):
     """A input_namespace validator to ensure passed down pseudopentials have the correct type."""
-    return _validate_gdt_namespace(pseudos, DataFactory("gaussian.pseudo"), "pseudo")
+    return _validate_gdt_namespace(
+        pseudos, plugins.DataFactory("gaussian.pseudo"), "pseudo"
+    )
 
 
 def validate_pseudos(inp, pseudos, structure):
@@ -318,7 +322,7 @@ def validate_pseudos(inp, pseudos, structure):
                 try:
                     pseudo = pseudos[element]
                 except KeyError:
-                    raise InputValidationError(
+                    raise common.InputValidationError(
                         f"No pseudopotential found for kind {kind} or element {element}"
                         f" in pseudos input namespace and not explicitly set."
                     )
@@ -335,19 +339,19 @@ def validate_pseudos(inp, pseudos, structure):
                 try:
                     pseudo = pseudos[element]
                 except KeyError:
-                    raise InputValidationError(
+                    raise common.InputValidationError(
                         f"'POTENTIAL {ptype} {pname}' for element {element} (from kind {kind})"
                         " not found in pseudos input namespace"
                     )
 
             if pname not in pseudo.aliases:
-                raise InputValidationError(
+                raise common.InputValidationError(
                     f"'POTENTIAL {ptype} {pname}' for element {element} (from kind {kind})"
                     " not found in pseudos input namespace"
                 )
 
         if pseudo.element != element:
-            raise InputValidationError(
+            raise common.InputValidationError(
                 f"Pseudopotential '{pseudo.name}' for '{pseudo.element}' specified"
                 f" for element '{element}'."
             )
@@ -358,14 +362,14 @@ def validate_pseudos(inp, pseudos, structure):
     if not structure and any(
         pseudo not in pseudos_used for pseudo in pseudos_specified
     ):
-        raise InputValidationError(
+        raise common.InputValidationError(
             "No explicit structure given and pseudo not referenced in input"
         )
 
     if isinstance(inp["FORCE_EVAL"], Sequence) and any(
         kind.name not in explicit_kinds for kind in structure.kinds
     ):
-        raise InputValidationError(
+        raise common.InputValidationError(
             "Automated POTENTIAL keyword creation is not yet supported with multiple FORCE_EVALs."
             " Please explicitly reference a POTENTIAL for each KIND."
         )
@@ -383,13 +387,13 @@ def validate_pseudos(inp, pseudos, structure):
             try:
                 pseudo = pseudos[kind.symbol]
             except KeyError:
-                raise InputValidationError(
+                raise common.InputValidationError(
                     f"No basis set found in the given basissets"
                     f" for kind '{kind.name}' (or '{kind.symbol}') of your structure."
                 )
 
         if pseudo.element != kind.symbol:
-            raise InputValidationError(
+            raise common.InputValidationError(
                 f"Pseudopotential '{pseudo.name}' for '{pseudo.element}' specified"
                 f" for kind '{kind.name}' (of '{kind.symbol}')."
             )
@@ -402,7 +406,7 @@ def validate_pseudos(inp, pseudos, structure):
 
     for pseudo in pseudos_specified:
         if pseudo not in pseudos_used:
-            raise InputValidationError(
+            raise common.InputValidationError(
                 f"Pseudopodential '{pseudo.name}' specified in the pseudos input namespace"
                 f" but not referenced by either input or structure."
             )
@@ -411,3 +415,63 @@ def validate_pseudos(inp, pseudos, structure):
 def write_pseudos(inp, pseudos, folder):
     """Writes the unified POTENTIAL file with the used pseudos"""
     _write_gdt(inp, pseudos, folder, "POTENTIAL_FILE_NAME", "POTENTIAL")
+
+
+def _merge_trajectories_into_dictionary(*trajectories, unique_stepids=False):
+    if len(trajectories) < 0:
+        return None
+    final_trajectory_dict = {}
+
+    array_names = trajectories[0].get_arraynames()
+
+    for array_name in array_names:
+        if any(array_name not in traj.get_arraynames() for traj in trajectories):
+            raise ValueError(
+                f"Array name '{array_name}' not found in all trajectories."
+            )
+        merged_array = np.concatenate(
+            [traj.get_array(array_name) for traj in trajectories], axis=0
+        )
+        final_trajectory_dict[array_name] = merged_array
+
+    # If unique_stepids is True, we only keep the unique stepids.
+    # The other arrays are then also reduced to the unique stepids.
+    if unique_stepids:
+        stepids = np.concatenate([traj.get_stepids() for traj in trajectories], axis=0)
+        final_trajectory_dict["stepids"], unique_indices = np.unique(
+            stepids, return_index=True
+        )
+
+        for array_name in array_names:
+            final_trajectory_dict[array_name] = final_trajectory_dict[array_name][
+                unique_indices
+            ]
+
+    return final_trajectory_dict
+
+
+def _dictionary_to_trajectory(trajectory_dict, symbols):
+    final_trajectory = orm.TrajectoryData()
+    final_trajectory.set_trajectory(
+        symbols=symbols, positions=trajectory_dict.pop("positions")
+    )
+    for array_name, array in trajectory_dict.items():
+        final_trajectory.set_array(array_name, array)
+
+    return final_trajectory
+
+
+@engine.calcfunction
+def merge_trajectory_data_unique(*trajectories):
+    trajectory_dict = _merge_trajectories_into_dictionary(
+        *trajectories, unique_stepids=True
+    )
+    return _dictionary_to_trajectory(trajectory_dict, trajectories[0].symbols)
+
+
+@engine.calcfunction
+def merge_trajectory_data_non_unique(*trajectories):
+    trajectory_dict = _merge_trajectories_into_dictionary(
+        *trajectories, unique_stepids=False
+    )
+    return _dictionary_to_trajectory(trajectory_dict, trajectories[0].symbols)
diff --git a/aiida_cp2k/workchains/base.py b/aiida_cp2k/workchains/base.py
index 1ca625f..1f26b78 100644
--- a/aiida_cp2k/workchains/base.py
+++ b/aiida_cp2k/workchains/base.py
@@ -46,11 +46,31 @@ def setup(self):
         super().setup()
         self.ctx.inputs = common.AttributeDict(self.exposed_inputs(Cp2kCalculation, 'cp2k'))
 
+    def _collect_all_trajetories(self):
+        """Collect all trajectories from the children calculations."""
+        trajectories = []
+        for called in self.ctx.children:
+            if isinstance(called, orm.CalcJobNode):
+                try:
+                    trajectories.append(called.outputs.output_trajectory)
+                except AttributeError:
+                    pass
+        return trajectories
+
     def results(self):
         super().results()
         if self.inputs.cp2k.parameters != self.ctx.inputs.parameters:
             self.out('final_input_parameters', self.ctx.inputs.parameters)
 
+        trajectories = self._collect_all_trajetories()
+        if trajectories:
+            self.report("Work chain completed successfully, collecting all trajectories")
+            if self.ctx.inputs.parameters.get("GLOBAL", {}).get("RUN_TYPE") == "GEO_OPT":
+                output_trajectory = utils.merge_trajectory_data_non_unique(*trajectories)
+            else:
+                output_trajectory = utils.merge_trajectory_data_unique(*trajectories)
+            self.out("output_trajectory", output_trajectory)
+
     def overwrite_input_structure(self):
         if "output_structure" in self.ctx.children[self.ctx.iteration-1].outputs:
             self.ctx.inputs.structure = self.ctx.children[self.ctx.iteration-1].outputs.output_structure
diff --git a/examples/workchains/example_base_md_reftraj_restart.py b/examples/workchains/example_base_md_reftraj_restart.py
index b0cbcfc..91b38c4 100644
--- a/examples/workchains/example_base_md_reftraj_restart.py
+++ b/examples/workchains/example_base_md_reftraj_restart.py
@@ -7,7 +7,6 @@
 """An example testing the restart calculation handler for geo_opt run in CP2K."""
 
 import os
-import random
 import sys
 
 import ase.io
@@ -44,14 +43,9 @@ def example_base(cp2k_code):
 
     # Trajectory.
     steps = 20
-    positions = np.array(
-        [[[2, 2, 2.73 + 0.05 * random.random()], [2, 2, 2]] for i in range(steps)]
-    )
+    positions = np.array([[[2, 2, 2.73 + 0.01 * i], [2, 2, 2]] for i in range(steps)])
     cells = np.array(
-        [
-            [[4, 0, 0], [0, 4, 0], [0, 0, 4.75 + 0.05 * random.random()]]
-            for i in range(steps)
-        ]
+        [[[4, 0, 0], [0, 4, 0], [0, 0, 4.75 + 0.01 * i]] for i in range(steps)]
     )
     symbols = ["H", "H"]
     trajectory = TrajectoryData()
@@ -172,6 +166,8 @@ def example_base(cp2k_code):
             "ERROR, EXT_RESTART section is NOT present in the final_input_parameters."
         )
         sys.exit(1)
+
+    # Check stepids extracted from each individual calculation.
     stepids = np.concatenate(
         [
             called.outputs.output_trajectory.get_stepids()
@@ -188,6 +184,33 @@ def example_base(cp2k_code):
         )
         sys.exit(1)
 
+    # Check the final trajectory.
+    final_trajectory = outputs["output_trajectory"]
+
+    if np.all(final_trajectory.get_stepids() == np.arange(1, steps + 1)):
+        print("OK, final trajectory stepids are correct.")
+    else:
+        print(
+            f"ERROR, final trajectory stepids are NOT correct. Expected: {np.arange(1, steps + 1)} but got:  {final_trajectory.get_stepids()}"
+        )
+        sys.exit(1)
+
+    if final_trajectory.get_positions().shape == (steps, len(structure.sites), 3):
+        print("OK, the shape of the positions array is correct.")
+    else:
+        print(
+            f"ERROR, the shape of the positions array is NOT correct. Expected: {(steps, len(structure.sites), 3)} but got:  {final_trajectory.get_positions().shape}"
+        )
+        sys.exit(1)
+
+    if final_trajectory.get_cells().shape == (steps, 3, 3):
+        print("OK, the shape of the cells array is correct.")
+    else:
+        print(
+            f"ERROR, the shape of the cells array is NOT correct. Expected: {(steps, 3, 3)} but got:  {final_trajectory.get_cells().shape}"
+        )
+        sys.exit(1)
+
 
 @click.command("cli")
 @click.argument("codelabel")
diff --git a/examples/workchains/example_base_md_restart.py b/examples/workchains/example_base_md_restart.py
new file mode 100644
index 0000000..b20574f
--- /dev/null
+++ b/examples/workchains/example_base_md_restart.py
@@ -0,0 +1,158 @@
+###############################################################################
+# Copyright (c), The AiiDA-CP2K authors.                                      #
+# SPDX-License-Identifier: MIT                                                #
+# AiiDA-CP2K is hosted on GitHub at https://github.com/aiidateam/aiida-cp2k   #
+# For further information on the license, see the LICENSE.txt file.           #
+###############################################################################
+"""An example testing the restart calculation handler for an MD run in CP2K."""
+
+import os
+import sys
+
+import ase.io
+import click
+from aiida.common import NotExistent
+from aiida.engine import run
+from aiida.orm import Dict, SinglefileData, load_code
+from aiida.plugins import DataFactory, WorkflowFactory
+
+Cp2kBaseWorkChain = WorkflowFactory("cp2k.base")
+StructureData = DataFactory("core.structure")
+
+
+def example_base(cp2k_code):
+    """Run simple DFT calculation through a workchain."""
+
+    thisdir = os.path.dirname(os.path.realpath(__file__))
+
+    print("Testing CP2K ENERGY on H2O (DFT) through a workchain...")
+
+    # Basis set.
+    basis_file = SinglefileData(
+        file=os.path.join(thisdir, "..", "files", "BASIS_MOLOPT")
+    )
+
+    # Pseudopotentials.
+    pseudo_file = SinglefileData(
+        file=os.path.join(thisdir, "..", "files", "GTH_POTENTIALS")
+    )
+
+    # Structure.
+    structure = StructureData(
+        ase=ase.io.read(os.path.join(thisdir, "..", "files", "h2o.xyz"))
+    )
+
+    # Parameters.
+    parameters = Dict(
+        {
+            "GLOBAL": {
+                "RUN_TYPE": "MD",
+                "WALLTIME": "00:00:20",  # too short
+            },
+            "FORCE_EVAL": {
+                "METHOD": "Quickstep",
+                "STRESS_TENSOR": "analytical",
+                "DFT": {
+                    "BASIS_SET_FILE_NAME": "BASIS_MOLOPT",
+                    "POTENTIAL_FILE_NAME": "GTH_POTENTIALS",
+                    "QS": {
+                        "EPS_DEFAULT": 1.0e-12,
+                        "WF_INTERPOLATION": "ps",
+                        "EXTRAPOLATION_ORDER": 3,
+                    },
+                    "MGRID": {
+                        "NGRIDS": 4,
+                        "CUTOFF": 280,
+                        "REL_CUTOFF": 30,
+                    },
+                    "XC": {
+                        "XC_FUNCTIONAL": {
+                            "_": "LDA",
+                        },
+                    },
+                    "POISSON": {
+                        "PERIODIC": "none",
+                        "PSOLVER": "MT",
+                    },
+                    "SCF": {"PRINT": {"RESTART": {"_": "ON"}}},
+                },
+                "SUBSYS": {
+                    "KIND": [
+                        {
+                            "_": "O",
+                            "BASIS_SET": "DZVP-MOLOPT-SR-GTH",
+                            "POTENTIAL": "GTH-LDA-q6",
+                        },
+                        {
+                            "_": "H",
+                            "BASIS_SET": "DZVP-MOLOPT-SR-GTH",
+                            "POTENTIAL": "GTH-LDA-q1",
+                        },
+                    ],
+                },
+            },
+            "MOTION": {
+                "CONSTRAINT": {},
+                "MD": {
+                    "THERMOSTAT": {"CSVR": {}, "TYPE": "csvr"},
+                    "BAROSTAT": {},
+                    "MAX_STEPS": 8,
+                    "STEPS": 10000,
+                    "ENSEMBLE": "npt_f",
+                    "TEMPERATURE": 300.0,
+                },
+                "PRINT": {
+                    "RESTART": {"EACH": {"MD": 1}},
+                },
+            },
+        }
+    )
+
+    # Construct process builder.
+    builder = Cp2kBaseWorkChain.get_builder()
+
+    # Switch on the restart_incomplete_calculation handler, which is disabled by default.
+    builder.handler_overrides = Dict(
+        {"restart_incomplete_calculation": {"enabled": True}}
+    )
+
+    # Input structure.
+    builder.cp2k.structure = structure
+    builder.cp2k.parameters = parameters
+    builder.cp2k.code = cp2k_code
+    builder.cp2k.file = {
+        "basis": basis_file,
+        "pseudo": pseudo_file,
+    }
+    builder.cp2k.metadata.options.resources = {
+        "num_machines": 1,
+        "num_mpiprocs_per_machine": 1,
+    }
+    builder.cp2k.metadata.options.max_wallclock_seconds = 1 * 3 * 60
+
+    print("Submitted calculation...")
+    calc = run(builder)
+
+    if "EXT_RESTART" in calc["final_input_parameters"].dict:
+        print("OK, EXT_RESTART section is present in the final_input_parameters.")
+    else:
+        print(
+            "ERROR, EXT_RESTART section is NOT present in the final_input_parameters."
+        )
+        sys.exit(3)
+
+
+@click.command("cli")
+@click.argument("codelabel")
+def cli(codelabel):
+    """Click interface."""
+    try:
+        code = load_code(codelabel)
+    except NotExistent:
+        print(f"The code '{codelabel}' does not exist")
+        sys.exit(1)
+    example_base(code)
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/test/test_datatype_helpers.py b/test/test_datatype_helpers.py
new file mode 100644
index 0000000..989389a
--- /dev/null
+++ b/test/test_datatype_helpers.py
@@ -0,0 +1,60 @@
+import numpy as np
+import pytest
+from aiida import orm
+
+from aiida_cp2k.utils import (
+    merge_trajectory_data_non_unique,
+    merge_trajectory_data_unique,
+)
+
+
+@pytest.mark.parametrize(
+    "step_ranges",
+    (
+        [(1, 20), (21, 40)],
+        [(1, 20), (15, 30)],
+        [(1, 20), (21, 40), (41, 60)],
+        [(1, 25), (21, 30), (31, 60), (45, 80)],
+    ),
+)
+def test_merge_trajectory_data(step_ranges):
+    def get_trajectory(step1=1, step2=20):
+        nstes = step2 - step1 + 1
+        positions = np.array(
+            [
+                [[2, 2, 2.73 + 0.05 * np.random.random()], [2, 2, 2]]
+                for i in range(nstes)
+            ]
+        )
+        cells = np.array(
+            [
+                [[4, 0, 0], [0, 4, 0], [0, 0, 4.75 + 0.05 * np.random.random()]]
+                for i in range(nstes)
+            ]
+        )
+        stepids = np.arange(step1, step2 + 1)
+        symbols = ["H", "H"]
+        trajectory = orm.TrajectoryData()
+        trajectory.set_trajectory(symbols, positions, cells=cells, stepids=stepids)
+        return trajectory
+
+    trajectories = [get_trajectory(*step_range) for step_range in step_ranges]
+
+    total_length = sum(
+        [step_range[1] - step_range[0] + 1 for step_range in step_ranges]
+    )
+
+    unique_elements = []
+    for step_range in step_ranges:
+        unique_elements.extend(range(step_range[0], step_range[1] + 1))
+    total_lenght_unique = len(set(unique_elements))
+
+    merged_trajectory = merge_trajectory_data_non_unique(*trajectories)
+    assert (
+        len(merged_trajectory.get_stepids()) == total_length
+    ), "The merged trajectory has the wrong length."
+
+    merged_trajectory_unique = merge_trajectory_data_unique(*trajectories)
+    assert (
+        len(merged_trajectory_unique.get_stepids()) == total_lenght_unique
+    ), "The merged trajectory with unique stepids has the wrong length."

From 7c2dd38d2644ce7542687c15ad20d36001046f3e Mon Sep 17 00:00:00 2001
From: Aliaksandr Yakutovich <yakutovicha@gmail.com>
Date: Thu, 14 Mar 2024 19:07:27 +0100
Subject: [PATCH 6/9] Bump version v2.0.0 -> v2.1.0b0.

---
 aiida_cp2k/__init__.py | 2 +-
 pyproject.toml         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aiida_cp2k/__init__.py b/aiida_cp2k/__init__.py
index efc9d6b..631195d 100644
--- a/aiida_cp2k/__init__.py
+++ b/aiida_cp2k/__init__.py
@@ -6,6 +6,6 @@
 ###############################################################################
 """The official AiiDA plugin for CP2K."""
 
-__version__ = "2.0.0"
+__version__ = "2.1.0b0"
 
 # EOF
diff --git a/pyproject.toml b/pyproject.toml
index 316ebce..37dfdbf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,7 +74,7 @@ filterwarnings = [
 ]
 
 [tool.bumpver]
-current_version = "v2.0.0"
+current_version = "v2.1.0b0"
 version_pattern = "vMAJOR.MINOR.PATCH[PYTAGNUM]"
 commit_message = "Bump version {old_version} -> {new_version}."
 commit = true

From b4f4de58406c9932a79be1b66cded17afdd7ba7e Mon Sep 17 00:00:00 2001
From: Carlo Pignedoli <c.pignedoli@gmail.com>
Date: Sat, 16 Mar 2024 09:43:04 +0100
Subject: [PATCH 7/9] Use `orm.Dict.get_dict()` to get the Python dictionary
 (#211)

In earlier versions of AiiDA (I guess <2.5), the `get()` method is not yet implemented
for the `orm.Dict` object. To support a wider range of AiiDA versions, we convert the
`orm.Dict` to a regular Python dictionary before calling `get()`.
---
 aiida_cp2k/workchains/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiida_cp2k/workchains/base.py b/aiida_cp2k/workchains/base.py
index 1f26b78..3ed3cae 100644
--- a/aiida_cp2k/workchains/base.py
+++ b/aiida_cp2k/workchains/base.py
@@ -65,7 +65,7 @@ def results(self):
         trajectories = self._collect_all_trajetories()
         if trajectories:
             self.report("Work chain completed successfully, collecting all trajectories")
-            if self.ctx.inputs.parameters.get("GLOBAL", {}).get("RUN_TYPE") == "GEO_OPT":
+            if self.ctx.inputs.parameters.get_dict().get("GLOBAL", {}).get("RUN_TYPE") == "GEO_OPT":
                 output_trajectory = utils.merge_trajectory_data_non_unique(*trajectories)
             else:
                 output_trajectory = utils.merge_trajectory_data_unique(*trajectories)

From 580021f885f447dd9676907205950531010d067f Mon Sep 17 00:00:00 2001
From: Carlo Pignedoli <c.pignedoli@gmail.com>
Date: Sat, 16 Mar 2024 10:07:09 +0100
Subject: [PATCH 8/9] `TrajectoryData` doesn't support numbers in atomic names
 (#212)

---
 aiida_cp2k/parsers/__init__.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/aiida_cp2k/parsers/__init__.py b/aiida_cp2k/parsers/__init__.py
index 440231b..6045ef7 100644
--- a/aiida_cp2k/parsers/__init__.py
+++ b/aiida_cp2k/parsers/__init__.py
@@ -6,6 +6,8 @@
 ###############################################################################
 """AiiDA-CP2K output parser."""
 
+import re
+
 import ase
 import numpy as np
 from aiida import common, engine, orm, parsers, plugins
@@ -128,7 +130,7 @@ def _read_stdout(self):
     def _parse_trajectory(self, structure):
         """CP2K trajectory parser."""
 
-        symbols = [str(site.kind_name) for site in structure.sites]
+        symbols = [re.sub(r"\d+", "", str(site.kind_name)) for site in structure.sites]
 
         # Handle the positions trajectory
         xyz_traj_fname = self.node.process_class._DEFAULT_TRAJECT_XYZ_FILE_NAME

From a2a6d971a576f1739edd022aea55a2e8ceec37c6 Mon Sep 17 00:00:00 2001
From: Aliaksandr Yakutovich <yakutovicha@gmail.com>
Date: Sat, 16 Mar 2024 14:08:11 +0500
Subject: [PATCH 9/9] Bump version v2.1.0b0 -> v2.1.0b1.

---
 aiida_cp2k/__init__.py | 2 +-
 pyproject.toml         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/aiida_cp2k/__init__.py b/aiida_cp2k/__init__.py
index 631195d..31baffa 100644
--- a/aiida_cp2k/__init__.py
+++ b/aiida_cp2k/__init__.py
@@ -6,6 +6,6 @@
 ###############################################################################
 """The official AiiDA plugin for CP2K."""
 
-__version__ = "2.1.0b0"
+__version__ = "2.1.0b1"
 
 # EOF
diff --git a/pyproject.toml b/pyproject.toml
index 37dfdbf..95be358 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,7 +74,7 @@ filterwarnings = [
 ]
 
 [tool.bumpver]
-current_version = "v2.1.0b0"
+current_version = "v2.1.0b1"
 version_pattern = "vMAJOR.MINOR.PATCH[PYTAGNUM]"
 commit_message = "Bump version {old_version} -> {new_version}."
 commit = true