From a8590b5c6b18a5420b6a0847c6f25cf39ef72eb8 Mon Sep 17 00:00:00 2001 From: Sebastiaan Huber Date: Wed, 22 Mar 2023 13:55:13 +0100 Subject: [PATCH] `LAMMPSBaseParser`: Fix parsing for nodes with the `script` input The `BaseLammpsCalculation` plugin was recently updated to allow specifying the exact script to run through the `script` input node. If the default parser is used in this case, which is the `LAMMPSBaseParser`, a non-zero exit code would always be returned because some of the output files that the parser expects would not be there. These output files are normally written by the script that the plugin itself would create, but if a complete script is taken from inputs, it cannot be guaranteed that the script will produce these same outputs. The parser is updated to only return an exit code if an expected output file is missing from the retrieved files and the `script` input was not defined. In addition, the `total_wall_time` key, if parsed by the log file parser, is converted to seconds and added as the `total_wall_time_seconds` key. --- aiida_lammps/parsers/lammps/lammps_parser.py | 71 +++++++++++++------- tests/test_parsers.py | 39 ++++++++++- tests/test_parsers/test_lammps_base.yml | 15 +++++ tests/utils.py | 6 ++ 4 files changed, 103 insertions(+), 28 deletions(-) create mode 100644 tests/test_parsers/test_lammps_base.yml diff --git a/aiida_lammps/parsers/lammps/lammps_parser.py b/aiida_lammps/parsers/lammps/lammps_parser.py index 7237d61..543cbaf 100644 --- a/aiida_lammps/parsers/lammps/lammps_parser.py +++ b/aiida_lammps/parsers/lammps/lammps_parser.py @@ -4,6 +4,8 @@ It takes care of parsing the log.lammps file, the trajectory file and the yaml file with the final value of the variables printed in the ``thermo_style``. 
""" +import time + from aiida import orm from aiida.common import exceptions from aiida.parsers.parser import Parser @@ -45,39 +47,55 @@ def parse(self, **kwargs): list_of_files = out_folder.base.repository.list_object_names() # check log file - if self.node.get_option("logfile_filename") not in list_of_files: + logfile_filename = self.node.get_option("logfile_filename") + if logfile_filename not in list_of_files: return self.exit_codes.ERROR_LOG_FILE_MISSING - filename = self.node.get_option("logfile_filename") parsed_data = parse_logfile( file_contents=self.node.outputs.retrieved.base.repository.get_object_content( - filename + logfile_filename ) ) if parsed_data is None: return self.exit_codes.ERROR_PARSING_LOGFILE + global_data = parsed_data["global"] arrays = parsed_data["time_dependent"] + results = {"compute_variables": global_data} + + if "total_wall_time" in global_data: + try: + parsed_time = time.strptime(global_data["total_wall_time"], "%H:%M:%S") + except ValueError: + pass + else: + total_wall_time_seconds = ( + parsed_time.tm_hour * 3600 + + parsed_time.tm_min * 60 + + parsed_time.tm_sec + ) + global_data["total_wall_time_seconds"] = total_wall_time_seconds # check final variable file - if self.node.get_option("variables_filename") not in list_of_files: - return self.exit_codes.ERROR_FINAL_VARIABLE_FILE_MISSING - - filename = self.node.get_option("variables_filename") - final_variables = parse_final_data( - file_contents=self.node.outputs.retrieved.base.repository.get_object_content( - filename + final_variables = None + variables_filename = self.node.get_option("variables_filename") + if variables_filename not in list_of_files: + if "script" not in self.node.inputs: + return self.exit_codes.ERROR_FINAL_VARIABLE_FILE_MISSING + else: + final_variables = parse_final_data( + file_contents=self.node.outputs.retrieved.base.repository.get_object_content( + variables_filename + ) ) - ) - if final_variables is None: - return 
self.exit_codes.ERROR_PARSING_FINAL_VARIABLES + if final_variables is None: + return self.exit_codes.ERROR_PARSING_FINAL_VARIABLES - results = orm.Dict(dict={**final_variables, "compute_variables": global_data}) + results.update(**final_variables) # Expose the results from the log.lammps outputs - self.out("results", results) + self.out("results", orm.Dict(results)) # Get the time-dependent outputs exposed as an ArrayData - time_dependent_computes = orm.ArrayData() for key, value in arrays.items(): @@ -87,15 +105,18 @@ def parse(self, **kwargs): self.out("time_dependent_computes", time_dependent_computes) # check trajectory file - if self.node.get_option("trajectory_filename") not in list_of_files: - return self.exit_codes.ERROR_TRAJECTORY_FILE_MISSING - # Gather the lammps trajectory data - filename = self.node.get_option("trajectory_filename") - with self.node.outputs.retrieved.base.repository.open(filename) as handle: - lammps_trajectory = LammpsTrajectory(handle) - self.out("trajectories", lammps_trajectory) - - self.out("structure", lammps_trajectory.get_step_structure(-1)) + trajectory_filename = self.node.get_option("trajectory_filename") + if trajectory_filename not in list_of_files: + if "script" not in self.node.inputs: + return self.exit_codes.ERROR_TRAJECTORY_FILE_MISSING + else: + with self.node.outputs.retrieved.base.repository.open( + trajectory_filename + ) as handle: + lammps_trajectory = LammpsTrajectory(handle) + + self.out("trajectories", lammps_trajectory) + self.out("structure", lammps_trajectory.get_step_structure(-1)) # check stdout if self.node.get_option("scheduler_stdout") not in list_of_files: diff --git a/tests/test_parsers.py b/tests/test_parsers.py index bcf473b..8fe2e96 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -6,7 +6,7 @@ from textwrap import dedent from aiida.cmdline.utils.common import get_calcjob_report -from aiida.orm import FolderData +from aiida.orm import FolderData, SinglefileData from aiida.plugins 
import ParserFactory import pytest import yaml @@ -50,6 +50,41 @@ def get_traj_force(): ) +def test_lammps_base(db_test_app, data_regression): + """Check if the log file is produced during calculation.""" + filename = os.path.join( + TEST_DIR, + "input_files", + "parsers", + "log.lammps", + ) + retrieved = FolderData() + retrieved.base.repository.put_object_from_file(filename, "log.lammps") + retrieved.base.repository.put_object_from_filelike( + io.StringIO(""), "_scheduler-stdout.txt" + ) + retrieved.base.repository.put_object_from_filelike( + io.StringIO(""), "_scheduler-stderr.txt" + ) + inputs = {"script": SinglefileData(io.StringIO(""))} + calc_node = db_test_app.generate_calcjob_node( + "lammps.base", retrieved, inputs=inputs + ) + parser = ParserFactory("lammps.base") + with db_test_app.sandbox_folder() as temp_path: + ( + results, + calcfunction, + ) = parser.parse_from_node( # pylint: disable=unused-variable + calc_node, + retrieved_temporary_folder=temp_path.abspath, + ) + + assert calcfunction.is_finished_ok + assert "results" in calcfunction.outputs + data_regression.check({"results": calcfunction.outputs.results.get_dict()}) + + @pytest.mark.parametrize( "plugin_name", ["lammps.force", "lammps.optimize", "lammps.md", "lammps.md.multi"] ) @@ -221,8 +256,6 @@ def test_run_error(db_test_app, plugin_name): retrieved_temporary_folder=temp_path.abspath, ) - print(get_calcjob_report(calc_node)) - assert calcfunction.is_finished, calcfunction.exception assert calcfunction.is_failed, calcfunction.exit_status assert ( diff --git a/tests/test_parsers/test_lammps_base.yml b/tests/test_parsers/test_lammps_base.yml new file mode 100644 index 0000000..99ea478 --- /dev/null +++ b/tests/test_parsers/test_lammps_base.yml @@ -0,0 +1,15 @@ +results: + compute_variables: + bin: standard + bins: + - 1 + - 1 + - 1 + binsize: 4.06435 + ghost_atom_cutoff: 8.1287 + master_list_distance_cutoff: 8.1287 + max_neighbors_atom: 2000 + steps_per_second: 45452.422 + total_wall_time: 
0:00:00 + total_wall_time_seconds: 0 + units_style: metal diff --git a/tests/utils.py b/tests/utils.py index a4b0e38..53476a3 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -254,6 +254,7 @@ def generate_calcjob_node( retrieved, computer_name="localhost", attributes=None, + inputs=None, ): """Fixture to generate a mock `CalcJobNode` for testing parsers. @@ -283,6 +284,11 @@ def generate_calcjob_node( if attributes: node.base.attributes.set(attributes) # pylint: disable=no-member + if inputs: + for key, value in inputs.items(): + value.store() + node.add_incoming(value, link_type=LinkType.INPUT_CALC, link_label=key) + node.store() retrieved.base.links.add_incoming(