High throughput wc #147

Open · wants to merge 12 commits into main
14 changes: 11 additions & 3 deletions README.md
@@ -107,28 +107,36 @@ See the [developer guide](https://stfc.github.io/aiida-mlip/developer_guide/inde
* [`md_parser.py`](aiida_mlip/parsers/md_parser.py): `Parser` for `MD` calculation.
* [`train_parser.py`](aiida_mlip/parsers/train_parser.py): `Parser` for `Train` calculation.
* [`helpers/`](aiida_mlip/helpers/): `Helpers` to run calculations.
* [`workflows/`](aiida_mlip/workflows/): `WorkGraphs` or `WorkChains` for common workflows with MLIPs.
* [`hts_workgraph.py`](aiida_mlip/workflows/hts_workgraph.py): A `WorkGraph` to run high-throughput screening optimisations.
* [`docs/`](docs/source/): Code documentation
* [`apidoc/`](docs/source/apidoc/): API documentation
* [`developer_guide/`](docs/source/developer_guide/): Documentation for developers
* [`user_guide/`](docs/source/user_guide/): Documentation for users
* [`images/`](docs/source/images/): Logos etc used in the documentation
* [`examples/`](examples/): Examples for submitting calculations using this plugin
* [`tutorials/`](examples/tutorials/): Scripts for submitting calculations
* [`calculations/`](examples/calculations/): Jupyter notebooks with tutorials for running calculations and other files that are used in the tutorial
* [`tutorials/`](examples/tutorials/): Jupyter notebooks with tutorials for running calculations and other files that are used in the tutorial
* [`calculations/`](examples/calculations/): Scripts for submitting calculations
* [`submit_singlepoint.py`](examples/calculations/submit_singlepoint.py): Script for submitting a singlepoint calculation
* [`submit_geomopt.py`](examples/calculations/submit_geomopt.py): Script for submitting a geometry optimisation calculation
* [`submit_md.py`](examples/calculations/submit_md.py): Script for submitting a molecular dynamics calculation
* [`submit_train.py`](examples/calculations/submit_train.py): Script for submitting a train calculation.
* [`workflows/`](examples/workflows/): Scripts for submitting workflows
* [`run_hts_no_wc.py`](examples/workflows/run_hts_no_wc.py): Script for submitting multiple janus calculations without using any pre-coded high-throughput screening tools (like the WorkGraph).
* [`submit_hts_workgraph.py`](examples/workflows/submit_hts_workgraph.py): Script for submitting a high-throughput screening WorkGraph for geometry optimisation.
* [`workflows/utils`](examples/workflows/utils): A folder with utility scripts for dealing with the high-throughput calculations.
* [`tests/`](tests/): Basic regression tests using the [pytest](https://docs.pytest.org/en/latest/) framework (submitting a calculation, ...). Install with `pip install -e .[testing]` and run `pytest`.
* [`conftest.py`](tests/conftest.py): Configuration of fixtures for [pytest](https://docs.pytest.org/en/latest/)
* [`calculations/`](tests/calculations): Calculations
* [`test_singlepoint.py`](tests/calculations/test_singlepoint.py): Test `SinglePoint` calculation
* [`test_geomopt.py`](tests/calculations/test_geomopt.py): Test `Geomopt` calculation
* [`test_md.py`](tests/calculations/test_md.py): Test `MD` calculation
* [`test_train.py`](tests/calculations/test_train.py): Test `Train` calculation
* [`data/`](tests/data): `ModelData`
* [`data/`](tests/data): Data
* [`test_model.py`](tests/data/test_model.py): Test `ModelData` type
* [`test_config.py`](tests/data/test_config.py): Test `JanusConfigfile` type
* [`workflows/`](tests/workflows): Workflows
* [`test_hts.py`](tests/workflows/test_hts.py): Test high throughput screening workgraph.
* [`.gitignore`](.gitignore): Telling git which files to ignore
* [`.pre-commit-config.yaml`](.pre-commit-config.yaml): Configuration of [pre-commit hooks](https://pre-commit.com/) that sanitize coding style and check for syntax errors. Enable via `pip install -e .[pre-commit] && pre-commit install`
* [`LICENSE`](LICENSE): License for the plugin
1 change: 1 addition & 0 deletions aiida_mlip/workflows/__init__.py
@@ -0,0 +1 @@
"""Workflows for aiida-mlip."""
75 changes: 75 additions & 0 deletions aiida_mlip/workflows/hts_workgraph.py
@@ -0,0 +1,75 @@
"""Workgraph to run high-throughput screening optimisations."""

from pathlib import Path

from aiida_workgraph import WorkGraph, task
from ase.io import read

from aiida.plugins import CalculationFactory

from aiida_mlip.helpers.help_load import load_structure

Geomopt = CalculationFactory("mlip.opt")


@task.graph_builder(outputs=[{"name": "final_structures", "from": "context.relax"}])
def run_opt_calc(folder: Path, janus_opt_inputs: dict) -> WorkGraph:
"""
Run a geometry optimisation using Geomopt.

Parameters
----------
folder : Path
Path to the folder containing input structure files.
janus_opt_inputs : dict
Dictionary of inputs for the calculations.

Returns
-------
WorkGraph
The workgraph containing the optimisation tasks.
"""
wg = WorkGraph()
for child in folder.glob("**/*"):
try:
read(child.as_posix())
except Exception: # pylint: disable=broad-except
continue
structure = load_structure(child)
janus_opt_inputs["struct"] = structure
relax = wg.add_task(Geomopt, name=f"relax_{child.stem}", **janus_opt_inputs)
relax.set_context({"final_structure": f"relax.{child.stem}"})
return wg


def HTSWorkGraph(folder_path: Path, inputs: dict) -> WorkGraph:
"""
Create and execute a high-throughput workflow for geometry optimisation using MLIPs.

Parameters
----------
folder_path : Path
Path to the folder containing input structure files.
inputs : dict
Dictionary of inputs for the calculations.

Returns
-------
WorkGraph
The workgraph containing the high-throughput workflow.
"""
wg = WorkGraph("hts_workflow")

wg.add_task(
run_opt_calc, name="opt_task", folder=folder_path, janus_opt_inputs=inputs
Reviewer comment (Member): Could this be generalised to take an arbitrary task using one of the function's parameters, so it wouldn't have to be hardcoded?
)

wg.group_outputs = [{"name": "opt_structures", "from": "opt_task.final_structures"}]

wg.to_html()

Reviewer comment: Will this work in the notebook? If not, maybe delete this line. Suggested change: remove `wg.to_html()`.


wg.max_number_jobs = 10

Reviewer comment: Two small suggestions:

  • Set max_number_jobs as an input parameter.
  • Do not submit the wg inside the function; instead return the wg and let the user run wg.submit(). The user can then also modify the inputs of the tasks or other settings before submission.


wg.submit()

return wg
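
A minimal sketch of how these reviewer suggestions could be combined, reusing the run_opt_calc graph builder defined above; build_hts_workgraph and its max_number_jobs parameter are hypothetical names, and generalising to an arbitrary calculation would additionally require threading the chosen CalcJob through run_opt_calc itself:

from pathlib import Path

from aiida_workgraph import WorkGraph


def build_hts_workgraph(
    folder_path: Path, inputs: dict, max_number_jobs: int = 10
) -> WorkGraph:
    """Build, but do not submit, a high-throughput screening WorkGraph."""
    wg = WorkGraph("hts_workflow")
    wg.add_task(
        run_opt_calc, name="opt_task", folder=folder_path, janus_opt_inputs=inputs
    )
    wg.group_outputs = [{"name": "opt_structures", "from": "opt_task.final_structures"}]
    wg.max_number_jobs = max_number_jobs
    # Returned unsubmitted so the caller can adjust task inputs, then call wg.submit().
    return wg

The caller would then run wg = build_hts_workgraph(folder_path, inputs), optionally tweak the tasks, and finally call wg.submit().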
6 changes: 5 additions & 1 deletion docs/source/conf.py
@@ -197,7 +197,11 @@

# Warnings to ignore when using the -n (nitpicky) option
# We should ignore any python built-in exception, for instance
nitpick_ignore = [("py:class", "Logger"), ("py:class", "QbFields")]
nitpick_ignore = [
("py:class", "Logger"),
("py:class", "QbFields"),
("py:class", "aiida_workgraph.workgraph.WorkGraph"),
]


def run_apidoc(_):
115 changes: 115 additions & 0 deletions examples/workflows/run_hts_no_wc.py
@@ -0,0 +1,115 @@
"""Example code for submitting high-throughpout calculation without a Workchain"""

import csv
from pathlib import Path
import sys
import time

import click

from aiida.common import NotExistent
from aiida.engine import run_get_pk, submit
from aiida.orm import load_code, load_group, load_node
from aiida.plugins import CalculationFactory

from aiida_mlip.data.config import JanusConfigfile
from aiida_mlip.data.model import ModelData
from aiida_mlip.helpers.help_load import load_structure


# pylint: disable=too-many-arguments
# pylint: disable=too-many-locals
def run_hts(folder, config, calc, output_filename, code, group, launch):
"""Run high throughput screening using the parameters from the cli."""
# Add the required inputs for aiida
metadata = {"options": {"resources": {"num_machines": 1}}}

    # All the other parameters are taken from the config file
    # We want to pass it as an AiiDA data type for the provenance
conf = JanusConfigfile(config)
# Define calculation to run
Calculation = CalculationFactory(f"mlip.{calc}")
# pylint: disable=line-too-long
model = ModelData.from_uri(
uri="https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model",
cache_dir="models",
architecture="mace_mp",
filename="small.model",
)
list_of_nodes = []
p = Path(folder)
for child in p.glob("**/*"):
if child.name.endswith("cif"):
print(child.name)
metadata["label"] = f"{child.name}"
# This structure will overwrite the one in the config file if present
structure = load_structure(child.absolute())
# Run calculation
if launch == "run_get_pk":
result, pk = run_get_pk(
Calculation,
code=code,
struct=structure,
metadata=metadata,
config=conf,
model=model,
)
list_of_nodes.append(pk)

group.add_nodes(load_node(pk))
time.sleep(1)
print(f"Printing results from calculation: {result}")

if launch == "submit":
result = submit(
Calculation,
code=code,
struct=structure,
metadata=metadata,
config=conf,
model=model,
)
list_of_nodes.append(result.pk)

group.add_nodes(load_node(result.pk))

print(f"Printing results from calculation: {result}")

print(f"printing dictionary with all {list_of_nodes}")
# write list of nodes in csv file
# Unnecessary but might be useful. better use group to query
with open(output_filename, "w", newline="", encoding="utf-8") as csvfile:
writer = csv.writer(csvfile)
writer.writerow(["name", "PK"])
for node in list_of_nodes:
writer.writerow([load_node(node).label, node])


@click.command("cli")
@click.option("--folder", type=Path)
@click.option("--config", type=Path, help="Config file to use")
@click.option("--calc", type=str, help="Calc to run", default="sp")
@click.option("--output_filename", type=str, default="list_nodes.csv")
@click.option("--codelabel", type=str)
@click.option("--group", type=int)
@click.option(
"--launch", type=str, default="submit", help="can be run_get_pk or submit"
)
# pylint: disable=too-many-arguments
def cli(folder, config, calc, output_filename, codelabel, group, launch):
"""Click interface."""
try:
code = load_code(codelabel)
except NotExistent:
print(f"The code '{codelabel}' does not exist.")
sys.exit(1)
    try:
        group = load_group(group)
    except NotExistent:
        print(f"The group '{group}' does not exist.")
        sys.exit(1)

run_hts(folder, config, calc, output_filename, code, group, launch)


if __name__ == "__main__":
cli() # pylint: disable=no-value-for-parameter
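
Since every calculation node is added to an AiiDA group, the CSV file written above is only a convenience; the same information can be recovered later by querying the group. A minimal sketch, assuming the group's PK is known:

from aiida import load_profile
from aiida.orm import CalcJobNode, Group, QueryBuilder

load_profile()

group_pk = 123  # replace with the PK of the group used for the screening

# Find all calculation jobs belonging to the group and print their labels and PKs.
qb = QueryBuilder()
qb.append(Group, filters={"id": group_pk}, tag="group")
qb.append(CalcJobNode, with_group="group", project=["label", "id"])
for label, pk in qb.all():
    print(label, pk)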
20 changes: 20 additions & 0 deletions examples/workflows/submit_hts_workgraph.py
@@ -0,0 +1,20 @@
"""Example submission for hts workgraph."""

from pathlib import Path

from aiida.orm import load_code

from aiida_mlip.data.model import ModelData
from aiida_mlip.workflows.hts_workgraph import HTSWorkGraph

folder_path = Path("/home/federica/aiida-mlip/tests/workflows/structures/")
inputs = {
"model": ModelData.from_local(
"/home/federica/aiida-mlip/tests/data/input_files/mace/mace_mp_small.model",
architecture="mace_mp",
Reviewer comment (Member, on lines +10 to +14): Paths should be generalised.

),
"metadata": {"options": {"resources": {"num_machines": 1}}},
"code": load_code("janus@localhost"),
}

HTSWorkGraph(folder_path, inputs)
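
One way to address the path comment would be to resolve the structure and model locations relative to the repository checkout rather than hardcoding a user's home directory; a sketch under that assumption (the relative layout is taken from the repository structure in the README):

from pathlib import Path

from aiida.orm import load_code

from aiida_mlip.data.model import ModelData
from aiida_mlip.workflows.hts_workgraph import HTSWorkGraph

# Assumes this script lives in examples/workflows/ inside an aiida-mlip checkout.
repo_root = Path(__file__).resolve().parents[2]
folder_path = repo_root / "tests" / "workflows" / "structures"

inputs = {
    "model": ModelData.from_local(
        str(repo_root / "tests" / "data" / "input_files" / "mace" / "mace_mp_small.model"),
        architecture="mace_mp",
    ),
    "metadata": {"options": {"resources": {"num_machines": 1}}},
    "code": load_code("janus@localhost"),
}

HTSWorkGraph(folder_path, inputs)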
23 changes: 23 additions & 0 deletions examples/workflows/utils/check_status_calc.py
@@ -0,0 +1,23 @@
"""A script to check the status of calculations in a group."""

import sys

from aiida.orm import load_group

if len(sys.argv) != 2:
    raise ValueError("Must give 1 argument: the PK of the group")


group = load_group(pk=int(sys.argv[1]))
for calc_node in group.nodes:

if calc_node.is_finished:
print(f"Node<{calc_node.pk}> finished with exit status {calc_node.exit_code}")
else:
print(f"Node<{calc_node.pk}> still in queue")

if calc_node.is_finished_ok:
print(f"Node<{calc_node.pk}> finished ok, exit status {calc_node.exit_code}")

if calc_node.is_failed:
print(f"Node<{calc_node.pk}> failed with exit status {calc_node.exit_code}")
10 changes: 10 additions & 0 deletions examples/workflows/utils/config_opt.yml
@@ -0,0 +1,10 @@
fmax: 0.01
pressure: 0.0
model: "https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model"
arch: mace_mp
steps: 100
vectors-only: True
calc-kwargs:
calc_kwargs:
dispersion: True
model: large
26 changes: 26 additions & 0 deletions examples/workflows/utils/delete_nodes.sh
@@ -0,0 +1,26 @@
#!/bin/bash

# Define the path to the CSV file
csv_file="list_nodes.csv"

# Check if the CSV file exists
if [ ! -f "$csv_file" ]; then
echo "CSV file not found: $csv_file"
exit 1
fi

# Flag to skip the first line
skip_first_line=true

# Loop through each line in the CSV file
while IFS=, read -r column1 column2 rest_of_columns; do
# Skip the first line
if $skip_first_line; then
skip_first_line=false
continue
fi

    # Delete the node whose PK is in the second column
echo "Deleting node $column2"
yes y | verdi node delete "$column2"
done < "$csv_file"
6 changes: 5 additions & 1 deletion pyproject.toml
@@ -30,7 +30,8 @@ python = "^3.9"
aiida-core = "^2.6"
ase = "^3.23.0"
voluptuous = "^0.14"
janus-core = "^v0.6.0b0"
janus-core = "^v0.6.3b0"
Reviewer comment (Member): Might need to pin or check compatibility with newer versions.

aiida-workgraph = {extras = ["widget"], version = "^0.3.14"}

[tool.poetry.group.dev.dependencies]
coverage = {extras = ["toml"], version = "^7.4.1"}
@@ -79,6 +80,9 @@ build-backend = "poetry.core.masonry.api"
"mlip.md_parser" = "aiida_mlip.parsers.md_parser:MDParser"
"mlip.train_parser" = "aiida_mlip.parsers.train_parser:TrainParser"

[tool.poetry.plugins."aiida.workflows"]
"mlip.hts_wg" = "aiida_mlip.workflows.hts_workgraph:HTSWorkGraph"

Reviewer comment (on lines +83 to +85): We are working on a new entry point for the WorkGraph. Will let you know the syntax when the API is fixed.

[tool.black]
line-length = 88

3 changes: 3 additions & 0 deletions tests/calculations/configs/config_janus_opt.yaml
@@ -0,0 +1,3 @@
minimize-kwargs:
opt-kwargs:
alpha: 100
13 changes: 12 additions & 1 deletion tests/conftest.py
@@ -95,7 +95,7 @@ def janus_code(aiida_local_code_factory):
The janus code instance.
"""
janus_path = shutil.which("janus") or os.environ.get("JANUS_PATH")
return aiida_local_code_factory(executable=janus_path, entry_point="mlip.sp")
return aiida_local_code_factory(executable=janus_path, entry_point="mlip.opt")


@pytest.fixture
@@ -240,6 +240,17 @@ def structure_folder(test_folder):
return test_folder / "calculations" / "structures"


@pytest.fixture
def structure_folder2(test_folder):
"""
    Fixture to provide the path to the workflow structures folder.

    Returns:
        Path: The path to the workflow structures folder.
"""
return test_folder / "workflows" / "structures"


@pytest.fixture
def config_folder(test_folder):
"""
5 changes: 5 additions & 0 deletions tests/workflows/structures/h2o.xyz
@@ -0,0 +1,5 @@
3
Lattice="10.0 0.0 0.0 0.0 10.0 0.0 0.0 0.0 10.0" Properties=species:S:1:pos:R:3 pbc="F F F"
O 5.0 5.763239 5.596309
H 5.0 6.526478 5.000000
H 5.0 5.000000 5.000000
7 changes: 7 additions & 0 deletions tests/workflows/structures/methane.xyz
@@ -0,0 +1,7 @@
5
XYZ file generated by Avogadro.
C 0.00000 0.00000 0.00000
H 0.00000 0.00000 1.08900
H 1.02672 0.00000 -0.36300
H -0.51336 -0.88916 -0.36300
H -0.51336 0.88916 -0.36300