Merge pull request #12 from Quantum-Accelerators/results

Add results parsing
Quantum-Accelerators · Jan 14, 2024 · 4355d12 · 4355d12
2 parents 5bf2d24 + c0655b9
commit 4355d12
Show file tree

Hide file tree

Showing 9 changed files with 416 additions and 26 deletions.
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -46,7 +46,7 @@ jobs:
         run: echo "RASPA_DIR=/usr/share/miniconda" >> "$GITHUB_ENV"
 
       - name: Run tests with pytest
-        run: pytest --noconftest --cov=raspa_ase --cov-report=xml
+        run: pytest --cov=raspa_ase --cov-report=xml
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3
diff --git a/docs/examples.md b/docs/examples.md
@@ -40,6 +40,7 @@ calc = Raspa(boxes=boxes, components=components, parameters=parameters)
 
 atoms.calc = calc
 atoms.get_potential_energy()
+print(calc.results)
 ```
 
 1. Use an empty `Atoms` object to create a system without a framework.
@@ -103,6 +104,7 @@ calc = Raspa(boxes=boxes, components=components, parameters=parameters)
 
 atoms.calc = calc
 atoms.get_potential_energy()
+print(calc.results)
 ```
 
 ## Example 7: Adsorption isotherm of methane in MFI
@@ -149,6 +151,7 @@ calc = Raspa(components=components, parameters=parameters)
 
 atoms.calc = calc
 atoms.get_potential_energy()
+print(calc.results)
 ```
 
 1. This file is provided in [`raspa_ase/docs/files/MFI_SI.cif`](https://github.com/Quantum-Accelerators/raspa_ase/blob/main/docs/files/MFI_SI.cif) for the sake of this tutorial. The `Atoms` object represents the framework to be studied and will be written out to the current working directory to be used by RASPA.

diff --git a/docs/intro.md b/docs/intro.md
@@ -13,8 +13,11 @@ from raspa_ase import Raspa
 atoms = read("my_framework.cif")
 atoms.calc = Raspa()
 atoms.get_potential_energy()
+print(atoms.calc.results)
 ```
 
+Tabulated results can be found in the `calc.results` dictionary.
+
 ## Framework Properties
 
 ### Defining the Structure

diff --git a/src/raspa_ase/calculator.py b/src/raspa_ase/calculator.py
@@ -11,18 +11,14 @@
 from ase.calculators.genericfileio import CalculatorTemplate, GenericFileIOCalculator
 
 from raspa_ase.utils.dicts import merge_parameters, pop_parameter
-from raspa_ase.utils.io import write_frameworks, write_simulation_input
+from raspa_ase.utils.io import parse_output, write_frameworks, write_simulation_input
 from raspa_ase.utils.params import get_framework_params
 
 if TYPE_CHECKING:
-    from typing import Any, TypedDict
+    from typing import Any
 
     from ase.atoms import Atoms
 
-    class Results(TypedDict, total=False):
-        energy: float  # eV
-
-
 SIMULATION_INPUT = "simulation.input"
 LABEL = "raspa"
 
@@ -156,7 +152,7 @@ def write_input(
         write_frameworks(frameworks, directory)
 
     @staticmethod
-    def read_results(directory: Path | str) -> Results:
+    def read_results(directory: Path | str) -> dict[str, Any]:
         """
         Read the results of a RASPA calculation.
 
@@ -170,7 +166,16 @@ def read_results(directory: Path | str) -> Results:
         Results
             The RASPA results, formatted as a dictionary.
         """
-        return {"energy": None}
+        output_path = Path(directory) / "Output"
+        systems = Path(output_path).glob("System_*")
+        results = {"energy": None}
+        for system in systems:
+            data_files = Path(system).glob("*.data")
+            results[system.name] = {}
+            for data_file in data_files:
+                output = parse_output(data_file)
+                results[system.name][data_file.name] = output
+        return results
 
     def load_profile(self, cfg, **kwargs) -> RaspaProfile:
         """

diff --git a/src/raspa_ase/utils/io.py b/src/raspa_ase/utils/io.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import re
 from collections.abc import Iterable
 from pathlib import Path
 from typing import TYPE_CHECKING
@@ -73,6 +74,171 @@ def write_frameworks(frameworks: list[Atoms], directory: str | Path) -> None:
         structure.to(str(Path(directory, name + ".cif")))
 
 
+def parse_output(filepath: str | Path) -> dict[str, Any]:
+    """
+    Specific parsing of the output file. Adapted from the following:
+    https://github.com/iRASPA/RASPA2/blob/master/python/output_parser.py
+
+    Parameters
+    ----------
+    filepath
+        The path to the RASPA output file.
+
+    Returns
+    -------
+    dict
+        The parsed output data.
+    """
+
+    def _clean(split_list: list[str]) -> list[float]:
+        """Strips and attempts to convert a list of strings to floats."""
+
+        def try_float(s):
+            try:
+                return float(s)
+            except ValueError:
+                return s
+
+        return [try_float(s.strip()) for s in split_list if s]
+
+    with Path(filepath).open(mode="r") as fd:
+        raspa_output = fd.read()
+
+    # Split the text into stripped rows, dropping empty rows and "-----"/"+++++" rules
+    data = [
+        row.strip()
+        for row in raspa_output.splitlines()
+        if row and all(d not in row for d in ["-----", "+++++"])
+    ]
+
+    # Rows of "=====" mark section titles (except under "Exclusion constraints energy")
+    delimiters = [
+        i
+        for i, row in enumerate(data)
+        if "=====" in row and "Exclusion constraints energy" not in data[i - 1]
+    ]
+
+    # Each "absolute adsorption:" row, and the row two below it (presumably "excess
+    # adsorption:"), continues on the next row; join each pair and blank the leftovers
+    abs_adsorp_rows = [i for i, row in enumerate(data) if "absolute adsorption:" in row]
+    for row in abs_adsorp_rows:
+        data[row] += "  " + data[row + 1]
+        data[row + 2] += data[row + 3]
+        data[row + 1], data[row + 3] = " ", " "
+
+    # Build {title: rows}: the title is the row before a delimiter and the content runs
+    # up to the next title; the last delimiter only closes the previous section
+    info = {
+        data[n - 1].strip(":"): data[n + 1 : delimiters[i + 1] - 1]
+        for i, n in enumerate(delimiters[:-1])
+    }
+
+    # Parse each section's rows into a sub-dictionary (branch order matters below)
+    for key, values in info.items():
+        d, note_index = {}, 1
+        for item in values:
+            # "Block[ #]" rows, except in box-length sections and Van der Waals rows
+            if (
+                "Block" in item
+                and "Box-lengths" not in key
+                and "Van der Waals:" not in item
+            ):
+                blocks = _clean(item.split())
+                d["".join(blocks[:2])] = blocks[2:]
+
+            # Most of the average data values are parsed in this section
+            elif (
+                any(s in item for s in ["Average     ", "Surface area:"])
+                and "desorption" not in key
+            ):
+                average_data = _clean(item.split())
+                # Average values are keyed by their unit; the row layout varies
+                if len(average_data) == 8:
+                    del average_data[2:4]
+                    d[" ".join(average_data[4:6])] = average_data[1:4]
+                elif len(average_data) == 5:
+                    d[average_data[-1]] = average_data[1:4]
+                elif "Surface" in average_data[0]:
+                    d[average_data[-1]] = average_data[2:5]
+                # This is the common case
+                else:
+                    del average_data[2]
+                    d[average_data[-1]] = average_data[1:4]
+
+            # Average box-lengths has its own pattern
+            elif "Box-lengths" in key:
+                box_lengths = _clean(item.split())
+                i = 3 if "angle" in item else 2
+                d[" ".join(box_lengths[:i])] = box_lengths[i:]
+
+            # "Heat of Desorption" section
+            elif "desorption" in key:
+                if "Note" in item:
+                    notes = re.split(r"[:\s]{2,}", item)
+                    d["%s %d" % (notes[0], note_index)] = notes[1]
+                    note_index += 1
+                else:
+                    heat_desorp = _clean(item.split())
+                    # One row has a leading "Average" token; drop it to align the columns
+                    if "Average" in item:
+                        del heat_desorp[0]
+                    d[heat_desorp[-1]] = heat_desorp[0:3]
+
+            # Sections that include Van der Waals terms
+            elif (
+                "Host-" in key or "-Cation" in key or "Adsorbate-Adsorbate" in key
+            ) and "desorption" not in key:
+                van_der = item.split()
+                # Per-block row: total value followed by two "name: value" columns
+                if "Block" in van_der[0]:
+                    sub_data = [
+                        _clean(s.split(":")) for s in re.split(r"\s{2,}", item)[1:]
+                    ]
+                    sub_dict = {s[0]: s[1] for s in sub_data[:2]}
+                    d["".join(van_der[:2])] = [float(van_der[2]), sub_dict]
+                # Average of each column
+                elif "Average" in item:
+                    avg = _clean(re.split(r"\s{2,}", item))
+                    vdw, coulomb = (_clean(s.split(": ")) for s in avg[2:4])
+                    d[avg[0]] = avg[1]
+                    d["Average %s" % vdw[0]] = vdw[1]
+                    d["Average %s" % coulomb[0]] = coulomb[1]
+                else:
+                    d["standard deviation"] = _clean(van_der)
+
+            # Per-component data; a "Component" row must come first to define gas_name
+            elif "Number of molecules" in key:
+                adsorb_data = _clean(item.rsplit(" ", 12))
+                if "Component" in item:
+                    gas_name = adsorb_data[2].strip("[]")
+                    d[gas_name] = {}
+                else:
+                    d[gas_name][adsorb_data[0]] = adsorb_data[1:]
+
+            # Henry and Widom
+            elif "Average Widom" in item:
+                d["Widom"] = _clean(item.rsplit(" ", 5))[1:]
+
+            elif "Average Henry" in item:
+                d["Henry"] = _clean(item.rsplit(" ", 5))[1:]
+
+            # Ignore these
+            elif any(
+                s in item
+                for s in ["=====", "Starting simulation", "Finishing simulation"]
+            ):
+                continue
+
+            # Fallback: split on ()[]:, and tabs; the first field becomes the key
+            else:
+                parsed_data = _clean(re.split(r"[()[\]:,\t]", item))
+                d[parsed_data[0]] = parsed_data[1:]
+        # Replace the section's raw rows with its parsed sub-dictionary
+        info[key] = d
+
+    return info
+
+
 def _iterable_to_str(v: list[Any]) -> str:
     """
     Convert a list to a space-separated string.

diff --git a/tests/conftest.py b/tests/conftest.py
diff --git a/tests/data/MFI_SI.cif b/tests/data/MFI_SI.cif
@@ -0,0 +1,89 @@
+data_MFI
+
+_audit_creation_method RASPA-1.0
+_audit_creation_date 2011-2-17
+_audit_author_name 'David Dubbeldam'
+
+_citation_author_name        'H. van Koningsveld, H. van Bekkum, and J. C. Jansen'
+_citation_title              'On the location and disorder of the tetrapropylammonium (TPA) ion in zeolite ZSM-5 with improved framework accuracy'
+_citation_journal_abbrev     'Acta Cryst.'
+_citation_journal_volume     B43
+_citation_page_first         127
+_citation_page_last          132
+_citation_year               1987
+
+_cell_length_a    20.022
+_cell_length_b    19.899
+_cell_length_c    13.383
+_cell_angle_alpha 90
+_cell_angle_beta  90
+_cell_angle_gamma 90
+_cell_volume      5332.03
+
+_symmetry_cell_setting          orthorhombic
+_symmetry_space_group_name_Hall '-P 2ac 2n'
+_symmetry_space_group_name_H-M  'P n m a'
+_symmetry_Int_Tables_number     62
+
+loop_
+_symmetry_equiv_pos_as_xyz
+ 'x,y,z'
+ '-x+1/2,-y,z+1/2'
+ '-x,y+1/2,-z'
+ 'x+1/2,-y+1/2,-z+1/2'
+ '-x,-y,-z'
+ 'x+1/2,y,-z+1/2'
+ 'x,-y+1/2,z'
+ '-x+1/2,y+1/2,z+1/2'
+
+loop_
+_atom_site_label
+_atom_site_type_symbol
+_atom_site_fract_x
+_atom_site_fract_y
+_atom_site_fract_z
+_atom_site_charge
+_atom_site_polarization
+_atom_site_anisotropic_displacement
+_atom_site_anisotropic_type
+_atom_site_print_to_pdb
+Si1      Si4+   0.42238   0.0565   -0.33598   2.05    0       0    absolute yes
+Si2      Si4+   0.30716   0.02772  -0.1893    2.05    0       0    absolute yes
+Si3      Si4+   0.27911   0.06127   0.0312    2.05    0       0    absolute yes
+Si4      Si4+   0.12215   0.06298   0.0267    2.05    0       0    absolute yes
+Si5      Si4+   0.07128   0.02722  -0.18551   2.05    0       0    absolute yes
+Si6      Si4+   0.18641   0.05896  -0.32818   2.05    0       0    absolute yes
+Si7      Si4+   0.42265  -0.1725   -0.32718   2.05    0       0    absolute yes
+Si8      Si4+   0.30778  -0.13016  -0.18548   2.05    0       0    absolute yes
+Si9      Si4+   0.27554  -0.17279   0.03109   2.05    0       0    absolute yes
+Si10     Si4+   0.12058  -0.1731    0.02979   2.05    0       0    absolute yes
+Si11     Si4+   0.07044  -0.13037  -0.182     2.05    0       0    absolute yes
+Si12     Si4+   0.18706  -0.17327  -0.31933   2.05    0       0    absolute yes
+O1       O2-    0.3726    0.0534   -0.2442   -1.025   0       0    absolute yes
+O2       O2-    0.3084    0.0587   -0.0789   -1.025   0       0    absolute yes
+O3       O2-    0.2007    0.0592    0.0289   -1.025   0       0    absolute yes
+O4       O2-    0.0969    0.0611   -0.0856   -1.025   0       0    absolute yes
+O5       O2-    0.1149    0.0541   -0.2763   -1.025   0       0    absolute yes
+O6       O2-    0.2435    0.0553   -0.246    -1.025   0       0    absolute yes
+O7       O2-    0.3742   -0.1561   -0.2372   -1.025   0       0    absolute yes
+O8       O2-    0.3085   -0.1552   -0.0728   -1.025   0       0    absolute yes
+O9       O2-    0.198    -0.1554    0.0288   -1.025   0       0    absolute yes
+O10      O2-    0.091    -0.1614   -0.0777   -1.025   0       0    absolute yes
+O11      O2-    0.1169   -0.1578   -0.2694   -1.025   0       0    absolute yes
+O12      O2-    0.2448   -0.1594   -0.2422   -1.025   0       0    absolute yes
+O13      O2-    0.3047   -0.051    -0.1866   -1.025   0       0    absolute yes
+O14      O2-    0.0768   -0.0519   -0.1769   -1.025   0       0    absolute yes
+O15      O2-    0.4161    0.1276   -0.3896   -1.025   0       0    absolute yes
+O16      O2-    0.4086   -0.0017   -0.4136   -1.025   0       0    absolute yes
+O17      O2-    0.402    -0.1314   -0.4239   -1.025   0       0    absolute yes
+O18      O2-    0.1886    0.1298   -0.3836   -1.025   0       0    absolute yes
+O19      O2-    0.194     0.0007   -0.4082   -1.025   0       0    absolute yes
+O20      O2-    0.1951   -0.1291   -0.419    -1.025   0       0    absolute yes
+O21      O2-   -0.0037    0.0502   -0.208    -1.025   0       0    absolute yes
+O22      O2-   -0.004    -0.1528   -0.2078   -1.025   0       0    absolute yes
+O23      O2-    0.4192   -0.25     -0.354    -1.025   0       0    absolute yes
+O24      O2-    0.1884   -0.25     -0.3538   -1.025   0       0    absolute yes
+O25      O2-    0.2883   -0.25      0.0579   -1.025   0       0    absolute yes
+O26      O2-    0.1085   -0.25      0.0611   -1.025   0       0    absolute yes
+
+
diff --git a/tests/raspa.out b/tests/raspa.out