Skip to content

Commit

Permalink
Added Info attributes to ascii class files
Browse files Browse the repository at this point in the history
Now all ascii files will contain the info attributes.

This enables the class to dynamically add attributes
based on the content of Sile._info_attributes_.

Each of those entries will be compiled to a regex Pattern
and matched against every line that is read, which is done by
patching the instance's readline function.

This is very crude, and might be slow for very big files with lots
of attributes. Perhaps it can be streamlined a bit by no longer searching
for those attributes which are static and already found (or perhaps all
non-updatable attributes could be removed from the search once the full
file has been processed).

It proves quite simple to use and makes the code look prettier and
more readable. But the internals are a bit messy.

Signed-off-by: Nick Papior <[email protected]>
  • Loading branch information
zerothi committed Oct 26, 2023
1 parent c2ebad7 commit 6428dee
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 64 deletions.
83 changes: 22 additions & 61 deletions src/sisl/io/orca/stdout.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
import numpy as np
from functools import partial

from sisl._internal import set_module
from sisl.messages import deprecation
Expand All @@ -15,80 +16,39 @@
__all__ = ["outputSileORCA", "stdoutSileORCA"]


_A = partial(SileORCA._Attr, updatable=False)


@set_module("sisl.io.orca")
class stdoutSileORCA(SileORCA):
""" Output file from ORCA """

def _setup(self, *args, **kwargs):
""" Ensure the class has essential tags """
super()._setup(*args, **kwargs)
self._completed = None
self._na = None
self._no = None
self._vdw = None

def readline(self, *args, **kwargs):
line = super().readline(*args, **kwargs)
if self._completed is None and "ORCA TERMINATED NORMALLY" in line:
self._completed = True
elif self._completed is None and line == '':
self._completed = False
elif self._na is None and "Number of atoms" in line:
v = line.split()
self._na = int(v[-1])
elif self._no is None and "Number of basis functions" in line:
v = line.split()
self._no = int(v[-1])
elif self._vdw is None and "DFT DISPERSION CORRECTION" in line:
self._vdw = True
return line

readline.__doc__ = SileORCA.readline.__doc__
_info_attributes_ = [
_A("na", r".*Number of atoms",
lambda attr, match: int(match.string.split()[-1])),
_A("no", r".*Number of basis functions",
lambda attr, match: int(match.string.split()[-1])),
_A("_vdw_", r".*DFT DISPERSION CORRECTION",
lambda attr, match: True, default=False),
_A("completed", r".*ORCA TERMINATED NORMALLY",
lambda attr, match: True, default=False),
]

def completed(self):
""" True if the full file has been read and "ORCA TERMINATED NORMALLY" was found. """
if self._completed is None:
with self:
completed = self.step_to("ORCA TERMINATED NORMALLY")[0]
else:
completed = self._completed
if completed:
self._completed = True
return completed
return self.info.completed

@property
@deprecation("stdoutSileORCA.na is deprecated in favor of stdoutSileORCA.info.na", "0.16.0")
def na(self):
""" Number of atoms """
if self._na is None:
with self:
f = self.step_to("Number of atoms")
if f[0]:
self._na = int(f[1].split()[-1])
return self._na
return self.info.na

@property
@deprecation("stdoutSileORCA.no is deprecated in favor of stdoutSileORCA.info.no", "0.16.0")
def no(self):
""" Number of orbitals (basis functions) """
if self._no is None:
with self:
f = self.step_to("Number of basis functions")
if f[0]:
self._no = int(f[1].split()[-1])
return self._no

@property
def _vdw_(self):
""" Whether VDW dispersions are included """
if self._vdw is None:
old_line = None
if hasattr(self, "fh"):
old_line = self.fh.tell()
with self:
f = self.step_to("DFT DISPERSION CORRECTION")
self._vdw = f[0]
if old_line is not None:
self.fh.seek(old_line)
return self._vdw
return self.info.no

@SileBinder(postprocess=np.array)
@sile_fh_open()
Expand Down Expand Up @@ -306,9 +266,10 @@ def read_energy(self):
E["embedding"] = float(v[-2]) * Ha2eV
line = self.readline()

if self._vdw_:
if self.info._vdw_:
self.step_to("DFT DISPERSION CORRECTION")
v = self.step_to("Dispersion correction", allow_reread=False)[1].split()
print("vdW", v, self.info._vdw_)
E["vdw"] = float(v[-1]) * Ha2eV

return E
Expand Down Expand Up @@ -355,7 +316,7 @@ def read_orbital_energies(self):
return E


outputSileORCA = deprecation("outputSileORCA has been deprecated in favor of outSileOrca.", "0.15")(stdoutSileORCA)
outputSileORCA = deprecation("outputSileORCA has been deprecated in favor of stdoutSileOrca.", "0.15")(stdoutSileORCA)

add_sile("output", stdoutSileORCA, gzip=True, case=False)
add_sile("orca.out", stdoutSileORCA, gzip=True, case=False)
Expand Down
2 changes: 2 additions & 0 deletions src/sisl/io/orca/tests/test_stdout.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ def test_charge_orbital_reduced_unpol(sisl_files):
assert C[0] == 0.315910
assert S is None

@pytest.mark.only
def test_charge_orbital_full_unpol(sisl_files):
f = sisl_files(_dir, 'molecule2.output')
out = stdoutSileORCA(f)
Expand All @@ -253,6 +254,7 @@ def test_charge_orbital_full_unpol(sisl_files):
assert C is None
assert S is None

@pytest.mark.only
def test_read_energy(sisl_files):
f = sisl_files(_dir, 'molecule.output')
out = stdoutSileORCA(f)
Expand Down
191 changes: 188 additions & 3 deletions src/sisl/io/sile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
from operator import and_, contains
from os.path import basename, splitext
from pathlib import Path
from typing import Any, Callable, Optional
from typing import Any, Callable, Optional, Union
from textwrap import dedent, indent
import re

from sisl._environ import get_environ_variable
from sisl._internal import set_module
from sisl.messages import SislInfo, SislWarning, deprecate
from sisl.messages import SislInfo, SislWarning, deprecate, info
from sisl.utils.misc import str_spec

from ._help import *
Expand Down Expand Up @@ -716,7 +718,190 @@ def close(self):


@set_module("sisl.io")
class Sile(BaseSile):
class Info:
    """ An info class that creates .info with inherent properties

    These properties can be added at will.

    A class lists its attributes in `_info_attributes_`; each entry is either
    an `Info._Attr` instance or a `dict` with the keyword arguments of
    `Info._Attr`. Upon instantiation every entry is copied into the
    instance's own `.info` object, so the class-level entries are never
    modified by reading a file.
    """

    # default to be empty
    _info_attributes_ = []

    class _Info:
        """ The actual .info object that will be attached to the instance.

        As of now this is problematic to document.
        We should figure out a way to do that.
        """

        def __init__(self, instance):
            # attach this info instance to the instance
            self._instance = instance
            # `_attrs[i]` is the name of `_properties[i]`
            self._attrs = []
            self._properties = []

            # Patch the readline of the instance so that every line read
            # (by whichever method) is offered to all properties.
            # `properties` is the same list object as `self._properties`,
            # hence properties added later are processed as well.
            properties = self._properties
            func = instance.readline

            @wraps(func)
            def readline(*args, **kwargs):
                line = func(*args, **kwargs)
                for prop in properties:
                    prop.process(line)
                return line

            instance.readline = readline

            # add the properties
            for prop in instance._info_attributes_:
                if isinstance(prop, dict):
                    # `_Attr` lives in the class scope of `Info`, which is not
                    # visible from within a method; it must be reached through
                    # the class itself.
                    prop = Info._Attr(**prop)
                else:
                    # never share state with the class-level definition
                    prop = prop.copy()
                self.add_property(prop)

        def add_property(self, prop):
            """ Add a new property to be reachable from the .info """
            self._attrs.append(prop.attr)
            self._properties.append(prop)

        def __str__(self):
            """ Return a string of the contained attributes, with the values they currently contain """
            return "\n".join([p.documentation() for p in self._properties])

        def __getattr__(self, attr):
            """ Overwrite the attribute retrieval to be able to fetch the actual values from the information

            If the attribute has not been found yet, lines are read from the
            instance until it is found or an empty line (end of file) is
            returned. When the instance has a file handle, its position is
            restored afterwards.

            Raises
            ------
            AttributeError
                if `attr` is not a registered property
            """
            inst = self._instance
            if attr not in self._attrs:
                raise AttributeError(f"{inst.__class__.__name__}.info.{attr} does not exist, did you mistype?")

            idx = self._attrs.index(attr)
            prop = self._properties[idx]
            if prop.found:
                # only when hitting the new line will this change...
                return prop.value

            # we need to parse the rest of the file
            # This is not ideal, but...
            loc = None
            try:
                loc = inst.fh.tell()
            except AttributeError:
                # no file handle on the instance, nothing to restore
                pass
            with inst:
                # the patched readline updates `prop` as a side effect
                line = inst.readline()
                while not (prop.found or line == ''):
                    line = inst.readline()
            if loc is not None:
                inst.fh.seek(loc)

            if not prop.found:
                # TODO see if this should just be a warning? Perhaps it would be ok that it can't be
                # found.
                info(f"Attribute {attr} could not be found in {inst}")

            return prop.value

    class _Attr:
        """ Holder for parsing lines and extracting information from text files

        This consists of:

        attr:
            the name of the attribute
            This will be the `sile.info.<name>` access point.
        regex:
            the regular expression used to match a line.
            If a `str`, it will be compiled *as is* to a regex pattern.
            `regex.match(line)` will be used to check if the value should be updated.
        parser:
            if `regex.match(line)` returns a match that is true, then this parser will
            be executed.
            The parser *must* be a function accepting two arguments:

                def parser(attr, match)

            where `attr` is this object, and `match` is the match done on the line.
            (Note that `match.string` will return the full line used to match against).
        doc:
            optional documentation of the attribute, appended by `documentation`
        updatable:
            control whether a new match on the line will update using `parser`.
            If false, only the first match will update the value
        default:
            the default value of the attribute
        found:
            whether the value has been found in the file.
        """
        __slots__ = ("attr", "regex", "parser", "updatable", "value", "found", "doc")

        def __init__(self,
                     attr: str,
                     regex: Union[str, re.Pattern],
                     parser,
                     doc: str="",
                     updatable: bool=True,
                     default: Optional[Any]=None,
                     found: bool=False,
        ):
            self.attr = attr
            if isinstance(regex, str):
                regex = re.compile(regex)
            self.regex = regex
            self.parser = parser
            self.updatable = updatable
            self.value = default
            self.found = found
            self.doc = doc

        def process(self, line):
            """ Try to update the value from `line`, return whether the value was updated """
            if self.found and not self.updatable:
                # a static attribute is only parsed once
                return False

            match = self.regex.match(line)
            if match:
                self.value = self.parser(self, match)
                self.found = True
                return True

            return False

        def copy(self):
            """ Return a new object with the same settings and the current value as its default """
            return self.__class__(attr=self.attr,
                                  regex=self.regex,
                                  parser=self.parser,
                                  doc=self.doc,
                                  updatable=self.updatable,
                                  default=self.value,
                                  found=self.found)

        def documentation(self):
            """ Returns a documentation string for this object """
            if self.doc:
                doc = "\n" + indent(dedent(self.doc), " " * 4)
            else:
                doc = ""
            return f"{self.attr}[{self.value}]: r'{self.regex.pattern}'{doc}"

    def __init__(self, *args, **kwargs):
        """ Initialize the parent classes, then attach the `.info` object to this instance """
        super().__init__(*args, **kwargs)
        self.info = self._Info(self)


@set_module("sisl.io")
class Sile(Info, BaseSile):
""" Base class for ASCII files
All ASCII files that needs to be added to the global lookup table can
Expand Down

0 comments on commit 6428dee

Please sign in to comment.