Skip to content

Commit

Permalink
Optimize summary reading
Browse files Browse the repository at this point in the history
The keys_as_bytes option passed to resfo.lazy_read depends on equinor/resfo#52
  • Loading branch information
eivindjahren committed Jan 4, 2024
1 parent 4526a60 commit 7a5f0fb
Showing 1 changed file with 33 additions and 35 deletions.
68 changes: 33 additions & 35 deletions src/ert/config/_read_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,8 @@ def read_summary(
filepath: str, fetch_keys: Sequence[str]
) -> Tuple[List[str], Sequence[datetime], Any]:
summary, spec = _get_summary_filenames(filepath)
date_index, start_date, date_units, keys, key_indecies = _read_spec(
spec, fetch_keys
)
fetched, time_map = _read_summary(
summary, start_date, date_units, key_indecies, date_index
)
date_index, start_date, date_units, keys, mask = _read_spec(spec, fetch_keys)
fetched, time_map = _read_summary(summary, start_date, date_units, mask, date_index)

return (keys, time_map, fetched)

Expand All @@ -278,23 +274,23 @@ def _key2str(key: Union[bytes, str]) -> str:

def _read_spec(
spec: str, fetch_keys: Sequence[str]
) -> Tuple[int, datetime, DateUnit, List[str], List[int]]:
) -> Tuple[int, datetime, DateUnit, List[str], npt.NDArray[Any]]:
date = None
n = None
nx = None
ny = None

arrays: Dict[str, Optional[npt.NDArray[Any]]] = {
arrays: Dict[bytes, Optional[npt.NDArray[Any]]] = {
kw: None
for kw in [
"WGNAMES",
"NUMS",
"KEYWORDS",
"NUMLX",
"NUMLY",
"NUMLZ",
"LGRNAMES",
"UNITS",
b"WGNAMES ",
b"NUMS ",
b"KEYWORDS",
b"NUMLX ",
b"NUMLY ",
b"NUMLZ ",
b"LGRNAMES",
b"UNITS ",
]
}

Expand All @@ -306,7 +302,7 @@ def _read_spec(
format = resfo.Format.UNFORMATTED

with open(spec, mode) as fp:
for entry in resfo.lazy_read(fp, format):
for entry in resfo.lazy_read(fp, format, keys_as_bytes=True):
if all(
p is not None
for p in (
Expand All @@ -320,21 +316,21 @@ def _read_spec(
)
):
break
kw = _key2str(entry.read_keyword())
kw = entry.read_keyword()
if kw in arrays:
vals = entry.read_array()
if vals is resfo.MESS or isinstance(vals, resfo.MESS):
raise ValueError(f"{kw} in {spec} was MESS")
arrays[kw] = vals
if kw == "DIMENS":
if kw == b"DIMENS ":
vals = entry.read_array()
if vals is resfo.MESS or isinstance(vals, resfo.MESS):
raise ValueError(f"DIMENS in {spec} was MESS")
size = len(vals)
n = vals[0] if size > 0 else None
nx = vals[1] if size > 1 else None
ny = vals[2] if size > 2 else None
if kw == "STARTDAT":
if kw == b"STARTDAT":
vals = entry.read_array()
if vals is resfo.MESS or isinstance(vals, resfo.MESS):
raise ValueError(f"Startdate in {spec} was MESS")
Expand All @@ -354,13 +350,13 @@ def _read_spec(
second=microsecond // 10**6,
microsecond=microsecond % 10**6,
)
keywords = arrays["KEYWORDS"]
wgnames = arrays["WGNAMES"]
nums = arrays["NUMS"]
numlx = arrays["NUMLX"]
numly = arrays["NUMLY"]
numlz = arrays["NUMLZ"]
lgr_names = arrays["LGRNAMES"]
keywords = arrays[b"KEYWORDS"]
wgnames = arrays[b"WGNAMES "]
nums = arrays[b"NUMS "]
numlx = arrays[b"NUMLX "]
numly = arrays[b"NUMLY "]
numlz = arrays[b"NUMLZ "]
lgr_names = arrays[b"LGRNAMES"]

if date is None:
raise ValueError(f"keyword startdat missing in {spec}")
Expand Down Expand Up @@ -407,22 +403,24 @@ def optional_get(arr: Optional[npt.NDArray[Any]], idx: int) -> Any:
indices.append(i)
keys.append(key)

units = arrays["UNITS"]
mask = np.in1d(np.arange(n), indices)

units = arrays[b"UNITS "]
if units is None:
raise ValueError(f"keyword units missing in {spec}")
if date_index is None:
raise ValueError(f"KEYWORDS did not contain TIME in {spec}")
if date_index >= len(units):
raise ValueError(f"Unit missing for TIME in {spec}")

return date_index, date, DateUnit[_key2str(units[date_index])], keys, indices
return date_index, date, DateUnit[_key2str(units[date_index])], keys, mask


def _read_summary(
summary: str,
start_date: datetime,
unit: DateUnit,
indices: List[int],
mask: npt.NDArray[Any],
date_index: int,
) -> Tuple[npt.NDArray[np.float32], List[datetime]]:
if summary.lower().endswith("funsmry"):
Expand All @@ -442,16 +440,16 @@ def read_params():
vals = last_params.read_array()
if vals is resfo.MESS or isinstance(vals, resfo.MESS):
raise ValueError(f"PARAMS in {summary} was MESS")
values.append(vals[indices])
values.append(vals[mask])
dates.append(start_date + unit.make_delta(float(vals[date_index])))
last_params = None

with open(summary, mode) as fp:
for entry in resfo.lazy_read(fp, format):
kw = _key2str(entry.read_keyword())
if kw == "PARAMS":
for entry in resfo.lazy_read(fp, format, keys_as_bytes=True):
kw = entry.read_keyword()
if kw == b"PARAMS ":
last_params = entry
if kw == "SEQHDR":
if kw == b"SEQHDR ":
read_params()
read_params()
return np.array(values).T, dates
Expand Down

0 comments on commit 7a5f0fb

Please sign in to comment.