diff --git a/src/resfo/_formatted/read.py b/src/resfo/_formatted/read.py index 20448f9..81a4b4a 100644 --- a/src/resfo/_formatted/read.py +++ b/src/resfo/_formatted/read.py @@ -21,7 +21,7 @@ class FormattedArray(ResArray): An array entry in an formatted res file. """ - def __init__(self, stream): + def __init__(self, stream, keys_as_bytes=False): self.start = stream.tell() self.stream = stream @@ -30,6 +30,7 @@ def __init__(self, stream): self._type = None self._data_start = None self._array = None + self._keys_as_bytes = keys_as_bytes self._is_eof = False @@ -107,6 +108,8 @@ def _read_number(self): def _read_keyword(self): self._keyword = self._read_quote_separated() + if self._keys_as_bytes: + self._keyword = self._keyword.encode("ascii") def _read_type(self): self._type = self._read_quote_separated().encode("ascii") diff --git a/src/resfo/_unformatted/read.py b/src/resfo/_unformatted/read.py index 21db765..426e91f 100644 --- a/src/resfo/_unformatted/read.py +++ b/src/resfo/_unformatted/read.py @@ -1,4 +1,5 @@ import io +from abc import abstractmethod import numpy as np import resfo.types as res_types @@ -8,7 +9,7 @@ from resfo.errors import ResfoParsingError -class UnformattedResArray(ResArray): +class _UnformattedResArray(ResArray): """ An array entry in a unformatted res file.
""" @@ -77,14 +78,9 @@ def _read_record_marker(self, expected_value): f"Unexpected size of record {value} ({value.to_bytes(4, byteorder='big', signed=True)})" ) + @abstractmethod def _read_keyword(self): - """ - With stream.peek() at the start of the keyword, reads - it into self._keyword - """ - self._keyword = self.stream.read(8).decode("ascii") - if not self._keyword or len(self._keyword) < 8: - raise ResfoParsingError("Reached end-of-file while reading keyword") + pass def _read_type(self): """ @@ -153,3 +149,33 @@ def _read(self): self._data_start = self.stream.tell() self.stream.seek(bytes_to_skip, io.SEEK_CUR) + + +class UnformattedResArray(_UnformattedResArray): + """ + An array entry in an unformatted res file, with the keyword decoded to str. + """ + + def _read_keyword(self): + """ + With stream.peek() at the start of the keyword, reads + it into self._keyword as an ascii-decoded str + """ + self._keyword = self.stream.read(8).decode("ascii") + if not self._keyword or len(self._keyword) < 8: + raise ResfoParsingError("Reached end-of-file while reading keyword") + + +class UnformattedResArrayBytes(_UnformattedResArray): + """ + An array entry in an unformatted res file, with the keyword kept as bytes. + """ + + def _read_keyword(self): + """ + With stream.peek() at the start of the keyword, reads + it into self._keyword as undecoded bytes + """ + self._keyword = self.stream.read(8) + if not self._keyword or len(self._keyword) < 8: + raise ResfoParsingError("Reached end-of-file while reading keyword") diff --git a/src/resfo/array_entry.py b/src/resfo/array_entry.py index fd85897..70cc994 100644 --- a/src/resfo/array_entry.py +++ b/src/resfo/array_entry.py @@ -108,18 +108,18 @@ def _read(self): pass @classmethod - def parse(cls, stream): + def parse(cls, stream, *args, **kwargs): """ Parse an res file from the given opened file handle.
Is a generator of ResArrays """ - record = cls(stream) + record = cls(stream, *args, **kwargs) record._read() while not record._is_eof: - new_record = cls(stream) + new_record = cls(stream, *args, **kwargs) yield record new_record._read() record = new_record diff --git a/src/resfo/read.py b/src/resfo/read.py index 757b07e..46b2f34 100644 --- a/src/resfo/read.py +++ b/src/resfo/read.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple from resfo._formatted.read import FormattedArray -from resfo._unformatted.read import UnformattedResArray +from resfo._unformatted.read import UnformattedResArray, UnformattedResArrayBytes from resfo.array_entry import ResArray from resfo.format import Format, check_correct_mode, get_stream, guess_format @@ -20,7 +20,9 @@ def read(*args, **kwargs) -> List[Tuple[str, "ReadArrayValue"]]: ] -def lazy_read(filelike, fileformat: Optional[Format] = None) -> Iterator[ResArray]: +def lazy_read( + filelike, fileformat: Optional[Format] = None, keys_as_bytes: bool = False +) -> Iterator[ResArray]: """ Reads the contents of an res file and generates the entries of that file. Each entry has a entry.read_keyword() and @@ -42,6 +44,10 @@ def lazy_read(filelike, fileformat: Optional[Format] = None) -> Iterator[ResArra :param fileformat: Either resfo.Format.FORMATTED for ascii format, resfo.Format.UNFORMATTED for binary formatted files or None for guess. + :param keys_as_bytes: If True, each keyword is given as bytes instead + of str. For fileformat=resfo.Format.UNFORMATTED the keyword bytes are + given undecoded, which is somewhat faster so can be useful for + files with many keywords. :raises resfo.ResfoParsingError: If the file is not a valid res file.
@@ -62,9 +68,12 @@ def lazy_read(filelike, fileformat: Optional[Format] = None) -> Iterator[ResArra try: if fileformat == Format.FORMATTED: - yield from FormattedArray.parse(stream) + yield from FormattedArray.parse(stream, keys_as_bytes=keys_as_bytes) else: - yield from UnformattedResArray.parse(stream) + if keys_as_bytes: + yield from UnformattedResArrayBytes.parse(stream) + else: + yield from UnformattedResArray.parse(stream) finally: if didopen: stream.close()