# esoreader.py

# This file is licensed under the terms of the MIT license. See the file
# "LICENSE" in the project root for more information.
#
# This module was developed by Daren Thomas at the assistant chair for
# Sustainable Architecture and Building Technologies (Suat) at the Institute of
# Technology in Architecture, ETH Zuerich. See http://suat.arch.ethz.ch for
# more information.

'''
esoreader.py

A python module for reading \\*.eso files generated by EnergyPlus

The eso files generated by EnergyPlus contain a data dictionary, which
describes the values reported by EnergyPlus. The list of values reported
depends on the simulation input file, specifically the Output:Variable
objects. EnergyPlus can output the same variable at different
frequencies and for different "keys", which are for instance surfaces or
equipment names.

Following the data dictionary is a list of output variable values for
each of the configured variable coordinates.

The output of the esoreader module is therefore a data dictionary object
that contains a mapping of variable "coordinates" (grouping of reporting
frequency, key and variable name) to the index used by EnergyPlus and a
data object, which essentially just maps that index to the timeseries
data.

Example
=======

New interface:
::

    import esoreader
    PATH_TO_ESO = r'/Path/To/EnergyPlus/Output/eplusout.eso'
    eso = esoreader.read_from_path(PATH_TO_ESO)
    df = eso.to_frame('total heat loss energy')  #  pandas.DataFrame


Old interface: (still works)
::

    import esoreader

    PATH_TO_ESO = r'/Path/To/EnergyPlus/Output/eplusout.eso'
    dd, data = esoreader.read(PATH_TO_ESO)
    frequency, key, variable = dd.find_variable(
        'Zone Ventilation Total Heat Loss Energy')[0]
    idx = dd.index[frequency, key, variable]
    time_series = data[idx]
'''


def read(eso_file_path):
    """Parse the .eso file at ``eso_file_path`` (legacy entry point).

    Returns a 2-tuple ``(dd, data)``: the data dictionary of the file and
    the dictionary holding the reported values.

    NOTE: kept for backward compatibility only. New code should call
    read_from_path() and work with the EsoFile object it returns.
    """
    parsed = read_from_path(eso_file_path)
    return (parsed.dd, parsed.data)


def read_from_path(eso_file_path):
    """Open the .eso file at ``eso_file_path`` and parse it.

    The file is opened in text mode and handed to EsoFile, which consumes
    it while being constructed; the file is closed again before this
    function returns. The resulting EsoFile object can be used to build
    pandas DataFrame objects (see EsoFile.to_frame).
    """
    with open(eso_file_path, 'r') as handle:
        return EsoFile(handle)


class DataDictionary(object):
    """Describes the variables reported in an .eso file.

    Attributes:
        version: version string passed to the constructor.
        timestamp: timestamp string passed to the constructor.
        variables: dict {id (int) => [reporting_frequency, key,
            variable, unit]}.
        index: reverse lookup built by build_index(),
            dict {(reporting_frequency, key, variable) => id}.
        ids: set of variable ids. Starts out empty; the EsoFile parser
            assigns it after it has filled ``variables``.
    """

    def __init__(self, version=None, timestamp=None):
        '''
        Create an empty data dictionary.

        variables = dict of ids, int => [reporting_frequency,
                                         key, variable, unit]

        index = dict {(reporting_frequency, key, variable) => id}
        '''
        self.version = version
        self.timestamp = timestamp
        self.variables = {}
        self.index = {}
        # Always present so that readers of `ids` never hit an
        # AttributeError on a freshly constructed instance.
        self.ids = set()

    def build_index(self):
        """Build the reverse index used for finding ids.

        Adds one entry (reporting_frequency, key, variable) => id to
        ``self.index`` for every entry of ``self.variables``. Existing
        index entries are kept (or overwritten when the coordinates match).
        """
        for var_id, value in self.variables.items():
            reporting_frequency, key, variable, _unit = value
            self.index[reporting_frequency, key, variable] = var_id

    def find_variable(self, search):
        """Return the coordinates (timestep, key, variable_name) of all
        indexed variables whose name contains ``search``.

        The match is a case insensitive substring test on the variable
        name. The returned tuples can be used as keys into ``self.index``.
        """
        needle = search.lower()  # lower-case once, not once per entry
        return [(timestep, key, variable_name)
                for timestep, key, variable_name in self.index
                if needle in variable_name.lower()]


class EsoFile(object):
    """Parsed contents of an EnergyPlus .eso output file.

    The constructor consumes the given file object right away: first the
    data dictionary at the head of the file, then the reported values.

    Attributes:
        eso_file: the file object the content was read from.
        dd: the DataDictionary parsed from the head of the file.
        data: dict {id (int) => [float]} with the values reported for each
            variable of the data dictionary, in file order.
    """

    def __init__(self, eso_file):
        """Parse ``eso_file`` (an open text file object, positioned at the
        start of the .eso content)."""
        self.eso_file = eso_file
        self.dd = self._read_data_dictionary()
        self.dd.build_index()
        self.data = self._read_data()

    def find_variable(self, search, key=None, frequency='TimeStep'):
        """Return the coordinates (timestep, key, variable_name) in the
        data dictionary that can be used to find an index.

        ``search`` is matched case insensitively and need only be specified
        partially. Only variables reported at ``frequency`` are returned
        (case insensitive comparison). If ``key`` is given (and not empty),
        only variables with exactly that key (case insensitive) are kept;
        variables without a key never match a requested key.
        """
        wanted_frequency = frequency.lower()
        variables = [v for v in self.dd.find_variable(search)
                     if v[0].lower() == wanted_frequency]
        if key:
            wanted_key = key.lower()
            # Entries parsed from 3-field dictionary lines carry key=None;
            # guard against calling .lower() on them.
            variables = [v for v in variables
                         if v[1] is not None and v[1].lower() == wanted_key]
        return variables

    def to_frame(self, search, key=None, frequency='TimeStep', index=None,
                 use_key_for_columns=True):
        """
        Create a pandas DataFrame object with a column for every variable
        that matches the search pattern and key. A None key matches all
        keys.

        Columns are labelled with the variable's key, or with the variable
        name if ``use_key_for_columns`` is False. If several selected
        variables share the same label, only one of them ends up in the
        frame. If ``index`` is not None it replaces the frame's index.

        NOTE: The frequency *has* to be the same for all variables selected.
        (uses find_variable to select the variables)
        """
        # imported lazily so pandas is only needed when a frame is requested
        from pandas import DataFrame
        variables = self.find_variable(search, key=key, frequency=frequency)
        # position inside the (frequency, key, variable) coordinate tuple
        label_position = 1 if use_key_for_columns else 2
        data = {v[label_position]: self.data[self.dd.index[v]]
                for v in variables}
        df = DataFrame(data)
        if index is not None:
            df.index = index
        return df

    def _read_reporting_frequency(self, line):
        """Split the trailing ``!<frequency>`` marker off a dictionary line.

        Returns ``(line, reporting_frequency)``. ``reporting_frequency`` is
        None for lines without such a marker; in particular everything
        from the first '! ' (exclamation mark followed by a space) on is
        dropped as a comment.
        """
        reporting_frequency = None
        if '! ' in line:
            line = line.split('! ')[0]
        if ' !' in line:
            # The marker sits at the end of the line, so split at the last
            # occurrence; this also copes with ' !' appearing earlier.
            line, _, tail = line.rpartition(' !')
            # RunPeriod contains more stuff (" [Value,Min,Month,Day,Hour,
            # Minute, Max,Month,Day,Hour,Minute]") split it off
            words = tail.split()
            reporting_frequency = words[0] if words else None
        return line, reporting_frequency

    def _read_variable_unit(self, variable):
        """Split ``"name [unit]"`` into ``(name, unit)``.

        Without a '[' the text is returned unchanged together with a unit
        of None.
        """
        if '[' not in variable:
            return variable, None
        # the unit is the last bracketed part of the text
        name, _, unit = variable.rpartition('[')
        unit = unit[:-1]  # remove ']' at the end
        return name.strip(), unit

    def _read_data_dictionary(self):
        """Parse the head of the eso_file, returning the data dictionary.

        The file object eso_file is advanced to the position needed by
        _read_data (just past the 'End of Data Dictionary' line).

        Raises:
            ValueError: if the file ends before the 'End of Data
                Dictionary' line is found (or a line cannot be parsed).
        """
        header = self.eso_file.readline()
        version, timestamp = [s.strip() for s in header.split(',')[-2:]]
        dd = DataDictionary(version, timestamp)
        # readline() returns '' only at end of file, which ends the loop
        # instead of spinning forever on a truncated file.
        for raw_line in iter(self.eso_file.readline, ''):
            line = raw_line.strip()
            if line == 'End of Data Dictionary':
                break
            line, reporting_frequency = self._read_reporting_frequency(line)
            if not reporting_frequency:
                # ignore the lines that aren't report variables
                continue
            fields = [f.strip() for f in line.split(',')]
            if len(fields) >= 4:
                var_id, _nfields, key, variable = fields[:4]
            else:
                # entries without a key
                var_id, _nfields, variable = fields[:3]
                key = None
            variable, unit = self._read_variable_unit(variable)
            dd.variables[int(var_id)] = [reporting_frequency, key,
                                         variable, unit]
        else:
            raise ValueError("'End of Data Dictionary' not found: the .eso "
                             "file is truncated or malformed")
        dd.ids = set(dd.variables)
        return dd

    def _read_data(self):
        '''Parse the data section of the .eso file, returning a dict
        {id => [value]} with one (possibly empty) list per variable of the
        data dictionary.

        Reading stops at the line starting with 'End of Data' or at the end
        of the file. Blank lines and lines whose id is not a variable of
        the data dictionary are skipped.

        NOTE: self.eso_file must already be positioned behind the data
        dictionary, i.e. _read_data_dictionary() has to run first.'''
        data = {var_id: [] for var_id in self.dd.variables}  # id => [value]
        for line in self.eso_file:
            if line.startswith('End of Data'):
                break
            fields = [f.strip() for f in line.split(',')]
            if not fields[0]:
                # blank line
                continue
            values = data.get(int(fields[0]))
            if values is None:
                # skip entries that are not output:variables
                continue
            values.append(float(fields[1]))
        return data