From 28dba41d21907ada8040010c8fb42e5b6a0bdaf1 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Thu, 18 Feb 2021 09:13:39 -0700 Subject: [PATCH 01/13] Updated RTS-GMLC parser. New parser is targeted at reading a full set of data in RTS-GMLC format, and then generating multiple Egret models for various time spans within the read-in data. --- egret/parsers/__init__.py | 1 + egret/parsers/rts_gmlc/_reserves.py | 27 + egret/parsers/rts_gmlc/parsed_cache.py | 147 +++++ egret/parsers/rts_gmlc/parser.py | 744 ++++++++++++++++++++++++ egret/parsers/rts_gmlc_parser.py | 745 ------------------------- 5 files changed, 919 insertions(+), 745 deletions(-) create mode 100644 egret/parsers/rts_gmlc/_reserves.py create mode 100644 egret/parsers/rts_gmlc/parsed_cache.py create mode 100644 egret/parsers/rts_gmlc/parser.py delete mode 100644 egret/parsers/rts_gmlc_parser.py diff --git a/egret/parsers/__init__.py b/egret/parsers/__init__.py index 1f8378ce..bbb02141 100644 --- a/egret/parsers/__init__.py +++ b/egret/parsers/__init__.py @@ -7,3 +7,4 @@ # This software is distributed under the Revised BSD License. # ___________________________________________________________________________ +from .rts_gmlc import parser as rts_gmlc_parser diff --git a/egret/parsers/rts_gmlc/_reserves.py b/egret/parsers/rts_gmlc/_reserves.py new file mode 100644 index 00000000..d25dd12c --- /dev/null +++ b/egret/parsers/rts_gmlc/_reserves.py @@ -0,0 +1,27 @@ +# ___________________________________________________________________________ +# +# EGRET: Electrical Grid Research and Engineering Tools +# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +# Government retains certain rights in this software. +# This software is distributed under the Revised BSD License. 
# ___________________________________________________________________________

from __future__ import annotations

# Maps each RTS-GMLC reserve product name to the Egret requirement key it populates.
reserve_name_map = {
    'Spin_Up': 'spinning_reserve_requirement',
    'Reg_Up': 'regulation_up_requirement',
    'Reg_Down': 'regulation_down_requirement',
    'Flex_Up': 'flexible_ramp_up_requirement',
    'Flex_Down': 'flexible_ramp_down_requirement'
}

def is_valid_reserve_name(name:str, model_dict:dict=None):
    """
    Report whether a reserve name is one this parser knows how to handle

    A name is valid if it is a bare reserve product (a key of `reserve_name_map`),
    or if it has the form `<product>_R<area>` where `<product>` is a key of
    `reserve_name_map`.  When `model_dict` is supplied, `<area>` must also be
    a key of `model_dict['elements']['area']`.

    Parameters
    ----------
    name:str
        The reserve name to check
    model_dict:dict, optional
        A model dictionary used to validate the area portion of the name.
        If None, the area portion is not checked.

    Returns
    -------
    bool : True if the name is recognized, False otherwise
    """
    # System-wide reserve, no area suffix
    if name in reserve_name_map:
        return True

    # Area reserve: split at the first "_R" into product and area
    product, separator, area = name.partition('_R')
    if not separator:
        return False
    if product not in reserve_name_map:
        return False

    return model_dict is None or area in model_dict['elements']['area']
+# ___________________________________________________________________________ + +from __future__ import annotations + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from typing import Dict + from pandas import DataFrame + from datetime import datetime + +import copy + +from egret.data.model_data import ModelData + +from ._reserves import reserve_name_map + +class ParsedCache(): + + def __init__(self, model_skeleton:dict, + begin_time:datetime, end_time:datetime, + minutes_per_day_ahead_period:int, minutes_per_real_time_period:int, + timeseries_data:DataFrame, + load_participation_factors:Dict[str,float]): + self.skeleton = model_skeleton + self.begin_time = begin_time + self.end_time = end_time + self.timeseries_df = timeseries_data + self.minutes_per_period = { + 'DAY_AHEAD': minutes_per_day_ahead_period, + 'REAL_TIME': minutes_per_real_time_period + } + self.load_participation_factors = load_participation_factors + + # Find and save the index of the first row of each sim type in timeseries_df + cur_sim = self.timeseries_df['Simulation'][0] + self._first_indices = {cur_sim:0} + for i in range(1,len(self.timeseries_df)): + if self.timeseries_df['Simulation'].iat[i] != cur_sim: + cur_sim = self.timeseries_df['Simulation'].iat[i] + self._first_indices[cur_sim] = i + + + def generate_model(self, simulation_type:str, begin_time:datetime, end_time:datetime) -> ModelData: + md = copy.deepcopy(self.skeleton) + self._process_timeseries_data(md, simulation_type, begin_time, end_time) + self._insert_system_data(md, simulation_type, begin_time, end_time) + return ModelData(md) + + def _process_timeseries_data(self, md:dict, simulation_type:str, + begin_time:datetime, end_time:datetime) -> None: + df = self.timeseries_df + + # Go through each timeseries value for this simulation type + for i in range(self._first_indices[simulation_type], len(df)): + if df.iat[i, df.columns.get_loc('Simulation')] != simulation_type: + break + + category = df.iat[i, 
df.columns.get_loc('Category')] + + if category == 'Generator': + self._process_generator_timeseries(md, begin_time, end_time, i) + elif category == 'Area': + self._process_area_timeseries(md, begin_time, end_time, i) + elif category == 'Reserve': + self._process_reserve_timeseries(md, begin_time, end_time, i) + + def _process_generator_timeseries(self, md:dict, begin_time:datetime, + end_time:datetime, df_index:int): + df = self.timeseries_df + i = df_index + gen_name = df.iat[i, df.columns.get_loc('Object')] + gen_dict = md['elements']['generator'][gen_name] + param = df.iat[i, df.columns.get_loc('Parameter')] + data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time].to_list() + + if param == 'PMin MW': + gen_dict['p_min'] = { 'data_type': 'time_series', + 'values' : data } + elif param == 'PMax MW': + gen_dict['p_max'] = { 'data_type': 'time_series', + 'values' : data } + else: + raise ValueError(f"Unexpected generator timeseries data: {param}") + + def _process_area_timeseries(self, md:dict, begin_time:datetime, + end_time:datetime, df_index:int): + df = self.timeseries_df + i = df_index + + area_name = df.iat[i, df.columns.get_loc('Object')] + param = df.iat[i, df.columns.get_loc('Parameter')] + assert(param == "MW Load") + data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time] + + for bus, load_dict in md['elements']['load'].items(): + # Skip loads from other areas + if load_dict['area'] != area_name: + continue + + # Replace skeleton's p_load with the timeseries data, scaled by the load's + # portion of the area's total load. 
+ # Also replace q_load, if present, with timeseries + p_factor = self.load_participation_factors[bus] + # save skeleton's scalar p_load + p_load = load_dict['p_load'] if 'p_load' in load_dict else None + # overwrite skeleton's p_load with timeseries + load_dict['p_load'] = { 'data_type': 'time_series', + 'values' : [v*p_factor for v in data] } + if p_load is not None and 'q_load' in load_dict: + q_over_p = load_dict['q_load'] / p_load + load_dict['q_load'] = { 'data_type': 'time_series', + 'values' : [v*q_over_p for v in load_dict['p_load']['values']] } + + def _process_reserve_timeseries(self, md:dict, begin_time:datetime, + end_time:datetime, df_index:int): + df = self.timeseries_df + i = df_index + + res_name = df.iat[i, df.columns.get_loc('Object')] + + if res_name in reserve_name_map: + target_dict = md['system'] + else: + # reserve name must be _R, + # split into type and area + res_name, area_name = res_name.split("_R", 1) + target_dict = md['elements']['area'][area_name] + + data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time] + target_dict[reserve_name_map[res_name]] = { 'data_type': 'time_series', + 'values' : data.to_list() } + + def _insert_system_data(self, md:dict, simulation_type:str, + begin_time:datetime, end_time:datetime): + md['system']['time_period_length_minutes'] = self.minutes_per_period[simulation_type] + + df = self.timeseries_df + sample_df = df.iat[self._first_indices[simulation_type], df.columns.get_loc('Series')] + dates = sample_df[begin_time:end_time].index + md['system']['time_keys'] = [dt.strftime('%Y-%m-%d %H:%M') for dt in dates] diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py new file mode 100644 index 00000000..6d6874f4 --- /dev/null +++ b/egret/parsers/rts_gmlc/parser.py @@ -0,0 +1,744 @@ +# ___________________________________________________________________________ +# +# EGRET: Electrical Grid Research and Engineering Tools +# Copyright 2019 National Technology & Engineering 
Solutions of Sandia, LLC +# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +# Government retains certain rights in this software. +# This software is distributed under the Revised BSD License. +# ___________________________________________________________________________ + +from __future__ import annotations + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from typing import Dict, Union + +import os.path +import pandas as pd +from datetime import datetime, timedelta +from collections import namedtuple + +import egret.data.model_data as md + +from .parsed_cache import ParsedCache +from ._reserves import is_valid_reserve_name + +def create_ModelData(rts_gmlc_dir:str, + begin_time:Union[datetime,str], end_time:Union[datetime,str], + simulation:str="DAY_AHEAD", t0_state:dict = None): + + """ + Create a ModelData object from the RTS-GMLC data. + + Parameters + ---------- + rts_gmlc_dir : str + Path to RTS-GMLC directory + begin_time : datetime.datetime or str + Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, + the later of which assumes a midnight start. + end_time : datetime.datetime or str + End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, + the later of which assumes a midnight start. + simulation : str + Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, + default is "DAY_AHEAD". + t0_state : dict or Nonetype + Keys of this dict are thermal generator names, each element of which is another dictionary with + keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the + generator is on at t0, the real power output at t0, and the reactive power output at t0. + If this is None, default values are loaded. 
+ + Returns + ------- + egret.model_data.ModelData + Returns a ModelData object with the timeseries data specified + """ + return md.ModelData(create_model_data_dict(rts_gmlc_dir, begin_time, end_time, simulation, t0_state)) + +def create_model_data_dict(rts_gmlc_dir:str, + begin_time:Union[datetime,str], end_time:Union[datetime,str], + simulation:str="DAY_AHEAD", t0_state:dict = None): + + """ + Create a model_data dictionary from the RTS-GMLC data. + + Parameters + ---------- + rts_gmlc_dir : str + Path to RTS-GMLC directory + begin_time : datetime.datetime or str + Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, + the later of which assumes a midnight start. + end_time : datetime.datetime or str + End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, + the later of which assumes a midnight start. + simulation : str + Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, + default is "DAY_AHEAD". + t0_state : dict or Nonetype + Keys of this dict are thermal generator names, each element of which is another dictionary with + keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the + generator is on at t0, the real power output at t0, and the reactive power output at t0. + If this is None, default values are loaded. + + Returns + ------- + dict : A dictionary in the format required for the ModelData object. 
+ """ + cache = parse_to_cache(rts_gmlc_dir, begin_time, end_time) + model = cache.generate_model(simulation, begin_time, end_time) + if t0_state is not None: + for name, gen in model['elements']['generator']: + if gen['generator_type']=='thermal': + gen['initial_status'] = t0_state[name]['initial_status'] + gen['initial_p_output'] = t0_state[name]['initial_p_output'] + gen['initial_q_output'] = t0_state[name]['initial_q_output'] + return model + + +def parse_to_cache(rts_gmlc_dir:str, + begin_time:datetime, + end_time:datetime) -> ParsedCache: + ''' Parse data in RTS-GMLC format, keeping the portions between a start and end time + + rts_gmlc_dir : str + Path to RTS-GMLC directory + begin_time : datetime.datetime or str + Beginning of time horizon. + end_time : datetime.datetime or str + End of time horizon. + simulation : str + Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, + default is "DAY_AHEAD". + ''' + if not os.path.exists(rts_gmlc_dir): + raise ValueError(f'RTS-GMLC directory "{rts_gmlc_dir}" does not exist') + + # Create the skeleton + model_data = _create_rtsgmlc_skeleton(rts_gmlc_dir) + + # Save the data frequencies + metadata_df = _read_metadata(rts_gmlc_dir) + minutes_per_period = {'DAY_AHEAD':int(metadata_df.loc['Period_Resolution', 'DAY_AHEAD'])//60, + 'REAL_TIME':int(metadata_df.loc['Period_Resolution', 'REAL_TIME'])//60} + + data_start, data_end = _get_data_date_range(metadata_df) + + # TODO: Validate begin_time and end_time + + timeseries_df = _read_timeseries_data(model_data, rts_gmlc_dir, + begin_time, end_time, minutes_per_period) + + load_participation_factors = _compute_bus_load_participation_factors(model_data) + + return ParsedCache(model_data, begin_time, end_time, + minutes_per_period['DAY_AHEAD'], minutes_per_period['REAL_TIME'], + timeseries_df, load_participation_factors) + + +def _read_metadata(base_dir:str) -> pd.DataFrame: + metadata_path = os.path.join(base_dir, 
"simulation_objects.csv") + if not os.path.exists(metadata_path): + raise ValueError(f'RTS-GMLC directory "{rts_gmlc_dir}" does not contain expected CSV files.') + + # Read metadata about the data + metadata_df = pd.read_csv(metadata_path, index_col=0) + + return metadata_df + +def _get_data_date_range(metadata_df): + ''' Get the range of dates for which there is data available + ''' + import dateutil.parser + + # Data start time + row = metadata_df.loc['Date_From'] + data_start = max(dateutil.parser.parse(row['DAY_AHEAD']), + dateutil.parser.parse(row['REAL_TIME'])) + + # Data end time is a little more tricky + row = metadata_df.loc['Date_To'] + def _extract_end_date(which:str): + # The actual end date is the metadata's Date_To plus a number of look ahead periods. + # Each look ahead period is a specified number of seconds + extra_seconds = int(metadata_df.loc['Look_Ahead_Periods_per_Step'][which]) * \ + int(metadata_df.loc['Look_Ahead_Resolution'][which]) + end_date = dateutil.parser.parse(row[which]) + return end_date + timedelta(seconds=extra_seconds) + # Get the end date for each kind of data. Both kinds of data + # are available up through the later of the two simulation categories. 
+ data_end = max(_extract_end_date('DAY_AHEAD'), + _extract_end_date('REAL_TIME')) + + return (data_start, data_end) + +def _read_timeseries_file(file_name:str, minutes_per_period:int, + start_time:datetime, end_time:datetime, + series_name:str) -> pd.DataFrame: + """ + Read data from a timeseries file, returning only the data that falls within the requested time range + + Parameters + ---------- + file_name:str + Path to a CVS file with timeseries data + minutes_per_period:int + The number of minutes between time periods in the data + start_time:datetime + The earliest time to include in the returned data + end_time:datetime + The first time to NOT include in the returned data + series_name:str + The name the column holding the resulting data + + Returns + ------- + df : A DataFrame with data between the specified dates + + Timeseries files can be in one of two formats, columnar or 2D. A columnar file can hold more + than one timeseries in the same file, with one row per time period and one column per series. + A 2D timeseries file holds only one timeseries, with one row per day and one column per time + period within the day. This function reads from either format. + + If the indicated file is columnar, the returned DataFrame will include all columns in the file, + one column per time series, with columns named as they appear in the file. + + If the indicated file is 2D, the returned DataFrame will have a single column whose name + is assigned to be the `series_name` passed into the function. 
+ """ + # Determine which layout we're working with by checking column headers + headers = pd.read_csv(file_name, nrows=0).columns.tolist() + if headers[3] == 'Period': + return _read_columnar_timeseries_file(file_name, minutes_per_period, start_time, end_time) + else: + df = _read_2D_timeseries_file(file_name, minutes_per_period, start_time, end_time, series_name) + return df + +def _read_columnar_timeseries_file(file_name:str, minutes_per_period:int, + start_time:datetime, end_time:datetime) -> pd.DataFrame: + """ + Read data from a timeseries file, returning only the data that falls within the requested time range + + Parameters + ---------- + file_name:str + Path to a CVS file with timeseries data + minutes_per_period:int + The number of minutes between time periods in the data + start_time:datetime + The earliest time to include in the returned data + end_time:datetime + The first time to NOT include in the returned data + + Returns + ------- + df : A DataFrame with data between the specified dates + + The returned DataFrame converts the first 4 columns into a datetime which is used as + the DataFrame's index. All other CSV columns are included as columns in the DataFrame. + """ + _date_parser = lambda *columns: datetime(*map(int,columns[0:3])) + \ + timedelta(minutes = minutes_per_period*(int(columns[3])-1)) + df = pd.read_csv(file_name, + header=0, + parse_dates=[[0, 1, 2, 3]], + date_parser=_date_parser, + index_col=0) + df.index.names = ['DateTime'] + + df.sort_index(inplace=True) + + # Remove data outside requested time period. + # DataFrame slices include the end of the slice, so we need to reduce it slightly to + # avoid including an extra value at the end. 
+ end_time = end_time - timedelta(seconds=1) + df = df[start_time:end_time] + + # Be sure to return a copy instead of a view into the larger data set + return df.copy() + +def _read_2D_timeseries_file(file_name:str, minutes_per_period:int, + start_time:datetime, end_time:datetime, + column_name:str) -> DataFrame: + """ + Read data from a timeseries file with a 2D layout, returning only the data that falls within the requested time range + + Parameters + ---------- + file_name:str + Path to a CVS file with reserve-formatted timeseries data + start_time:datetime + The earliest time to include in the returned data + end_time:datetime + The first time to NOT include in the returned data + minutes_per_period:int + The number of minutes between time periods in the data + column_name:str + The name that should be given to the DataFrame's column + + Returns + ------- + df : A single-column pandas DataFrame with data between the specified dates + + The returned DataFrame has one row per cell in the original CSV, indexed by the cell's + datetime. The first 3 columns indicate the date for all cells in the row, and other + column headers indicate the time within the day. Only data within the requested time + period is included in the results. Like a typical python range, the returned data includes + the start_time but does not include the end_time. + """ + _date_parser = lambda *columns: datetime(*map(int,columns[0:3])) + df = pd.read_csv(file_name, + header=0, + parse_dates=[[0, 1, 2]], + date_parser=_date_parser, + index_col=0) + df.sort_index(inplace=True) + + # Remove data outside requested time period. + # DataFrame slices include the end of the slice, so we need to reduce it slightly to + # avoid including an extra value at the end. 
+ end_time = end_time - timedelta(seconds=1) + df = df[start_time:end_time] + + # Now divide rows into one row per column, resetting index to appropriate datetime + s = df.stack() + s.index = map(lambda i: i[0] + timedelta(minutes=minutes_per_period*(int(i[1])-1)), s.index) + s.index.names = ['DateTime'] + + # Filter one more time, trimming out times of day that fall before or after the requested range + if s.index[0] < start_time or s.index[-1] >= end_time: + s = s[start_time:end_time] + + # Create and return a new 1-column DataFrame + return pd.DataFrame({column_name: s}) + +def _create_rtsgmlc_skeleton(rts_gmlc_dir): + """ + Creates a data dictionary from the RTS-GMLC data files, without loading hourly data + + Parameters + ---------- + rts_gmlc_dir : str + Path to RTS-GMLC directory + + Returns + ------- + data : dict + Returns a dict loaded from the RTS-GMLC data + """ + + base_dir = rts_gmlc_dir + + model_data = md.ModelData.empty_model_data_dict() + + elements = model_data["elements"] + system = model_data["system"] + + system["name"] = "RTS-GMLC" + + # this is the default used in the MATPOWER writer for RTS-GMLC + system["baseMVA"] = 100. 
+ + elements["bus"] = {} + elements["load"] = {} + elements["shunt"] = {} + + # add the buses + bus_types = {'PQ': 'PQ', + 'PV': 'PV', + 'Ref': 'ref'} + bus_id_to_name = {} + bus_areas = set() + bus_df = pd.read_csv(os.path.join(base_dir,'bus.csv')) + for idx,row in bus_df.iterrows(): + BUS_TYPE = row['Bus Type'] + if not BUS_TYPE in bus_types: + raise ValueError(f'Encountered an unsupported bus type: "{BUS_TYPE}" when parsing RTS-GMLC input file') + + bus_name = str(row['Bus Name']) + bus_dict = { + "id": str(row['Bus ID']), + "base_kv": float(row['BaseKV']), + "matpower_bustype": bus_types[BUS_TYPE], + "vm": float(row['V Mag']), + "va": float(row['V Angle']), + "v_min": 0.95, + "v_max": 1.05, + "area": str(row['Area']), + "zone": str(int(row['Zone'])), + } + + if bus_dict["base_kv"] <= 0: + raise ValueError(f'BaseKV value for bus "{bus_name}" is <= 0. Not supported.') + + PD = float(row['MW Load']) + QD = float(row['MVAR Load']) + if PD != 0 or QD != 0: + load_dict = { + "bus": bus_name, + "in_service":True, + "p_load": PD, + "q_load": QD, + "area": bus_dict['area'], + "zone": bus_dict['zone'] + } + elements["load"][bus_name] = load_dict + + GS = float(row['MW Shunt G']) + BS = float(row['MVAR Shunt B']) + if GS != 0 or BS != 0: + shunt_dict = { + "shunt_type":"fixed", + "bus": bus_name, + "gs": GS, + "bs": BS + } + elements["shunt"][bus_name] = shunt_dict + + if BUS_TYPE == 'Ref': + va = bus_dict['va'] + if va != 0: + if abs(va) >= 1e-16: + raise ValueError('EGRET only supports reference buses with an angle of 0 degrees.') + msg = "\nEgret only supports reference buses with an angle of 0 degrees. \nFound a " \ + "reference bus with an angle close to 0. \n" \ + "Value: {va}\nSetting reference bus angle to 0." 
+ warnings.warn(msg) + bus_dict['va'] = 0.0 + system["reference_bus"] = bus_name + system["reference_bus_angle"] = 0 + + bus_id_to_name[bus_dict['id']] = bus_name + bus_areas.add(bus_dict['area']) + elements['bus'][bus_name] = bus_dict + + # add the areas + elements['area'] = {name:dict() for name in bus_areas} + + # add the branches + elements["branch"] = {} + branch_df = pd.read_csv(os.path.join(base_dir,'branch.csv')) + for idx,row in branch_df.iterrows(): + + branch_dict = { + "from_bus": bus_id_to_name[str(row['From Bus'])], + "to_bus": bus_id_to_name[str(row['To Bus'])], + "in_service": True, + "resistance": float(row['R']), + "reactance": float(row['X']), + "charging_susceptance": float(row['B']), + "rating_long_term": float(row['Cont Rating']) or None, + "rating_short_term": float(row['LTE Rating']) or None, + "rating_emergency": float(row['STE Rating']) or None, + "angle_diff_min": -90, + "angle_diff_max": 90, + "pf": None, + "qf": None, + "pt": None, + "qt": None + } + + TAP = float(row['Tr Ratio']) + if TAP != 0.0: + branch_dict["branch_type"] = "transformer" + branch_dict["transformer_tap_ratio"] = TAP + branch_dict["transformer_phase_shift"] = 0.0 + else: + branch_dict["branch_type"] = "line" + + name = str(row['UID']) + elements["branch"][name] = branch_dict + + # add the DC branches + if os.path.exists(os.path.join(base_dir,'dc_branch.csv')): + branch_df = pd.read_csv(os.path.join(base_dir,'dc_branch.csv')) + for idx,row in branch_df.iterrows(): + + # TODO: I have no idea what field names Egrets expects or supports for DC branches. + # The code below is just a placeholder. 
+ branch_dict = { + "from_bus": bus_id_to_name[str(row['From Bus'])], + "to_bus": bus_id_to_name[str(row['To Bus'])], + "in_service": True, + "branch_type": "dc", + "resistance": float(row['R Line']) + } + + name = str(row['UID']) + elements["branch"][name] = branch_dict + + # add the generators + elements["generator"] = {} + RENEWABLE_TYPES = {'WIND', 'HYDRO', 'RTPV', 'PV'} + gen_df = pd.read_csv(os.path.join(base_dir,'gen.csv')) + for idx,row in gen_df.iterrows(): + # if this is storage we need to handle it differently + if row['Fuel'] == 'Storage': + continue + + # NOTE: for now, Egret doesn't handle CSP -- not clear how to model + if row['Unit Type'] == 'CSP': + continue + + name = str(row['GEN UID']) + bus_name = bus_id_to_name[str(row['Bus ID'])] + gen_dict = { + "bus": bus_name, + "in_service": True, + "mbase": 100.0, + "pg": float(row['MW Inj']), + "qg": float(row['MVAR Inj']), + "vg": float(row['V Setpoint p.u.']), + "p_min": float(row['PMin MW']), + "p_max": float(row['PMax MW']), + "q_min": float(row['QMin MVAR']), + "q_max": float(row['QMax MVAR']), + "ramp_q": float(row['Ramp Rate MW/Min']), + "fuel": str(row['Fuel']), + "unit_type": str(row['Unit Type']), + "area": elements['bus'][bus_name]['area'], + "zone": elements['bus'][bus_name]['zone'] + } + + UNIT_TYPE = str(row['Unit Type']) + if UNIT_TYPE in RENEWABLE_TYPES: + gen_dict["generator_type"] = "renewable" + elif UNIT_TYPE == 'SYNC_COND': + ## TODO: should we have a flag for these? + gen_dict["generator_type"] = "thermal" + else: + gen_dict["generator_type"] = "thermal" + + elements["generator"][name] = gen_dict + + # after this is only really needed for thermal units + if UNIT_TYPE in RENEWABLE_TYPES: + continue + + # Gen cost + ## round as in RTS-GMLC Prescient/topysp.py + pmax = float(row['PMax MW']) + x = {i: round(float(row[f'Output_pct_{i}'])*pmax, 1) + for i in range(4) + } + + ## /1000. 
from the RTS-GMLC MATPOWER writer -- + ## heat rates are in BTU/kWh, 1BTU == 10^-6 MMBTU, 1kWh == 10^-3 MWh, so MMBTU/MWh == 10^3/10^6 * BTU/kWh + f = {} + f[0] = (float(row['HR_avg_0'])*1000./ 1000000.)*x[0] + for i in range(1,4): + f[i] = (((x[i]-x[i-1])*(float(row[f'HR_incr_{i}'])*1000. / 1000000.))) + f[i-1] + + fuel_price = float(row['Fuel Price $/MMBTU']) + y = {i: fuel_price*f[i] for i in range(4)} + + # only include the cost coeffecients that matter + P_COEFF = [ (x[i], round(y[i],2)) for i in range(4) if (((i == 0) or (x[i-1],y[i-1]) != (x[i], y[i])) and (x[i], y[i]) != (0.,0.)) ] + if P_COEFF == []: + P_COEFF = [(pmax, 0.0)] + + F_COEFF = [ (x[i], round(f[i],2)) for i in range(4) if (((i == 0) or (x[i-1],f[i-1]) != (x[i], f[i])) and (x[i], f[i]) != (0.,0.)) ] + if F_COEFF == []: + F_COEFF = [(pmax, 0.0)] + + # UC Data + MIN_DN_TIME = float(row['Min Down Time Hr']) + + # Startup types and costs + COLD_HEAT = float(row['Start Heat Cold MBTU']) + WARM_HEAT = float(row['Start Heat Warm MBTU']) + HOT_HEAT = float(row['Start Heat Hot MBTU']) + + COLD_TIME = float(row['Start Time Cold Hr']) + WARM_TIME = float(row['Start Time Warm Hr']) + HOT_TIME = float(row['Start Time Hot Hr']) + + FIXED_START_COST = float(row['Non Fuel Start Cost $']) + + if (COLD_TIME <= MIN_DN_TIME) or (COLD_TIME == WARM_TIME == HOT_TIME): + STARTUP_COSTS = [(MIN_DN_TIME, round(COLD_HEAT*fuel_price + FIXED_START_COST, 2))] + STARTUP_FUEL = [(MIN_DN_TIME, COLD_HEAT)] + + elif WARM_TIME <= MIN_DN_TIME: + STARTUP_COSTS = [(MIN_DN_TIME, round(WARM_HEAT*fuel_price + FIXED_START_COST, 2)),\ + (COLD_TIME, round(COLD_HEAT*fuel_price + FIXED_START_COST, 2))] + STARTUP_FUEL = [(MIN_DN_TIME, WARM_HEAT),\ + (COLD_TIME, COLD_HEAT)] + + else: + STARTUP_COSTS = [(MIN_DN_TIME, round(HOT_HEAT*fuel_price+FIXED_START_COST,2)),\ + (WARM_TIME, round(WARM_HEAT*fuel_price+FIXED_START_COST,2)),\ + (COLD_TIME, round(COLD_HEAT*fuel_price+FIXED_START_COST,2))] + STARTUP_FUEL = [(MIN_DN_TIME, HOT_HEAT),\ + 
(WARM_TIME, WARM_HEAT),\ + (COLD_TIME, COLD_HEAT)] + gen_dict["startup_cost"] = STARTUP_COSTS + gen_dict["startup_fuel"] = STARTUP_FUEL + gen_dict["shutdown_cost"] = 0.0 + + gen_dict["pc1"] = 0.0 + gen_dict["pc2"] = 0.0 + gen_dict["qc1_min"] = 0.0 + gen_dict["qc1_max"] = 0.0 + gen_dict["qc2_min"] = 0.0 + gen_dict["qc2_max"] = 0.0 + gen_dict["agc_capable"] = True + gen_dict["p_min_agc"] = gen_dict["p_min"] + gen_dict["p_max_agc"] = gen_dict["p_max"] + + ramp_q = gen_dict['ramp_q'] + gen_dict["ramp_agc"] = ramp_q + gen_dict["ramp_10"] = 10.*ramp_q + gen_dict["ramp_30"] = 30.*ramp_q + gen_dict["ramp_up_60min"] = 60.*ramp_q + gen_dict["ramp_down_60min"] = 60.*ramp_q + + gen_dict["power_factor"] = 0.0 + gen_dict["p_cost"] = {"data_type": "cost_curve", "cost_curve_type":"piecewise", "values": P_COEFF } + gen_dict["p_fuel"] = {"data_type": "fuel_curve", "values": F_COEFF } + gen_dict["fuel_cost"] = fuel_price + + # these assumptions are the same as prescient-rtsgmlc + gen_dict["startup_capacity"] = gen_dict['p_min'] + gen_dict["shutdown_capacity"] = gen_dict['p_min'] + gen_dict["min_up_time"] = float(row['Min Up Time Hr']) + gen_dict["min_down_time"] = MIN_DN_TIME + gen_dict["must_run"] = False + + elements["generator"][name] = gen_dict + + return model_data + +def _compute_bus_load_participation_factors(model_data): + ''' + compute aggregate load per area, and then compute + load participation factors from each bus from that data. 
+ + Returns + ======= + participation_factors:dict[str,float] + Maps bus name to the fraction of its area load that it carries (0 to 1) + ''' + elements = model_data['elements'] + + # Sum the loads for each area + area_total_load = {area:0 for area in elements['area']} + for name, load in elements["load"].items(): + area = elements["bus"][load["bus"]]["area"] + area_total_load[area] += load["p_load"] + + bus_load_participation_factors = {} + for name, load in elements["load"].items(): + area = elements["bus"][load["bus"]]["area"] + bus_load_participation_factors[name] = load["p_load"] / area_total_load[area] + + return bus_load_participation_factors + +def _read_timeseries_data(model_data:dict, rts_gmlc_dir:str, + start_time:datetime, end_time:datetime, + minutes_per_period:Dict[str,int]): + """ + Parse all relevant timeseries files + + Returns + ======= + all_timeseries: DataFrame + A DataFrame with the following columns: + [Simulation, Category, Object, Parameter, Scaling Factor, Series] + + The Series column holds the data as a pandas series, indexed by the datetime + of the value. 
+ + """ + # Where we'll keep our results + timeseries_data = {'DAY_AHEAD':{}, 'REAL_TIME':{}} + + # All timeseries data that has already been read (map[filename] -> DataFrame) + timeseries_file_map = {} + + timeseries_pointer_df = pd.read_csv(os.path.join(rts_gmlc_dir, "timeseries_pointers.csv"), header=0) + + elements = model_data['elements'] + params_of_interest = { + 'Generator': { 'PMin MW', 'PMax MW'}, + 'Reserve': {'Requirement'}, + 'Area': {'MW Load'} + } + + # Add a column to timeseries DF to reference the parsed data + # instead of the file name + timeseries_pointer_df['Series'] = None + + # Store the timeseries data in the timeseries DF + for idx,row in timeseries_pointer_df.iterrows(): + # Skip rows we don't ingest + if not row['Category'] in params_of_interest: + continue + if not row['Parameter'] in params_of_interest[row['Category']]: + continue + + # Skip generators not in skeleton + if row['Category'] == 'Generator' and not row['Object'] in elements['generator']: + continue + # Skip areas not in skeleton + if row['Category'] == 'Area' and not row['Object'] in elements['area']: + continue + + is_reserve = (row['Category'] == 'Reserve') + if is_reserve: + # Skip unrecognized reserve names + name = str(row['Object']) + if not is_valid_reserve_name(name, model_data): + continue + + # Read the timeseries file if we haven't already, using the + # canonical file path as a key into previously read filenames. 
+ fname = os.path.abspath(os.path.join(rts_gmlc_dir, row['Data File'])) + if not fname in timeseries_file_map: + sim = row['Simulation'] + data = _read_timeseries_file(fname, minutes_per_period[sim], + start_time, end_time, row['Object']) + timeseries_file_map[fname] = data + + # Save a reference to the relevant data as a Series + scaling_factor = float(row['Scaling Factor']) + timeseries_pointer_df.at[idx,'Series'] = timeseries_file_map[fname][row['Object']]*scaling_factor + + # Remove the file path from the DF + timeseries_pointer_df.pop('Data File') + + # Remove irrelevant rows + timeseries_pointer_df.dropna(subset=['Series'], inplace=True) + + # Sort by simulation + timeseries_pointer_df.sort_values(by='Simulation', inplace=True) + + return timeseries_pointer_df + +def _get_datetimes_from_strings(begin_time:str, end_time:str): + + datetime_format = "%Y-%m-%d %H:%M:%S" + + datestr = "YYYY-DD-MM" + midnight = " 00:00:00" + + if isinstance(begin_time,datetime): + pass + elif isinstance(begin_time,str): + if len(begin_time) == len(datestr): + begin_time += midnight + begin_time = datetime.strptime(begin_time,datetime_format) + else: + raise ValueError("Unable to parse begin_time") + + if isinstance(end_time,datetime): + pass + elif isinstance(end_time,str): + if len(end_time) == len(datestr): + end_time += midnight + end_time = datetime.strptime(end_time,datetime_format) + else: + raise ValueError("Unable to parse end_time") + + return begin_time, end_time diff --git a/egret/parsers/rts_gmlc_parser.py b/egret/parsers/rts_gmlc_parser.py deleted file mode 100644 index 664193af..00000000 --- a/egret/parsers/rts_gmlc_parser.py +++ /dev/null @@ -1,745 +0,0 @@ -# ___________________________________________________________________________ -# -# EGRET: Electrical Grid Research and Engineering Tools -# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC -# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. 
-# Government retains certain rights in this software. -# This software is distributed under the Revised BSD License. -# ___________________________________________________________________________ - -""" -This module provides supporting functions for interacting with the RTS-GMLC data - -It includes methods to parse the RTS-GMLC data and load them into a TemporalGridNetwork object - -""" - -import os.path -import egret.data.model_data as md -import pandas as pd -import math -from datetime import datetime, timedelta -from collections import namedtuple - - -def create_ModelData(rts_gmlc_dir, begin_time, end_time, simulation="DAY_AHEAD", t0_state = None): - - """ - Create a ModelData object from the RTS-GMLC data. - - Parameters - ---------- - rts_gmlc_dir : str - Path to RTS-GMLC directory - begin_time : datetime.datetime or str - Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - end_time : datetime.datetime or str - End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - simulation : str - Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, - default is "DAY_AHEAD". - t0_state : dict or Nonetype - Keys of this dict are thermal generator names, each element of which is another dictionary with - keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the - generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, default values are loaded. 
- - Returns - ------- - egret.model_data.ModelData - Returns a ModelData object with the timeseries data specified - """ - return md.ModelData(create_model_data_dict(rts_gmlc_dir, begin_time, end_time, simulation, t0_state)) - -def create_model_data_dict(rts_gmlc_dir, begin_time, end_time, simulation="DAY_AHEAD", t0_state = None): - - """ - Create a model_data dictionary from the RTS-GMLC data. - - Parameters - ---------- - rts_gmlc_dir : str - Path to RTS-GMLC directory - begin_time : datetime.datetime or str - Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - end_time : datetime.datetime or str - End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - simulation : str - Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, - default is "DAY_AHEAD". - t0_state : dict or Nonetype - Keys of this dict are thermal generator names, each element of which is another dictionary with - keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the - generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, default values are loaded. - - Returns - ------- - dict : A dictionary in the format required for the ModelData object. 
- """ - - simulation = simulation.upper() - if simulation not in ["DAY_AHEAD", "REAL_TIME"]: - raise ValueError('simulation must be "DAY_AHEAD" or "REAL_TIME"') - - base_dir = os.path.join(rts_gmlc_dir,'RTS_Data','SourceData') - - begin_time, end_time = _get_datetimes(begin_time, end_time) - - - TimeSeriesPointer = namedtuple('TimeSeriesPointer', - ['Object', - 'Simulation', - 'Parameter', - 'DataFile']) - - DateTimeValue = namedtuple('DateTimeValue', - ['DateTime', 'Value']) - - Load = namedtuple('Load', - ['DateTime', - 'Area1', - 'Area2', - 'Area3']) - - timeseries_pointer_df = pd.read_csv(os.path.join(base_dir, "timeseries_pointers.csv"), header=0, sep=',') - - time_delta = end_time - begin_time - - hours = 24*time_delta.days + math.ceil(time_delta.seconds/3600.) - - model_data = _create_rtsgmlc_skeleton(rts_gmlc_dir) - - ## create an object for easy iterating - md_obj = md.ModelData(model_data) - - system = md_obj.data["system"] - elements = md_obj.data["elements"] - - if simulation == "DAY_AHEAD": - system["time_period_length_minutes"] = 60 - else: - system["time_period_length_minutes"] = 5 - - # compute aggregate load per area, and then compute - # load participation factors from each bus from that data. 
- region_total_load = {} - areas = ["Area"+str(i) for i in range(1,4)] - for this_region in areas: - this_region_total_load = 0.0 - ## loads have exactly one bus - for name, load in md_obj.elements("load"): - bus = elements["bus"][load["bus"]] - if bus["area"] == this_region: - this_region_total_load += load["p_load"] - region_total_load[this_region] = this_region_total_load - - bus_load_participation_factor_dict = {} - bus_Ql_over_Pl_dict = {} - for name, load in md_obj.elements("load"): - bus = elements["bus"][load["bus"]] - bus_load_participation_factor_dict[name] = load["p_load"] / region_total_load[bus["area"]] - bus_Ql_over_Pl_dict[name] = load["q_load"] / load["p_load"] - - timeseries_pointer_dict = {} - for timeseries_pointer_index in timeseries_pointer_df.index.tolist(): - this_timeseries_pointer_dict = timeseries_pointer_df.loc[timeseries_pointer_index].to_dict() - new_timeseries_pointer = TimeSeriesPointer(this_timeseries_pointer_dict["Object"], - this_timeseries_pointer_dict["Simulation"], - this_timeseries_pointer_dict["Parameter"], - os.path.join(base_dir, this_timeseries_pointer_dict["Data File"])) - - timeseries_pointer_dict[(new_timeseries_pointer.Object, new_timeseries_pointer.Simulation)] = new_timeseries_pointer - - filtered_timeseries = {} - for name, gen in md_obj.elements("generator", generator_type="renewable"): - if gen["fuel"] in ["Solar", "Wind", "Hydro"]: - if (name, simulation) not in timeseries_pointer_dict: - print("***WARNING - No timeseries pointer entry found for generator=%s" % name) - - else: - #print("Time series for generator=%s will be loaded from file=%s" % (name, timeseries_pointer_dict[(name,"DAY_AHEAD")].DataFile)) - renewables_timeseries_df = _read_rts_gmlc_table(timeseries_pointer_dict[(name,simulation)].DataFile, simulation) - this_source_timeseries_df = renewables_timeseries_df.loc[:,["Year_Month_Day_Period", name]] - this_source_timeseries_df = this_source_timeseries_df.rename(columns = {"Year_Month_Day_Period" : 
"DateTime"}) - - start_mask = this_source_timeseries_df["DateTime"] >= begin_time - end_mask = this_source_timeseries_df["DateTime"] < end_time - this_source_masked_timeseries_df = this_source_timeseries_df[start_mask & end_mask] - - renewables_timeseries_dict = this_source_masked_timeseries_df.to_dict(orient='split') - renewables_timeseries = [] - for this_row in renewables_timeseries_dict["data"]: - renewables_timeseries.append(DateTimeValue(this_row[0], - float(this_row[1]))) - filtered_timeseries[name] = renewables_timeseries - - load_timeseries_spec = timeseries_pointer_dict[("Load",simulation)] - ## FIX issue with RTS-GMLC - if simulation == "REAL_TIME" and load_timeseries_spec.DataFile == os.path.join(base_dir,'..','timeseries_data_files', 'Load', 'REAL_TIME_regional_Load.csv'): - load_timeseries_spec = TimeSeriesPointer(load_timeseries_spec.Object, load_timeseries_spec.Simulation, load_timeseries_spec.Parameter, - os.path.join(base_dir, '..','timeseries_data_files','Load','REAL_TIME_regional_load.csv')) - load_timeseries_df = _read_rts_gmlc_table(load_timeseries_spec.DataFile, simulation) - load_timeseries_df = load_timeseries_df.rename(columns = {"Year_Month_Day_Period" : "DateTime"}) - start_mask = load_timeseries_df["DateTime"] >= begin_time - end_mask = load_timeseries_df["DateTime"] < end_time - masked_load_timeseries_df = load_timeseries_df[start_mask & end_mask] - load_dict = masked_load_timeseries_df.to_dict(orient='split') - load_timeseries = [] - for load_row in load_dict["data"]: - load_timeseries.append(Load(load_row[0], - float(load_row[1]), - float(load_row[2]), - float(load_row[3]))) - - reserves_dfs = {} - spin_reserve_categories = ["Spin_Up_R1", "Spin_Up_R2", "Spin_Up_R3"] - - other_reserve_categories = ["Reg_Down", "Reg_Up",] - ## flexiramp products only in day-ahead simulation - if simulation == "DAY_AHEAD": - other_reserve_categories += ["Flex_Down", "Flex_Up",] - - for reserve in spin_reserve_categories: - reserves_dfs[reserve] = 
_read_rts_gmlc_table(timeseries_pointer_dict[(reserve, simulation)].DataFile, simulation) - - - reserves_dict = {} - for name, reserve_df in reserves_dfs.items(): - reserve_df = reserve_df.rename(columns = {"Year_Month_Day_Period" : "DateTime"}) - start_mask = reserve_df["DateTime"] >= begin_time - end_mask = reserve_df["DateTime"] < end_time - reserve_df = reserve_df[start_mask & end_mask] - reserve_timeseries = [] - for this_row in reserve_df.to_dict(orient='split')["data"]: - reserve_timeseries.append(DateTimeValue(this_row[0], float(this_row[1]))) - reserves_dict[name] = reserve_timeseries - - for reserve in other_reserve_categories: - reserves_dict[reserve] = _read_rts_gmlc_reserve_table( - timeseries_pointer_dict[(reserve, simulation)].DataFile, - begin_time, - end_time, - simulation, - ) - - - times = [] - for load in load_timeseries: - times.append(str(load.DateTime)) - - system["time_keys"] = times - - _times_to_idx_map = { t : i for i,t in enumerate(times) } - - ## load into grid_network object - ## First, load Pl, Ql - for name, load in md_obj.elements("load"): - ## preallocate list - pl_list, ql_list = _preallocated_list(load_timeseries), _preallocated_list(load_timeseries) - bus = elements["bus"][load["bus"]] - for i, load_time in enumerate(load_timeseries): - area_load = getattr(load_time,bus["area"]) - pl_list[i] = round(bus_load_participation_factor_dict[name]*area_load,2) - ql_list[i] = pl_list[i]*bus_Ql_over_Pl_dict[name] - load["p_load"] = _make_time_series_dict(pl_list) - load["q_load"] = _make_time_series_dict(ql_list) - - ## load in area reserve factors - area_spin_map = {'Area1':'Spin_Up_R1', 'Area2':'Spin_Up_R2', 'Area3':'Spin_Up_R3'} - for name, area in md_obj.elements("area"): - spin_reserve_list = _preallocated_list(times) - for datetimevalue in reserves_dict[area_spin_map[name]]: - spin_reserve_list[_times_to_idx_map[str(datetimevalue.DateTime)]] = round(datetimevalue.Value,2) - area["spinning_reserve_requirement"] = 
_make_time_series_dict(spin_reserve_list) - - ## load in global reserve factors - rts_to_egret_reserve_map = { - "Flex_Down": "flexible_ramp_down_requirement", - "Flex_Up" : "flexible_ramp_up_requirement", - "Reg_Down": "regulation_down_requirement", - "Reg_Up" : "regulation_up_requirement", - } - for reserve in other_reserve_categories: - reserves_list = [ reserves_dict[reserve][t] for t in times ] - system[rts_to_egret_reserve_map[reserve]] = _make_time_series_dict(reserves_list) - - ## now load renewable generator stuff - for name, gen in md_obj.elements("generator", generator_type="renewable"): - if gen["fuel"] not in ["Solar", "Wind", "Hydro"]: - continue - renewables_timeseries = filtered_timeseries[name] - ## for safety, curtailable renewables can go down to 0 - gen["p_min"] = 0. - output_list = _preallocated_list(times) - for datetimevalue in renewables_timeseries: - output_list[_times_to_idx_map[str(datetimevalue.DateTime)]] = round(datetimevalue.Value,2) - - gen["p_max"] = _make_time_series_dict(output_list) - # set must-take for Hydro and RTPV - if gen["unit_type"] in ["HYDRO", "RTPV"]: - ## copy is for safety when overwriting - gen["p_min"] = _make_time_series_dict(output_list.copy()) - - - ## get this from the same place the prescient reader does - if t0_state is None: - unit_on_time_df = pd.read_csv(os.path.join(base_dir ,'..','FormattedData','PLEXOS','PLEXOS_Solution','DAY_AHEAD Solution Files','noTX','on_time_7.12.csv'), - header=0, - sep=",") - unit_on_time_df_as_dict = unit_on_time_df.to_dict(orient="split") - unit_on_t0_state_dict = {} - for i in range(0,len(unit_on_time_df_as_dict["columns"])): - gen_id = unit_on_time_df_as_dict["columns"][i] - unit_on_t0_state_dict[gen_id] = int(unit_on_time_df_as_dict["data"][0][i]) - - for name, gen in md_obj.elements("generator", generator_type="thermal"): - gen["initial_status"] = unit_on_t0_state_dict[name] - if gen["initial_status"] < 0: - gen["initial_p_output"] = 0. - gen["initial_q_output"] = 0. 
- else: - gen["initial_p_output"] = gen["p_min"] - gen["initial_q_output"] = max(0., gen["q_min"]) - - else: - for name, gen in md_obj.elements("generator", generator_type="thermal"): - gen["initial_status"] = t0_state[name]["initial_status"] - gen["initial_p_output"] = t0_state[name]["initial_p_output"] - gen["initial_q_output"] = t0_state[name]["initial_q_output"] - - return md_obj.data - -def _create_rtsgmlc_skeleton(rts_gmlc_dir): - """ - Creates a grid_data dictionary from the RTS-GMLC data, - but does not load hourly data - - Parameters - ---------- - rts_gmlc_dir : str - Path to RTS-GMLC directory - - Returns - ------- - grid_data : dict - Returns a dict loaded from the RTS-GMLC data - """ - - base_dir = os.path.join(rts_gmlc_dir,'RTS_Data', 'SourceData') - - case_name = "RTS-GMLC" - - model_data = md.ModelData.empty_model_data_dict() - - elements = model_data["elements"] - system = model_data["system"] - - system["name"] = case_name - - # this is the default used in the MATPOWER writer for RTS-GMLC - system["baseMVA"] = 100. 
- - elements["bus"] = {} - elements["load"] = {} - elements["shunt"] = {} - - # add the buses - bus_df = pd.read_csv(os.path.join(base_dir,'bus.csv')) - for idx,row in bus_df.iterrows(): - BUS_I = str(row['Bus ID']) - if row['Bus Type'] == 'PQ': - BUS_TYPE = 1 - elif row['Bus Type'] == 'PV': - BUS_TYPE = 2 - elif row['Bus Type'] == 'Ref': - BUS_TYPE = 3 - else: - BUS_TYPE = 4 - - PD = float(row['MW Load']) - QD = float(row['MVAR Load']) - GS = float(row['MW Shunt G']) - BS = float(row['MVAR Shunt B']) - BUS_AREA = str(row['Area']) - VM = float(row['V Mag']) - VA = float(row['V Angle']) - BASE_KV = float(row['BaseKV']) - ZONE = str(int(row['Zone'])) - VMAX = 1.05 #default used in RTS-GMLC MATPOWER writer - VMIN = 0.95 #default used in RTS-GMLC MATPOWER writer - - bus_dict = dict() - - if BUS_TYPE < 1 or BUS_TYPE > 3: - raise ValueError("Encountered an unsupported bus type: {} when parsing MATPOWER input file".format(BUS_TYPE)) - - bus_types = {1: "PQ", 2: "PV", 3: "ref", 4: "isolated"} - bus_dict["matpower_bustype"] = bus_types[BUS_TYPE] - - if BUS_TYPE == 3: - if VA != 0: - if abs(VA) >= 1e-16: - raise ValueError('EGRET only supports reference buses with an angle of 0 degrees.') - msg = "\nEgret only supports reference buses with an angle of 0 degrees. \nFound a " \ - "reference bus with an angle close to 0. \n" \ - "Value: {0}".format(VA) + "\nSetting reference bus angle to 0." 
- warnings.warn(msg) - system["reference_bus"] = BUS_I - system["reference_bus_angle"] = VA - - if PD != 0 or QD != 0: - load_dict = {"bus": BUS_I, "in_service":True} - load_dict["p_load"] = PD - load_dict["q_load"] = QD - load_dict["area"] = "Area"+BUS_AREA - load_dict["zone"] = ZONE - elements["load"]['load_'+BUS_I] = load_dict - - if GS != 0 or BS != 0: - shunt_dict = {"shunt_type":"fixed", "bus": BUS_I} - shunt_dict["gs"] = GS - shunt_dict["bs"] = BS - elements["shunt"]['shunt_'+BUS_I] = shunt_dict - - bus_dict["vm"] = VM - bus_dict["va"] = VA - if BASE_KV > 0: - bus_dict["base_kv"] = BASE_KV - else: - raise ValueError('BASE_KV value found that is <= 0. Not supported at this time.') - - bus_dict["area"] = "Area"+BUS_AREA - bus_dict["zone"] = ZONE - bus_dict["v_min"] = VMIN - bus_dict["v_max"] = VMAX - bus_dict["id"] = row['Bus Name'] - - elements["bus"][BUS_I] = bus_dict - - # add the areas - area_names = ['Area1', 'Area2', 'Area3'] - elements["area"] = {} - for name in area_names: - ## TODO: what else should be in here? - elements["area"][name] = dict() - - - elements["branch"] = {} - # add the branches - branch_df = pd.read_csv(os.path.join(base_dir,'branch.csv')) - for idx,row in branch_df.iterrows(): - name = str(row['UID']) - F_BUS = str(row['From Bus']) - T_BUS = str(row['To Bus']) - BR_R = float(row['R']) - BR_X = float(row['X']) - BR_B = float(row['B']) - RATE_A = float(row['Cont Rating']) - RATE_B = float(row['Cont Rating']) - RATE_C = float(row['Cont Rating']) - if RATE_A == 0: - RATE_A = None - if RATE_B == 0: - RATE_B = None - if RATE_C == 0: - RATE_C = None - TAP = float(row['Tr Ratio']) - SHIFT = 0.0 # these hard-coded values are the defaults - BR_STATUS = 1 #from the RTS-GMLC MATPOWER writer - ANGMIN = -90. - ANGMAX = 90. 
- PF = None # these values are not given - QF = None - PT = None - QT = None - - branch_dict = {"from_bus": F_BUS, "to_bus": T_BUS} - branch_dict["resistance"] = BR_R - branch_dict["reactance"] = BR_X - branch_dict["charging_susceptance"] = BR_B - - if TAP != 0.0: - branch_dict["transformer_tap_ratio"] = TAP - branch_dict["transformer_phase_shift"] = SHIFT - branch_dict["branch_type"] = "transformer" - else: - branch_dict["branch_type"] = "line" - - branch_dict["rating_long_term"] = RATE_A - branch_dict["rating_short_term"] = RATE_B - branch_dict["rating_emergency"] = RATE_C - branch_dict["angle_diff_min"] = ANGMIN - branch_dict["angle_diff_max"] = ANGMAX - assert(BR_STATUS == 0 or BR_STATUS == 1) - if BR_STATUS == 1: - branch_dict["in_service"] = True - else: - branch_dict["in_service"] = False - branch_dict["pf"] = PF - branch_dict["qf"] = QF - branch_dict["pt"] = PT - branch_dict["qt"] = QT - - elements["branch"][name] = branch_dict - - # add the generators - elements["generator"] = {} - RENEWABLE_TYPES = ['WIND', 'HYDRO', 'RTPV', 'PV'] - gen_df = pd.read_csv(os.path.join(base_dir,'gen.csv')) - for idx,row in gen_df.iterrows(): - name = str(row['GEN UID']) - GEN_BUS = str(row['Bus ID']) - gen_dict = {"bus":GEN_BUS} - - # if this is a renewable, hydro, or storage need to handle differently - # (hydro schedules in RTS-GMLC are fixed) - if row['Fuel'] in ['Storage']: - pass - else: - # NOTE: for now, prescient doesn't handle CSP -- not clear how to model - if row['Unit Type'] == 'CSP': - continue - ## (mostly) MATPOWER data - PG = float(row['MW Inj']) - QG = float(row['MVAR Inj']) - QMAX = float(row['QMax MVAR']) - QMIN = float(row['QMin MVAR']) - RAMP_Q = 1.*float(row['Ramp Rate MW/Min']) - VG = float(row['V Setpoint p.u.']) - MBASE = 100. 
#set in RTS-GMLC MATPOWER writer - GEN_STATUS = 1 - PMAX = float(row['PMax MW']) - PMIN = float(row['PMin MW']) - FUEL = str(row['Fuel']) - UNIT_TYPE = str(row['Unit Type']) - - if UNIT_TYPE in RENEWABLE_TYPES: - gen_dict["generator_type"] = "renewable" - elif UNIT_TYPE == 'SYNC_COND': - ## TODO: should we have a flag for these? - gen_dict["generator_type"] = "thermal" - else: - gen_dict["generator_type"] = "thermal" - gen_dict["bus"] = GEN_BUS - gen_dict["mbase"] = MBASE - gen_dict["in_service"] = True - gen_dict["pg"] = PG - gen_dict["qg"] = QG - gen_dict["vg"] = VG - gen_dict["p_min"] = PMIN - gen_dict["p_max"] = PMAX - gen_dict["q_min"] = QMIN - gen_dict["q_max"] = QMAX - gen_dict["ramp_q"] = RAMP_Q - gen_dict["fuel"] = FUEL - gen_dict["unit_type"] = UNIT_TYPE - gen_dict["area"] = elements["bus"][gen_dict["bus"]]["area"] - gen_dict["zone"] = elements["bus"][gen_dict["bus"]]["zone"] - - # after this is only really needed for thermal units - if UNIT_TYPE in RENEWABLE_TYPES: - elements["generator"][name] = gen_dict - continue - - - PC1 = 0.0 - PC2 = 0.0 - QC1MIN = 0.0 - QC1MAX = 0.0 - QC2MIN = 0.0 - QC2MAX = 0.0 - RAMP_AGC = 1.*float(row['Ramp Rate MW/Min']) - RAMP_10 = 10.*float(row['Ramp Rate MW/Min']) - RAMP_30 = 30.*float(row['Ramp Rate MW/Min']) - RAMP_UP_60 = 60.*float(row['Ramp Rate MW/Min']) - RAMP_DN_60 = 60.*float(row['Ramp Rate MW/Min']) - APF = 0.0 # 0.0 from RTS-GMLC MATPOWER writer - - - # Gen cost - x = {} - ## round as in RTS-GMLC Prescient/topysp.py - x[0] = round(float(row['Output_pct_0'])*float(row['PMax MW']),1) - x[1] = round(float(row['Output_pct_1'])*float(row['PMax MW']),1) - x[2] = round(float(row['Output_pct_2'])*float(row['PMax MW']),1) - x[3] = round(float(row['Output_pct_3'])*float(row['PMax MW']),1) - - - y = {} - ## /1000. 
from the RTS-GMLC MATPOWER writer -- - ## heat rates are in BTU/kWh, 1BTU == 10^-6 MMBTU, 1kWh == 10^-3 MWh, so MMBTU/MWh == 10^3/10^6 * BTU/kWh - y[0] = float(row['Fuel Price $/MMBTU'])*((float(row['HR_avg_0'])*1000./ 1000000.)*x[0]) - y[1] = float(row['Fuel Price $/MMBTU'])*(((x[1]-x[0])*(float(row['HR_incr_1'])*1000. / 1000000.))) + y[0] - y[2] = float(row['Fuel Price $/MMBTU'])*(((x[2]-x[1])*(float(row['HR_incr_2'])*1000. / 1000000.))) + y[1] - y[3] = float(row['Fuel Price $/MMBTU'])*(((x[3]-x[2])*(float(row['HR_incr_3'])*1000. / 1000000.))) + y[2] - - f = {} - f[0] = ((float(row['HR_avg_0'])*1000./ 1000000.)*x[0]) - f[1] = (((x[1]-x[0])*(float(row['HR_incr_1'])*1000. / 1000000.))) + f[0] - f[2] = (((x[2]-x[1])*(float(row['HR_incr_2'])*1000. / 1000000.))) + f[1] - f[3] = (((x[3]-x[2])*(float(row['HR_incr_3'])*1000. / 1000000.))) + f[2] - - # only include the cost coeffecients that matter - P_COEFF = [ (x[i], round(y[i],2)) for i in range(4) if (((i == 0) or (x[i-1],y[i-1]) != (x[i], y[i])) and (x[i], y[i]) != (0.,0.)) ] - if P_COEFF == []: - P_COEFF = [(PMAX, 0.0)] - - F_COEFF = [ (x[i], round(f[i],2)) for i in range(4) if (((i == 0) or (x[i-1],f[i-1]) != (x[i], f[i])) and (x[i], f[i]) != (0.,0.)) ] - if F_COEFF == []: - F_COEFF = [(PMAX, 0.0)] - - # UC Data - MIN_UP_TIME = float(row['Min Up Time Hr']) - MIN_DN_TIME = float(row['Min Down Time Hr']) - - # Startup types and costs - COLD_HEAT = float(row['Start Heat Cold MBTU']) - WARM_HEAT = float(row['Start Heat Warm MBTU']) - HOT_HEAT = float(row['Start Heat Hot MBTU']) - - COLD_TIME = float(row['Start Time Cold Hr']) - WARM_TIME = float(row['Start Time Warm Hr']) - HOT_TIME = float(row['Start Time Hot Hr']) - - FUEL_PRICE = float(row['Fuel Price $/MMBTU']) - FIXED_START_COST = float(row['Non Fuel Start Cost $']) - - - if (COLD_TIME <= MIN_DN_TIME) or (COLD_TIME == WARM_TIME == HOT_TIME): - STARTUP_COSTS = [(MIN_DN_TIME, round(COLD_HEAT*FUEL_PRICE+FIXED_START_COST,2))] - STARTUP_FUEL = [(MIN_DN_TIME, 
COLD_HEAT)] - - elif WARM_TIME <= MIN_DN_TIME: - STARTUP_COSTS = [(MIN_DN_TIME, round(WARM_HEAT*FUEL_PRICE+FIXED_START_COST,2)),\ - (COLD_TIME, round(COLD_HEAT*FUEL_PRICE+FIXED_START_COST,2))] - STARTUP_FUEL = [(MIN_DN_TIME, WARM_HEAT),\ - (COLD_TIME, COLD_HEAT)] - - else: - STARTUP_COSTS = [(MIN_DN_TIME, round(HOT_HEAT*FUEL_PRICE+FIXED_START_COST,2)),\ - (WARM_TIME, round(WARM_HEAT*FUEL_PRICE+FIXED_START_COST,2)),\ - (COLD_TIME, round(COLD_HEAT*FUEL_PRICE+FIXED_START_COST,2))] - STARTUP_FUEL = [(MIN_DN_TIME, HOT_HEAT),\ - (WARM_TIME, WARM_HEAT),\ - (COLD_TIME, COLD_HEAT)] - - SHUTDOWN_COST = 0.0 - - gen_dict["pc1"] = PC1 - gen_dict["pc2"] = PC2 - gen_dict["qc1_min"] = QC1MIN - gen_dict["qc1_max"] = QC1MAX - gen_dict["qc2_min"] = QC2MIN - gen_dict["qc2_max"] = QC2MAX - gen_dict["agc_capable"] = True - gen_dict["p_min_agc"] = gen_dict["p_min"] - gen_dict["p_max_agc"] = gen_dict["p_max"] - gen_dict["ramp_agc"] = RAMP_AGC - gen_dict["ramp_10"] = RAMP_10 - gen_dict["ramp_30"] = RAMP_30 - gen_dict["ramp_up_60min"] = RAMP_UP_60 - gen_dict["ramp_down_60min"] = RAMP_DN_60 - gen_dict["power_factor"] = APF - gen_dict["p_cost"] = {"data_type": "cost_curve", "cost_curve_type":"piecewise", "values": P_COEFF } - gen_dict["p_fuel"] = {"data_type": "fuel_curve", "values": F_COEFF } - gen_dict["fuel_cost"] = FUEL_PRICE - - gen_dict["startup_cost"] = STARTUP_COSTS - gen_dict["startup_fuel"] = STARTUP_FUEL - gen_dict["shutdown_cost"] = SHUTDOWN_COST - # these assumptions are the same as prescient-rtsgmlc - gen_dict["startup_capacity"] = PMIN - gen_dict["shutdown_capacity"] = PMIN - gen_dict["min_up_time"] = MIN_UP_TIME - gen_dict["min_down_time"] = MIN_DN_TIME - gen_dict["must_run"] = False - - elements["generator"][name] = gen_dict - - return model_data - -def _read_rts_gmlc_table(file_name, simulation): - if simulation == "DAY_AHEAD": - _date_parser = lambda *columns: datetime(*map(int,columns[0:3]), int(columns[3])-1) - else: - minute_mutli = 5 - hour_divisor = 12 - 
time_periods_in_day = 24*hour_divisor - _date_parser = lambda *columns: datetime(*map(int,columns[0:3]),\ - (int(columns[3])-1)//hour_divisor, - minute_mutli*((int(columns[3])-1)%hour_divisor)) - return pd.read_csv(file_name, - header=0, - sep=',', - parse_dates=[[0, 1, 2, 3]], - date_parser=_date_parser) - -def _read_rts_gmlc_reserve_table(file_name, begin_time, end_time, simulation): - table_dict = pd.read_csv(file_name, header=0, sep=',').T.to_dict() - - if simulation == "DAY_AHEAD": - hour_divisor = 1 - minute_mutli = 0 - time_periods_in_day = 24 - else: - minute_mutli = 5 - hour_divisor = 12 - time_periods_in_day = 24*hour_divisor - - by_datetime_dict = dict() - for day_num, day_data in table_dict.items(): - year = day_data['Year'] - month = day_data['Month'] - day= day_data['Day'] - for i in range(1,time_periods_in_day+1): - date_time = datetime(year=int(year), month=int(month), day=int(day), - hour=(i-1)//hour_divisor, minute=minute_mutli*((i-1)%hour_divisor)) - if begin_time <= date_time < end_time: - by_datetime_dict[str(date_time)] = float(day_data[str(i)]) - return by_datetime_dict - -def _make_time_series_dict( values ): - return {"data_type":"time_series", "values": values } - -def _preallocated_list( other_iter ): - return [None for _ in other_iter] - -def _get_datetimes(begin_time, end_time): - - datetime_format = "%Y-%m-%d %H:%M:%S" - - datestr = "YYYY-DD-MM" - midnight = " 00:00:00" - - if isinstance(begin_time,datetime): - pass - elif isinstance(begin_time,str): - if len(begin_time) == len(datestr): - begin_time += midnight - begin_time = datetime.strptime(begin_time,datetime_format) - else: - raise ValueError("Unable to parse begin_time") - - if isinstance(end_time,datetime): - pass - elif isinstance(end_time,str): - if len(end_time) == len(datestr): - end_time += midnight - end_time = datetime.strptime(end_time,datetime_format) - else: - raise ValueError("Unable to parse end_time") - - # stay in the times provided - assert begin_time >= 
datetime(year=2020, month=1, day=1) - assert end_time <= datetime(year=2021, month=1, day=1) - - # We only take times in whole hours (for now) - assert (begin_time.minute == 0. and begin_time.second == 0. and begin_time.microsecond == 0.) - assert (end_time.minute == 0. and end_time.second == 0. and end_time.microsecond == 0.) - - return begin_time, end_time From 6258d991db5c0d29af633e7bccfd47335d13c393 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Thu, 18 Feb 2021 19:11:24 -0700 Subject: [PATCH 02/13] Fixes to RTS-GMLC parser. * Return dict (not ModelData) from create_model_data_dict() * Parse dates if provided as strings * Don't multiply timeseries by their Scaling Factor --- egret/parsers/rts_gmlc/parser.py | 36 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index 6d6874f4..d7b0e17f 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -94,12 +94,12 @@ def create_model_data_dict(rts_gmlc_dir:str, gen['initial_status'] = t0_state[name]['initial_status'] gen['initial_p_output'] = t0_state[name]['initial_p_output'] gen['initial_q_output'] = t0_state[name]['initial_q_output'] - return model + return model.data def parse_to_cache(rts_gmlc_dir:str, - begin_time:datetime, - end_time:datetime) -> ParsedCache: + begin_time:Union[datetime,str], + end_time:Union[datetime,str]) -> ParsedCache: ''' Parse data in RTS-GMLC format, keeping the portions between a start and end time rts_gmlc_dir : str @@ -125,7 +125,9 @@ def parse_to_cache(rts_gmlc_dir:str, data_start, data_end = _get_data_date_range(metadata_df) - # TODO: Validate begin_time and end_time + begin_time, end_time = _parse_datetimes_if_strings(begin_time, end_time) + # TODO: Validate begin_time and end_time. + # Do we want to enforce that they fall within the data date range? 
timeseries_df = _read_timeseries_data(model_data, rts_gmlc_dir, begin_time, end_time, minutes_per_period) @@ -645,14 +647,12 @@ def _read_timeseries_data(model_data:dict, rts_gmlc_dir:str, ======= all_timeseries: DataFrame A DataFrame with the following columns: - [Simulation, Category, Object, Parameter, Scaling Factor, Series] + [Simulation, Category, Object, Parameter, Series] The Series column holds the data as a pandas series, indexed by the datetime of the value. """ - # Where we'll keep our results - timeseries_data = {'DAY_AHEAD':{}, 'REAL_TIME':{}} # All timeseries data that has already been read (map[filename] -> DataFrame) timeseries_file_map = {} @@ -702,11 +702,13 @@ def _read_timeseries_data(model_data:dict, rts_gmlc_dir:str, timeseries_file_map[fname] = data # Save a reference to the relevant data as a Series - scaling_factor = float(row['Scaling Factor']) - timeseries_pointer_df.at[idx,'Series'] = timeseries_file_map[fname][row['Object']]*scaling_factor + timeseries_pointer_df.at[idx,'Series'] = timeseries_file_map[fname][row['Object']] - # Remove the file path from the DF - timeseries_pointer_df.pop('Data File') + # Remove columns that we don't want to preserve + keepers= {'Simulation', 'Category', 'Object', 'Parameter', 'Series'} + for c in timeseries_pointer_df.columns: + if not c in keepers: + timeseries_pointer_df.pop(c) # Remove irrelevant rows timeseries_pointer_df.dropna(subset=['Series'], inplace=True) @@ -716,7 +718,17 @@ def _read_timeseries_data(model_data:dict, rts_gmlc_dir:str, return timeseries_pointer_df -def _get_datetimes_from_strings(begin_time:str, end_time:str): +def _parse_datetimes_if_strings(begin_time:Union[datetime,str], end_time:Union[datetime,str]): + ''' + Ensure both dates are datetimes, parsing date strings if necessary. 
+ + Returns + ------- + begin_time:datetime + The begin_time as a datetime, parsing it if necessary + end_time:datetime + The end_time as a datetime, parsing it if necessary + ''' datetime_format = "%Y-%m-%d %H:%M:%S" From 94904a92c02a504f0329a7aca66a1c168ee1fb83 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Wed, 24 Feb 2021 14:12:56 -0700 Subject: [PATCH 03/13] Read constant reserve requirements from reserves.csv. They follow the same naming conventions as timeseries data. The constant requirement values in reserves.csv are only used if there is not a corresponding timeseries in timeseries_pointers.csv; otherwise the constant value is replaced with timeseries values. --- egret/parsers/rts_gmlc/parser.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index d7b0e17f..19dc723c 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -21,7 +21,7 @@ import egret.data.model_data as md from .parsed_cache import ParsedCache -from ._reserves import is_valid_reserve_name +from ._reserves import is_valid_reserve_name, reserve_name_map def create_ModelData(rts_gmlc_dir:str, begin_time:Union[datetime,str], end_time:Union[datetime,str], @@ -610,6 +610,25 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): elements["generator"][name] = gen_dict + # Add the reserves + reserve_df = pd.read_csv(os.path.join(base_dir,'reserves.csv')) + for idx,row in reserve_df.iterrows(): + res_name = row['Reserve Product'] + req = float(row['Requirement (MW)']) + + if res_name in reserve_name_map: + target_dict = system + else: + # reserve name must be _R. 
+ # split into type and area + res_name, area_name = res_name.split("_R", 1) + if area_name not in elements['area']: + # Skip areas not referenced elsewhere + continue + target_dict = elements['area'][area_name] + + target_dict[reserve_name_map[res_name]] = req + return model_data def _compute_bus_load_participation_factors(model_data): From 1e9d074267bd90dc4e2a9a3b3b9bb8648a36edba Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Wed, 24 Feb 2021 14:31:26 -0700 Subject: [PATCH 04/13] Remove unused file --- egret/parsers/parser.py | 972 ---------------------------------------- 1 file changed, 972 deletions(-) delete mode 100644 egret/parsers/parser.py diff --git a/egret/parsers/parser.py b/egret/parsers/parser.py deleted file mode 100644 index 4a179647..00000000 --- a/egret/parsers/parser.py +++ /dev/null @@ -1,972 +0,0 @@ -# ___________________________________________________________________________ -# -# EGRET: Electrical Grid Research and Engineering Tools -# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC -# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. -# Government retains certain rights in this software. -# This software is distributed under the Revised BSD License. -# ___________________________________________________________________________ - -""" -This module provides supporting functions for interacting with standard format input data - -It includes methods to parse the data and load them into a TemporalGridNetwork object - -""" - -import os.path -import egret.data.model_data as md -import pandas as pd -import math -from datetime import datetime, timedelta -from collections import namedtuple - - -def convert_load_by_area_to_source(data_dir, begin_time, end_time, t0_state=None): - """ - Create a ModelData object from the input data. Assumes data is formatted like the RTS-GMLC repository's 'RTS_Data' directory. 
- - Parameters - ---------- - data_dir : str - Path to data directory - begin_time : datetime.datetime or str - Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - end_time : datetime.datetime or str - End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - t0_state : dict or Nonetype - Keys of this dict are thermal generator names, each element of which is another dictionary with - keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the - generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, default values are loaded. - """ - for simulation in ['DAY_AHEAD', 'REAL_TIME']: - simulation = simulation.upper() - - base_dir = os.path.join(data_dir, 'SourceData') - - begin_time, end_time = _get_datetimes(begin_time, end_time, base_dir, simulation) - - TimeSeriesPointer = namedtuple('TimeSeriesPointer', - ['Object', - 'Simulation', - 'Parameter', - 'DataFile']) - - DateTimeValue = namedtuple('DateTimeValue', - ['DateTime', 'Value']) - - areas = _get_eligible_areas(rts_gmlc_dir) - area_names = _get_eligible_area_names(areas) - - Load = namedtuple('Load', ['DateTime'] + area_names) - - timeseries_pointer_df = pd.read_csv(os.path.join(base_dir, "timeseries_pointers.csv"), header=0, sep=',') - - time_delta = end_time - begin_time - - hours = 24 * time_delta.days + math.ceil(time_delta.seconds / 3600.) 
- - model_data = _create_rtsgmlc_skeleton(rts_gmlc_dir) - - ## create an object for easy iterating - md_obj = md.ModelData(model_data) - - system = md_obj.data["system"] - elements = md_obj.data["elements"] - - if simulation == "DAY_AHEAD": - system["time_period_length_minutes"] = 60 - else: - system["time_period_length_minutes"] = 5 - - # compute aggregate load per area, and then compute - # load participation factors from each bus from that data. - region_total_load = {} - areas = ["Area" + str(i) for i in range(1, 4)] - for this_region in areas: - this_region_total_load = 0.0 - ## loads have exactly one bus - for name, load in md_obj.elements("load"): - bus = elements["bus"][load["bus"]] - if bus["area"] == this_region: - this_region_total_load += load["p_load"] - region_total_load[this_region] = this_region_total_load - - bus_load_participation_factor_dict = {} - bus_Ql_over_Pl_dict = {} - for name, load in md_obj.elements("load"): - bus = elements["bus"][load["bus"]] - bus_load_participation_factor_dict[name] = load["p_load"] / region_total_load[bus["area"]] - bus_Ql_over_Pl_dict[name] = load["q_load"] / load["p_load"] - - timeseries_pointer_dict = {} - for timeseries_pointer_index in timeseries_pointer_df.index.tolist(): - this_timeseries_pointer_dict = timeseries_pointer_df.loc[timeseries_pointer_index].to_dict() - new_timeseries_pointer = TimeSeriesPointer(this_timeseries_pointer_dict["Object"], - this_timeseries_pointer_dict["Simulation"], - this_timeseries_pointer_dict["Parameter"], - os.path.join(base_dir, - this_timeseries_pointer_dict["Data File"])) - - timeseries_pointer_dict[ - (new_timeseries_pointer.Object, new_timeseries_pointer.Simulation)] = new_timeseries_pointer - - load_timeseries_spec = timeseries_pointer_dict[("1", simulation)] - load_timeseries_df = _read_rts_gmlc_table(load_timeseries_spec.DataFile, simulation) - load_timeseries_df = load_timeseries_df.rename(columns={"Year_Month_Day_Period": "DateTime"}) - start_mask = 
load_timeseries_df["DateTime"] >= begin_time - end_mask = load_timeseries_df["DateTime"] < end_time - masked_load_timeseries_df = load_timeseries_df[start_mask & end_mask] - load_dict = masked_load_timeseries_df.to_dict(orient='split') - load_timeseries = [] - for load_row in load_dict["data"]: - load_timeseries.append(Load(load_row[0], - float(load_row[1]), - float(load_row[2]), - float(load_row[3]))) - - times = [] - for load in load_timeseries: - times.append(str(load.DateTime)) - - system["time_keys"] = times - - ## load into grid_network object - ## First, load Pl, Ql - for name, load in md_obj.elements("load"): - pl_dict, ql_dict = dict(), dict() - bus = elements["bus"][load["bus"]] - for load_time in load_timeseries: - area_load = getattr(load_time, bus["area"]) - pl_dict[str(load_time.DateTime)] = round(bus_load_participation_factor_dict[name] * area_load, 2) - ql_dict[str(load_time.DateTime)] = pl_dict[str(load_time.DateTime)] * bus_Ql_over_Pl_dict[name] - load["p_load"] = _make_time_series_dict(list(pl_dict.values())) - load["q_load"] = _make_time_series_dict(list(ql_dict.values())) - - new_load_time_series = [] - - day_ahead_load_file = '../timeseries_data_files/Load/new_load_time_series_DA.csv' - real_time_load_file = '../timeseries_data_files/Load/new_load_time_series_RT.csv' - - for ix, load_time in enumerate(load_timeseries, start=0): - load_time_series_record = {} - load_time_series_record['Year'] = load_time.DateTime.year - load_time_series_record['Month'] = load_time.DateTime.month - load_time_series_record['Day'] = load_time.DateTime.day - - if simulation == 'DAY_AHEAD': - load_time_series_record['Period'] = (ix % 24) + 1 - else: - load_time_series_record['Period'] = (ix % (24 * 12)) + 1 - - for name, load in md_obj.elements('load'): - bus = elements['bus'][load['bus']] - area_load = getattr(load_time, bus['area']) - - load_time_series_record[name] = round(bus_load_participation_factor_dict[name] * area_load, 2) - - 
new_load_time_series.append(load_time_series_record) - - new_load_time_series_df = pd.DataFrame(new_load_time_series) - new_load_time_series_df = new_load_time_series_df[ - ['Year', 'Month', 'Day', 'Period'] + new_load_time_series_df.columns[4:].tolist()] - new_load_time_series_fname = 'new_load_time_series_{0}.csv'.format('DA' if simulation == "DAY_AHEAD" else 'RT') - new_load_time_series_df.to_csv( - os.path.join(data_dir, 'timeseries_data_files', 'Load', new_load_time_series_fname), index=False) - - # Augment time series pointer dataframe. - for name, load in md_obj.elements('load'): - new_load_timeseries_spec = {} - new_load_timeseries_spec['Object'] = name - new_load_timeseries_spec['Parameter'] = 'Requirement' - new_load_timeseries_spec['Simulation'] = 'DAY_AHEAD' - new_load_timeseries_spec['Data File'] = day_ahead_load_file - timeseries_pointer_df = timeseries_pointer_df.append(new_load_timeseries_spec, ignore_index=True) - - new_load_timeseries_spec = {} - new_load_timeseries_spec['Object'] = name - new_load_timeseries_spec['Parameter'] = 'Requirement' - new_load_timeseries_spec['Simulation'] = 'REAL_TIME' - new_load_timeseries_spec['Data File'] = real_time_load_file - timeseries_pointer_df = timeseries_pointer_df.append(new_load_timeseries_spec, ignore_index=True) - - timeseries_pointer_df.loc[timeseries_pointer_df['Object'] != 'Load'].to_csv( - os.path.join(data_dir, 'SourceData', 'timeseries_pointers.csv'), index=False) - - -def create_ModelData(data_dir, begin_time, end_time, simulation="DAY_AHEAD", t0_state=None): - """ - Create a ModelData object from the input data. - - Parameters - ---------- - data_dir : str - Path to data directory - begin_time : datetime.datetime or str - Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - end_time : datetime.datetime or str - End of time horizon. 
If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - simulation : str - Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, - default is "DAY_AHEAD". - t0_state : dict or Nonetype - Keys of this dict are thermal generator names, each element of which is another dictionary with - keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the - generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, default values are loaded. - - Returns - ------- - egret.model_data.ModelData - Returns a ModelData object with the timeseries data specified - """ - return md.ModelData(create_model_data_dict(data_dir, begin_time, end_time, simulation, t0_state)) - - -def create_model_data_dict(rts_gmlc_dir, begin_time, end_time, simulation="DAY_AHEAD", t0_state=None): - """ - Create a model_data dictionary from the RTS-GMLC data. - - Parameters - ---------- - rts_gmlc_dir : str - Path to RTS-GMLC directory - begin_time : datetime.datetime or str - Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - end_time : datetime.datetime or str - End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - simulation : str - Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, - default is "DAY_AHEAD". - t0_state : dict or Nonetype - Keys of this dict are thermal generator names, each element of which is another dictionary with - keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the - generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, default values are loaded. 
- - Returns - ------- - dict : A dictionary in the format required for the ModelData object. - """ - - simulation = simulation.upper() - if simulation not in ["DAY_AHEAD", "REAL_TIME"]: - raise ValueError('simulation must be "DAY_AHEAD" or "REAL_TIME"') - - base_dir = os.path.join(rts_gmlc_dir, 'SourceData') - - begin_time, end_time = _get_datetimes(begin_time, end_time, base_dir, simulation) - - TimeSeriesPointer = namedtuple('TimeSeriesPointer', - ['Object', - 'Simulation', - 'Parameter', - 'DataFile']) - - DateTimeValue = namedtuple('DateTimeValue', - ['DateTime', 'Value']) - - areas = _get_eligible_areas(rts_gmlc_dir) - area_names = _get_eligible_area_names(areas) - - Load = namedtuple('Load', ['DateTime'] + area_names) - - timeseries_pointer_df = pd.read_csv(os.path.join(base_dir, "timeseries_pointers.csv"), header=0, sep=',') - - time_delta = end_time - begin_time - - hours = 24 * time_delta.days + math.ceil(time_delta.seconds / 3600.) - - model_data = _create_rtsgmlc_skeleton(rts_gmlc_dir) - - ## create an object for easy iterating - md_obj = md.ModelData(model_data) - - system = md_obj.data["system"] - elements = md_obj.data["elements"] - - if simulation == "DAY_AHEAD": - system["time_period_length_minutes"] = 60 - else: - system["time_period_length_minutes"] = 5 - - # compute aggregate load per area, and then compute - # load participation factors from each bus from that data. 
- region_total_load = {} - for this_region in area_names: - this_region_total_load = 0.0 - ## loads have exactly one bus - for name, load in md_obj.elements("load"): - bus = elements["bus"][load["bus"]] - if bus["area"] == this_region: - this_region_total_load += load["p_load"] - region_total_load[this_region] = this_region_total_load - - bus_load_participation_factor_dict = {} - bus_Ql_over_Pl_dict = {} - for name, load in md_obj.elements("load"): - bus = elements["bus"][load["bus"]] - bus_load_participation_factor_dict[name] = load["p_load"] / region_total_load[bus["area"]] - bus_Ql_over_Pl_dict[name] = load["q_load"] / load["p_load"] - - timeseries_pointer_dict = {} - for timeseries_pointer_index in timeseries_pointer_df.index.tolist(): - this_timeseries_pointer_dict = timeseries_pointer_df.loc[timeseries_pointer_index].to_dict() - new_timeseries_pointer = TimeSeriesPointer(this_timeseries_pointer_dict["Object"], - this_timeseries_pointer_dict["Simulation"], - this_timeseries_pointer_dict["Parameter"], - os.path.join(base_dir, this_timeseries_pointer_dict["Data File"])) - - timeseries_pointer_dict[ - (new_timeseries_pointer.Object, new_timeseries_pointer.Simulation)] = new_timeseries_pointer - - filtered_timeseries = {} - for name, gen in md_obj.elements("generator", generator_type="renewable"): - if gen["fuel"] in ["Solar", "Wind", "Hydro"]: - if (name, simulation) not in timeseries_pointer_dict: - print("***WARNING - No timeseries pointer entry found for generator=%s" % name) - - else: - # print("Time series for generator=%s will be loaded from file=%s" % (name, timeseries_pointer_dict[(name,"DAY_AHEAD")].DataFile)) - renewables_timeseries_df = _read_rts_gmlc_table(timeseries_pointer_dict[(name, simulation)].DataFile, - simulation) - this_source_timeseries_df = renewables_timeseries_df.loc[:, ["Year_Month_Day_Period", name]] - this_source_timeseries_df = this_source_timeseries_df.rename( - columns={"Year_Month_Day_Period": "DateTime"}) - - start_mask = 
this_source_timeseries_df["DateTime"] >= begin_time - end_mask = this_source_timeseries_df["DateTime"] < end_time - this_source_masked_timeseries_df = this_source_timeseries_df[start_mask & end_mask] - - renewables_timeseries_dict = this_source_masked_timeseries_df.to_dict(orient='split') - renewables_timeseries = [] - for this_row in renewables_timeseries_dict["data"]: - renewables_timeseries.append(DateTimeValue(this_row[0], - float(this_row[1]))) - filtered_timeseries[name] = renewables_timeseries - - for name, load in md_obj.elements("load"): - load_timeseries_spec = timeseries_pointer_dict[(name, simulation)] - load_timeseries_df = _read_rts_gmlc_table(load_timeseries_spec.DataFile, simulation) - load_timeseries_df = load_timeseries_df.rename(columns={"Year_Month_Day_Period": "DateTime"}) - start_mask = load_timeseries_df["DateTime"] >= begin_time - end_mask = load_timeseries_df["DateTime"] < end_time - masked_load_timeseries_df = load_timeseries_df[start_mask & end_mask] - load_dict = masked_load_timeseries_df.to_dict(orient='records') - - reserves_dfs = {} - spin_reserve_categories = ["Spin_Up_R1", "Spin_Up_R2", "Spin_Up_R3"] - - other_reserve_categories = ["Reg_Down", "Reg_Up", ] - ## flexiramp products only in day-ahead simulation - if simulation == "DAY_AHEAD": - other_reserve_categories += ["Flex_Down", "Flex_Up", ] - - for reserve in spin_reserve_categories: - reserves_dfs[reserve] = _read_rts_gmlc_table(timeseries_pointer_dict[(reserve, simulation)].DataFile, - simulation) - - reserves_dict = {} - for name, reserve_df in reserves_dfs.items(): - reserve_df = reserve_df.rename(columns={"Year_Month_Day_Period": "DateTime"}) - start_mask = reserve_df["DateTime"] >= begin_time - end_mask = reserve_df["DateTime"] < end_time - reserve_df = reserve_df[start_mask & end_mask] - reserve_timeseries = [] - for this_row in reserve_df.to_dict(orient='split')["data"]: - reserve_timeseries.append(DateTimeValue(this_row[0], float(this_row[1]))) - reserves_dict[name] = 
reserve_timeseries - - for reserve in other_reserve_categories: - reserves_dict[reserve] = _read_rts_gmlc_reserve_table( - timeseries_pointer_dict[(reserve, simulation)].DataFile, - begin_time, - end_time, - simulation, - ) - - times = [] - for load in load_dict: - times.append(str(load['DateTime'])) - - system["time_keys"] = times - - ## load into grid_network object - ## First, load Pl, Ql - for name, load in md_obj.elements("load"): - pl_dict, ql_dict = dict(), dict() - bus = elements["bus"][load["bus"]] - for load_row in load_dict: - pl_dict[str(load_row['DateTime'])] = round(load_row[name], 2) - ql_dict[str(load_row['DateTime'])] = pl_dict[str(load_row['DateTime'])] * bus_Ql_over_Pl_dict[name] - load["p_load"] = _make_time_series_dict(list(pl_dict.values())) - load["q_load"] = _make_time_series_dict(list(ql_dict.values())) - - ## load in area reserve factors - area_spin_map = _create_rts_gmlc_area_spin_map(rts_gmlc_dir) - for name, area in md_obj.elements("area"): - spin_reserve_dict = dict() - for datetimevalue in reserves_dict[area_spin_map[name]]: - spin_reserve_dict[str(datetimevalue.DateTime)] = round(datetimevalue.Value, 2) - area["spinning_reserve_requirement"] = _make_time_series_dict(list(spin_reserve_dict.values())) - - ## load in global reserve factors - rts_to_egret_reserve_map = { - "Flex_Down": "flexible_ramp_down_requirement", - "Flex_Up": "flexible_ramp_up_requirement", - "Reg_Down": "regulation_down_requirement", - "Reg_Up": "regulation_up_requirement", - } - for reserve in other_reserve_categories: - system[rts_to_egret_reserve_map[reserve]] = _make_time_series_dict(list(reserves_dict[reserve].values())) - - ## now load renewable generator stuff - for name, gen in md_obj.elements("generator", generator_type="renewable"): - if gen["fuel"] not in ["Solar", "Wind", "Hydro"]: - continue - renewables_timeseries = filtered_timeseries[name] - ## for safety, curtailable renewables can go down to 0 - gen["p_min"] = 0. 
- output_dict = dict() - for datetimevalue in renewables_timeseries: - output_dict[str(datetimevalue.DateTime)] = round(datetimevalue.Value, 2) - - gen["p_max"] = _make_time_series_dict(list(output_dict.values())) - # set must-take for Hydro and RTPV - if gen["unit_type"] in ["HYDRO", "RTPV"]: - ## copy is for safety when overwriting - gen["p_min"] = _make_time_series_dict(list(output_dict.copy().values())) - - ## get this from the same place the prescient reader does - if t0_state is None: - unit_on_time_df = pd.read_csv(os.path.join(base_dir, - "../FormattedData/PLEXOS/PLEXOS_Solution/DAY_AHEAD Solution Files/noTX/on_time_7.12.csv"), - header=0, - sep=",") - unit_on_time_df_as_dict = unit_on_time_df.to_dict(orient="split") - unit_on_t0_state_dict = {} - for i in range(0, len(unit_on_time_df_as_dict["columns"])): - gen_id = unit_on_time_df_as_dict["columns"][i] - unit_on_t0_state_dict[gen_id] = int(unit_on_time_df_as_dict["data"][0][i]) - - for name, gen in md_obj.elements("generator", generator_type="thermal"): - gen["initial_status"] = unit_on_t0_state_dict[name] - if gen["initial_status"] < 0: - gen["initial_p_output"] = 0. - gen["initial_q_output"] = 0. - else: - gen["initial_p_output"] = gen["p_min"] - gen["initial_q_output"] = max(0., gen["q_min"]) - - else: - for name, gen in md_obj.elements("generator", generator_type="thermal"): - gen["initial_status"] = t0_state[name]["initial_status"] - gen["initial_p_output"] = t0_state[name]["initial_p_output"] - gen["initial_q_output"] = t0_state[name]["initial_q_output"] - - return md_obj.data - -def _create_rts_gmlc_area_spin_map(rts_gmlc_dir): - base_dir = os.path.join(rts_gmlc_dir, 'SourceData') - reserves = pd.read_csv(os.path.join(base_dir, 'reserves.csv')) - area_spin_map = {} - areas = _get_eligible_areas(rts_gmlc_dir) - area_names = _get_eligible_area_names(areas) - #assuming we have areas that correspond to the "Eligible Regions" category, starting at 1, 2, 3... 
- for area, name in zip(areas, area_names): - spin_name = reserves.loc[reserves['Eligible Regions'] == str(area)]['Reserve Product'].values[0] - area_spin_map[name] = spin_name - return area_spin_map - -def _get_rts_gmlc_start_end_dates(base_dir, simulation): - simulation_objects = pd.read_csv(os.path.join(base_dir, 'simulation_objects.csv')) - date_from = simulation_objects.loc[simulation_objects['Simulation_Parameters'] == 'Date_From'] - date_to = simulation_objects.loc[simulation_objects['Simulation_Parameters'] == 'Date_To'] - from_date_string = '' - to_date_string = '' - if simulation == 'DAY_AHEAD': - from_date_string = date_from.iloc[0]['DAY_AHEAD'] - to_date_string = date_to.iloc[0]['DAY_AHEAD'] - else: - from_date_string = date_from.iloc[0]['REAL_TIME'] - to_date_string = date_to.iloc[0]['REAL_TIME'] - start_date = datetime.strptime(from_date_string, '%m/%d/%y %H:%M') - end_date = datetime.strptime(to_date_string, '%m/%d/%y %H:%M') - return start_date, end_date - -def _get_eligible_areas(rts_gmlc_dir): - base_dir = os.path.join(rts_gmlc_dir, 'SourceData') - bus = pd.read_csv(os.path.join(base_dir, 'bus.csv')) - return bus['Area'].drop_duplicates().values.tolist() - -def _get_eligible_area_names(areas): - area_names = list(map(lambda x: 'Area' + str(x), areas)) - return area_names - - -def _create_rtsgmlc_skeleton(rts_gmlc_dir): - """ - Creates a grid_data dictionary from the RTS-GMLC data, - but does not load hourly data - - Parameters - ---------- - rts_gmlc_dir : str - Path to RTS-GMLC directory - - Returns - ------- - grid_data : dict - Returns a dict loaded from the RTS-GMLC data - """ - - base_dir = os.path.join(rts_gmlc_dir, 'SourceData') - - case_name = "RTS-GMLC" - - model_data = md.ModelData.empty_model_data_dict() - - elements = model_data["elements"] - system = model_data["system"] - - system["name"] = case_name - - # this is the default used in the MATPOWER writer for RTS-GMLC - system["baseMVA"] = 100. 
- - elements["bus"] = {} - elements["load"] = {} - elements["shunt"] = {} - - # add the buses - bus_df = pd.read_csv(os.path.join(base_dir, 'bus.csv')) - for idx, row in bus_df.iterrows(): - BUS_I = str(row['Bus ID']) - if row['Bus Type'] == 'PQ': - BUS_TYPE = 1 - elif row['Bus Type'] == 'PV': - BUS_TYPE = 2 - elif row['Bus Type'] == 'Ref': - BUS_TYPE = 3 - else: - BUS_TYPE = 4 - - PD = float(row['MW Load']) - QD = float(row['MVAR Load']) - GS = float(row['MW Shunt G']) - BS = float(row['MVAR Shunt B']) - BUS_AREA = str(row['Area']) - VM = float(row['V Mag']) - VA = float(row['V Angle']) - BASE_KV = float(row['BaseKV']) - ZONE = str(int(row['Zone'])) - VMAX = 1.05 # default used in RTS-GMLC MATPOWER writer - VMIN = 0.95 # default used in RTS-GMLC MATPOWER writer - - bus_dict = dict() - - if BUS_TYPE < 1 or BUS_TYPE > 3: - raise ValueError( - "Encountered an unsupported bus type: {} when parsing MATPOWER input file".format(BUS_TYPE)) - - bus_types = {1: "PQ", 2: "PV", 3: "ref", 4: "isolated"} - bus_dict["matpower_bustype"] = bus_types[BUS_TYPE] - - if BUS_TYPE == 3: - if VA != 0: - if abs(VA) >= 1e-16: - raise ValueError('EGRET only supports reference buses with an angle of 0 degrees.') - msg = "\nEgret only supports reference buses with an angle of 0 degrees. \nFound a " \ - "reference bus with an angle close to 0. \n" \ - "Value: {0}".format(VA) + "\nSetting reference bus angle to 0." 
- warnings.warn(msg) - system["reference_bus"] = BUS_I - system["reference_bus_angle"] = VA - - if PD != 0 or QD != 0: - load_dict = {"bus": BUS_I, "in_service": True} - load_dict["p_load"] = PD - load_dict["q_load"] = QD - load_dict["area"] = "Area" + BUS_AREA - load_dict["zone"] = ZONE - elements["load"]['load_' + BUS_I] = load_dict - - if GS != 0 or BS != 0: - shunt_dict = {"shunt_type": "fixed", "bus": BUS_I} - shunt_dict["gs"] = GS - shunt_dict["bs"] = BS - elements["shunt"]['shunt_' + BUS_I] = shunt_dict - - bus_dict["vm"] = VM - bus_dict["va"] = VA - if BASE_KV > 0: - bus_dict["base_kv"] = BASE_KV - else: - raise ValueError('BASE_KV value found that is <= 0. Not supported at this time.') - - bus_dict["area"] = "Area" + BUS_AREA - bus_dict["zone"] = ZONE - bus_dict["v_min"] = VMIN - bus_dict["v_max"] = VMAX - bus_dict["id"] = row['Bus Name'] - - elements["bus"][BUS_I] = bus_dict - - # add the areas - - elements["area"] = {} - areas = _get_eligible_areas(rts_gmlc_dir) - area_names = _get_eligible_area_names(areas) - for name in area_names: - ## TODO: what else should be in here? - elements["area"][name] = dict() - - elements["branch"] = {} - # add the branches - branch_df = pd.read_csv(os.path.join(base_dir, 'branch.csv')) - for idx, row in branch_df.iterrows(): - name = str(row['UID']) - F_BUS = str(row['From Bus']) - T_BUS = str(row['To Bus']) - BR_R = float(row['R']) - BR_X = float(row['X']) - BR_B = float(row['B']) - RATE_A = float(row['Cont Rating']) - RATE_B = float(row['Cont Rating']) - RATE_C = float(row['Cont Rating']) - if RATE_A == 0: - RATE_A = None - if RATE_B == 0: - RATE_B = None - if RATE_C == 0: - RATE_C = None - TAP = float(row['Tr Ratio']) - SHIFT = 0.0 # these hard-coded values are the defaults - BR_STATUS = 1 # from the RTS-GMLC MATPOWER writer - ANGMIN = -90. - ANGMAX = 90. 
- PF = None # these values are not given - QF = None - PT = None - QT = None - - branch_dict = {"from_bus": F_BUS, "to_bus": T_BUS} - branch_dict["resistance"] = BR_R - branch_dict["reactance"] = BR_X - branch_dict["charging_susceptance"] = BR_B - - if TAP != 0.0: - branch_dict["transformer_tap_ratio"] = TAP - branch_dict["transformer_phase_shift"] = SHIFT - branch_dict["branch_type"] = "transformer" - else: - branch_dict["branch_type"] = "line" - - branch_dict["rating_long_term"] = RATE_A - branch_dict["rating_short_term"] = RATE_B - branch_dict["rating_emergency"] = RATE_C - branch_dict["angle_diff_min"] = ANGMIN - branch_dict["angle_diff_max"] = ANGMAX - assert (BR_STATUS == 0 or BR_STATUS == 1) - if BR_STATUS == 1: - branch_dict["in_service"] = True - else: - branch_dict["in_service"] = False - branch_dict["pf"] = PF - branch_dict["qf"] = QF - branch_dict["pt"] = PT - branch_dict["qt"] = QT - - elements["branch"][name] = branch_dict - - # add the generators - elements["generator"] = {} - RENEWABLE_TYPES = ['WIND', 'HYDRO', 'RTPV', 'PV'] - gen_df = pd.read_csv(os.path.join(base_dir, 'gen.csv')) - for idx, row in gen_df.iterrows(): - name = str(row['GEN UID']) - GEN_BUS = str(row['Bus ID']) - gen_dict = {"bus": GEN_BUS} - - # if this is a renewable, hydro, or storage need to handle differently - # (hydro schedules in RTS-GMLC are fixed) - if row['Fuel'] in ['Storage']: - pass - else: - # NOTE: for now, prescient doesn't handle CSP -- not clear how to model - if row['Unit Type'] == 'CSP': - continue - ## (mostly) MATPOWER data - PG = float(row['MW Inj']) - QG = float(row['MVAR Inj']) - QMAX = float(row['QMax MVAR']) - QMIN = float(row['QMin MVAR']) - RAMP_Q = 1. * float(row['Ramp Rate MW/Min']) - VG = float(row['V Setpoint p.u.']) - MBASE = 100. 
# set in RTS-GMLC MATPOWER writer - GEN_STATUS = 1 - PMAX = float(row['PMax MW']) - PMIN = float(row['PMin MW']) - FUEL = str(row['Fuel']) - UNIT_TYPE = str(row['Unit Type']) - - if UNIT_TYPE in RENEWABLE_TYPES: - gen_dict["generator_type"] = "renewable" - elif UNIT_TYPE == 'SYNC_COND': - ## TODO: should we have a flag for these? - gen_dict["generator_type"] = "thermal" - else: - gen_dict["generator_type"] = "thermal" - gen_dict["bus"] = GEN_BUS - gen_dict["mbase"] = MBASE - gen_dict["in_service"] = True - gen_dict["pg"] = PG - gen_dict["qg"] = QG - gen_dict["vg"] = VG - gen_dict["p_min"] = PMIN - gen_dict["p_max"] = PMAX - gen_dict["q_min"] = QMIN - gen_dict["q_max"] = QMAX - gen_dict["ramp_q"] = RAMP_Q - gen_dict["fuel"] = FUEL - gen_dict["unit_type"] = UNIT_TYPE - gen_dict["area"] = elements["bus"][gen_dict["bus"]]["area"] - gen_dict["zone"] = elements["bus"][gen_dict["bus"]]["zone"] - - # after this is only really needed for thermal units - if UNIT_TYPE in RENEWABLE_TYPES: - elements["generator"][name] = gen_dict - continue - - PC1 = 0.0 - PC2 = 0.0 - QC1MIN = 0.0 - QC1MAX = 0.0 - QC2MIN = 0.0 - QC2MAX = 0.0 - RAMP_AGC = 1. * float(row['Ramp Rate MW/Min']) - RAMP_10 = 10. * float(row['Ramp Rate MW/Min']) - RAMP_30 = 30. * float(row['Ramp Rate MW/Min']) - RAMP_UP_60 = 60. * float(row['Ramp Rate MW/Min']) - RAMP_DN_60 = 60. * float(row['Ramp Rate MW/Min']) - APF = 0.0 # 0.0 from RTS-GMLC MATPOWER writer - - # Gen cost - x = {} - ## round as in RTS-GMLC Prescient/topysp.py - x[0] = round(float(row['Output_pct_0']) * float(row['PMax MW']), 1) - x[1] = round(float(row['Output_pct_1']) * float(row['PMax MW']), 1) - x[2] = round(float(row['Output_pct_2']) * float(row['PMax MW']), 1) - x[3] = round(float(row['Output_pct_3']) * float(row['PMax MW']), 1) - - y = {} - y[0] = float(row['Fuel Price $/MMBTU']) * ((float(row['HR_avg_0']) * 1000. / 1000000.) * x[ - 0]) ## /1000. 
from the RTS-GMLC MATPOWER writer, - y[1] = float(row['Fuel Price $/MMBTU']) * (((x[1] - x[0]) * (float(row['HR_incr_1']) * 1000. / 1000000.))) + \ - y[0] - y[2] = float(row['Fuel Price $/MMBTU']) * (((x[2] - x[1]) * (float(row['HR_incr_2']) * 1000. / 1000000.))) + \ - y[1] - y[3] = float(row['Fuel Price $/MMBTU']) * (((x[3] - x[2]) * (float(row['HR_incr_3']) * 1000. / 1000000.))) + \ - y[2] - - # only include the cost coeffecients that matter - P_COEFF = [(x[i], round(y[i], 2)) for i in range(4) if - (((i == 0) or (x[i - 1], y[i - 1]) != (x[i], y[i])) and (x[i], y[i]) != (0., 0.))] - if P_COEFF == []: - P_COEFF = [(PMAX, 0.0)] - - # UC Data - MIN_UP_TIME = float(row['Min Up Time Hr']) - MIN_DN_TIME = float(row['Min Down Time Hr']) - - # Startup types and costs - COLD_HEAT = float(row['Start Heat Cold MBTU']) - WARM_HEAT = float(row['Start Heat Warm MBTU']) - HOT_HEAT = float(row['Start Heat Hot MBTU']) - - COLD_TIME = float(row['Start Time Cold Hr']) - WARM_TIME = float(row['Start Time Warm Hr']) - HOT_TIME = float(row['Start Time Hot Hr']) - - FUEL_PRICE = float(row['Fuel Price $/MMBTU']) - FIXED_START_COST = float(row['Non Fuel Start Cost $']) - - if (COLD_TIME <= MIN_DN_TIME) or (COLD_TIME == WARM_TIME == HOT_TIME): - STARTUP_COSTS = [(MIN_DN_TIME, round(COLD_HEAT * FUEL_PRICE + FIXED_START_COST, 2))] - elif WARM_TIME <= MIN_DN_TIME: - STARTUP_COSTS = [(MIN_DN_TIME, round(WARM_HEAT * FUEL_PRICE + FIXED_START_COST, 2)), \ - (COLD_TIME, round(COLD_HEAT * FUEL_PRICE + FIXED_START_COST, 2))] - else: - STARTUP_COSTS = [(MIN_DN_TIME, round(HOT_HEAT * FUEL_PRICE + FIXED_START_COST, 2)), \ - (WARM_TIME, round(WARM_HEAT * FUEL_PRICE + FIXED_START_COST, 2)), \ - (COLD_TIME, round(COLD_HEAT * FUEL_PRICE + FIXED_START_COST, 2))] - - SHUTDOWN_COST = 0.0 - - gen_dict["pc1"] = PC1 - gen_dict["pc2"] = PC2 - gen_dict["qc1_min"] = QC1MIN - gen_dict["qc1_max"] = QC1MAX - gen_dict["qc2_min"] = QC2MIN - gen_dict["qc2_max"] = QC2MAX - gen_dict["agc_capable"] = True - 
gen_dict["p_min_agc"] = gen_dict["p_min"] - gen_dict["p_max_agc"] = gen_dict["p_max"] - gen_dict["ramp_agc"] = RAMP_AGC - gen_dict["ramp_10"] = RAMP_10 - gen_dict["ramp_30"] = RAMP_30 - gen_dict["ramp_up_60min"] = RAMP_UP_60 - gen_dict["ramp_down_60min"] = RAMP_DN_60 - gen_dict["power_factor"] = APF - gen_dict["p_cost"] = {"data_type": "cost_curve", "cost_curve_type": "piecewise", "values": P_COEFF} - - gen_dict["startup_cost"] = STARTUP_COSTS - gen_dict["shutdown_cost"] = SHUTDOWN_COST - # these assumptions are the same as prescient-rtsgmlc - gen_dict["startup_capacity"] = PMIN - gen_dict["shutdown_capacity"] = PMIN - gen_dict["min_up_time"] = MIN_UP_TIME - gen_dict["min_down_time"] = MIN_DN_TIME - gen_dict["must_run"] = False - - elements["generator"][name] = gen_dict - - return model_data - - -def _read_rts_gmlc_table(file_name, simulation): - if simulation == "DAY_AHEAD": - _date_parser = lambda *columns: datetime(*map(int, columns[0:3]), int(columns[3]) - 1) - else: - minute_mutli = 5 - hour_divisor = 12 - time_periods_in_day = 24 * hour_divisor - _date_parser = lambda *columns: datetime(*map(int, columns[0:3]), \ - (int(columns[3]) - 1) // hour_divisor, - minute_mutli * ((int(columns[3]) - 1) % hour_divisor)) - return pd.read_csv(file_name, - header=0, - sep=',', - parse_dates=[[0, 1, 2, 3]], - date_parser=_date_parser) - - -def _read_rts_gmlc_reserve_table(file_name, begin_time, end_time, simulation): - table_dict = pd.read_csv(file_name, header=0, sep=',').T.to_dict() - - if simulation == "DAY_AHEAD": - hour_divisor = 1 - minute_mutli = 0 - time_periods_in_day = 24 - else: - minute_mutli = 5 - hour_divisor = 12 - time_periods_in_day = 24 * hour_divisor - - by_datetime_dict = dict() - for day_num, day_data in table_dict.items(): - year = day_data['Year'] - month = day_data['Month'] - day = day_data['Day'] - for i in range(1, time_periods_in_day + 1): - date_time = datetime(year=int(year), month=int(month), day=int(day), - hour=(i - 1) // hour_divisor, 
minute=minute_mutli * ((i - 1) % hour_divisor)) - if begin_time <= date_time < end_time: - by_datetime_dict[str(date_time)] = float(day_data[str(i)]) - return by_datetime_dict - - -def _make_time_series_dict(values): - return {"data_type": "time_series", "values": values} - - -def _get_datetimes(begin_time, end_time, base_dir, simulation): - datetime_format = "%Y-%m-%d %H:%M:%S" - - datestr = "YYYY-DD-MM" - midnight = " 00:00:00" - - if isinstance(begin_time, datetime): - pass - elif isinstance(begin_time, str): - if len(begin_time) == len(datestr): - begin_time += midnight - begin_time = datetime.strptime(begin_time, datetime_format) - else: - raise ValueError("Unable to parse begin_time") - - if isinstance(end_time, datetime): - pass - elif isinstance(end_time, str): - if len(end_time) == len(datestr): - end_time += midnight - end_time = datetime.strptime(end_time, datetime_format) - else: - raise ValueError("Unable to parse end_time") - - # stay in the times provided - rts_start_date, rts_end_date = _get_rts_gmlc_start_end_dates(base_dir, simulation) - assert begin_time >= rts_start_date - assert end_time <= rts_end_date - - # We only take times in whole hours (for now) - assert (begin_time.minute == 0. and begin_time.second == 0. and begin_time.microsecond == 0.) - assert (end_time.minute == 0. and end_time.second == 0. and end_time.microsecond == 0.) - - return begin_time, end_time - - -if __name__ == '__main__': - from egret.viz.generate_graphs import generate_stack_graph - from egret.models.unit_commitment import solve_unit_commitment, create_tight_unit_commitment_model - import matplotlib.pyplot as plt - - current_dir = os.path.dirname(os.path.abspath(__file__)) - rts_gmlc_dir = os.path.join(current_dir, '..', '..', '..', 'RTS-GMLC', - 'RTS_Data') # This is just the root of the RTS-GMLC data set. 
- - # This converts the load data (in RTS-GMLC format) such that individual loads have their own time series explicitly specified (instead of one system-wide time series). - # It should only need to be run once. - convert_load_by_area_to_source( - rts_gmlc_dir, "2020-01-01", "2020-12-31", - t0_state=None, - ) - - # Test model creation and UC solve for one day using the newly formatted data. - begin_time = "2020-07-05" - end_time = "2020-07-06" - - md = create_ModelData( - rts_gmlc_dir, begin_time, end_time, - simulation="DAY_AHEAD", - t0_state=None, - ) - - solved_md = solve_unit_commitment(md, - 'gurobi_persistent', - mipgap=0.001, - timelimit=None, - solver_tee=True, - symbolic_solver_labels=False, - options=None, - uc_model_generator=create_tight_unit_commitment_model, - relaxed=False, - return_model=False - ) - - fig, ax = generate_stack_graph( - solved_md, - title=begin_time, - show_individual_components=False, - plot_individual_generators=False, - x_tick_frequency=4, - ) - - plt.show() From 164bb12f1ec7e56884c703f3aea97dc251f0c5af Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Wed, 24 Feb 2021 17:10:04 -0700 Subject: [PATCH 05/13] Set initial generator state when reading RTS-GMLC. Initial state is taken from a passed in t0_state dict. If none is passed in, a file named "initial_status.csv" is read and data is pulled from there. That file can have 1, 2, or 3 data lines (initial_status, initial_p_output, and initial_q_output). It must have line 1, but if it doesn't have line 2 or 3, initial values are set to p_min and q_min. Note that if you are caching the results of parsing, initial state is not set (otherwise, every date range would end up with the same initial state). For models generated from the parsed cache, you can call rts_gmlc_parser.set_t0_data() on the resulting model. Also, treat ROR generators as HYDRO.
--- egret/parsers/rts_gmlc/parser.py | 71 ++++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index 19dc723c..17d1d935 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -88,12 +88,7 @@ def create_model_data_dict(rts_gmlc_dir:str, """ cache = parse_to_cache(rts_gmlc_dir, begin_time, end_time) model = cache.generate_model(simulation, begin_time, end_time) - if t0_state is not None: - for name, gen in model['elements']['generator']: - if gen['generator_type']=='thermal': - gen['initial_status'] = t0_state[name]['initial_status'] - gen['initial_p_output'] = t0_state[name]['initial_p_output'] - gen['initial_q_output'] = t0_state[name]['initial_q_output'] + set_t0_data(model.data, rts_gmlc_dir, t0_state) return model.data @@ -472,7 +467,7 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): # add the generators elements["generator"] = {} - RENEWABLE_TYPES = {'WIND', 'HYDRO', 'RTPV', 'PV'} + RENEWABLE_TYPES = {'WIND', 'HYDRO', 'RTPV', 'PV', 'ROR'} gen_df = pd.read_csv(os.path.join(base_dir,'gen.csv')) for idx,row in gen_df.iterrows(): # if this is storage we need to handle it differently @@ -506,6 +501,9 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): UNIT_TYPE = str(row['Unit Type']) if UNIT_TYPE in RENEWABLE_TYPES: gen_dict["generator_type"] = "renewable" + # ROR is treated as HYDRO by Egret + if UNIT_TYPE == 'ROR': + gen_dict["unit_type"] = "HYDRO" elif UNIT_TYPE == 'SYNC_COND': ## TODO: should we have a flag for these? gen_dict["generator_type"] = "thermal" @@ -773,3 +771,62 @@ def _parse_datetimes_if_strings(begin_time:Union[datetime,str], end_time:Union[d raise ValueError("Unable to parse end_time") return begin_time, end_time + +def set_t0_data(md:dict, base_dir:str="", t0_state:dict=None): + """ Put t0 information into the passed in mode dict + + Only t0 data for thermal generators is populated. 
+ + Data comes from: + * t0_state, if provided + * otherwise, a file called initial_status.csv, if present + * otherwise, t0 data is left blank + + If t0_state is provided, it should be organized as t0_state[name][value], + where `name` is the name of a generator, and `value` is 'initial_status', + 'initial_p_output', and 'initial_q_output'. For any generator included in + to_state, all three values must be present. + + If initial_status.csv is used, it must have a header row and may have + from 1 to 3 data rows. Row 1 is 'initial_status'. Row 2 is + 'initial_p_output'. Row 3 is 'initial_q_output'. Column headers are + the generator names. Default values are used for any missing rows. + + Any generators not mentioned in the data source are left untouched. + """ + if t0_state is not None: + for name, gen in md['elements']['generator']: + if gen['generator_type']=='thermal' and name in t0_state: + gen['initial_status'] = t0_state[name]['initial_status'] + gen['initial_p_output'] = t0_state[name]['initial_p_output'] + gen['initial_q_output'] = t0_state[name]['initial_q_output'] + else: + state_fname = os.path.join(base_dir, 'initial_status.csv') + if os.path.exists(state_fname): + import csv + with open(state_fname, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + + # We now have a list of rows, from 1 to 3 rows long. + # Row 1 is 'initial_status', row 2 is 'initial_p_output', and row 3 is 'initial_q_output'. 
+ # Any missing row uses defaults + row_count = len(rows) + for name, gen in md['elements']['generator'].items(): + if gen['generator_type'] != 'thermal': + continue + if name not in reader.fieldnames: + continue + gen['initial_status'] = float(rows[0][name]) + if gen['initial_status'] < 0: + gen['initial_p_output'] = 0.0 + gen['initial_q_output'] = 0.0 + else: + if row_count >= 2: + gen['initial_p_output'] = float(rows[1][name]) + else: + gen["initial_p_output"] = gen["p_min"] + if row_count >= 3: + gen['initial_q_output'] = float(rows[2][name]) + else: + gen["initial_q_output"] = max(0., gen["q_min"]) From b0d8a7c93e770d31a7ee6bed599f8d1154e459a0 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Fri, 26 Feb 2021 16:12:41 -0700 Subject: [PATCH 06/13] Make some RTS-GMLC fields optional or more flexible. Shunt-related columns can be omitted if they are never used. There is a variable number of fuel-related columns. The files in the RTS-GMLC repository use 5 columns, but the last of these 5 columns is never used (it is always 'NA'). The parser was hard-coded to read 4 columns and ignore the 5th. While this works for the repository files, it is overly rigid. The new code allows any number of columns, using whatever columns are present and appropriately populated for each generator. 
--- egret/parsers/rts_gmlc/parser.py | 81 ++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index 17d1d935..815c41ea 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -348,6 +348,7 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): bus_id_to_name = {} bus_areas = set() bus_df = pd.read_csv(os.path.join(base_dir,'bus.csv')) + has_shunt_cols = 'MW Shunt G' in bus_df and 'MVAR Shunt B' in bus_df for idx,row in bus_df.iterrows(): BUS_TYPE = row['Bus Type'] if not BUS_TYPE in bus_types: @@ -382,16 +383,17 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): } elements["load"][bus_name] = load_dict - GS = float(row['MW Shunt G']) - BS = float(row['MVAR Shunt B']) - if GS != 0 or BS != 0: - shunt_dict = { - "shunt_type":"fixed", - "bus": bus_name, - "gs": GS, - "bs": BS - } - elements["shunt"][bus_name] = shunt_dict + if has_shunt_cols: + GS = float(row['MW Shunt G']) + BS = float(row['MVAR Shunt B']) + if GS != 0 or BS != 0: + shunt_dict = { + "shunt_type":"fixed", + "bus": bus_name, + "gs": GS, + "bs": BS + } + elements["shunt"][bus_name] = shunt_dict if BUS_TYPE == 'Ref': va = bus_dict['va'] @@ -519,28 +521,41 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): # Gen cost ## round as in RTS-GMLC Prescient/topysp.py pmax = float(row['PMax MW']) - x = {i: round(float(row[f'Output_pct_{i}'])*pmax, 1) - for i in range(4) - } - - ## /1000. from the RTS-GMLC MATPOWER writer -- - ## heat rates are in BTU/kWh, 1BTU == 10^-6 MMBTU, 1kWh == 10^-3 MWh, so MMBTU/MWh == 10^3/10^6 * BTU/kWh - f = {} - f[0] = (float(row['HR_avg_0'])*1000./ 1000000.)*x[0] - for i in range(1,4): - f[i] = (((x[i]-x[i-1])*(float(row[f'HR_incr_{i}'])*1000. 
/ 1000000.))) + f[i-1] - - fuel_price = float(row['Fuel Price $/MMBTU']) - y = {i: fuel_price*f[i] for i in range(4)} - - # only include the cost coeffecients that matter - P_COEFF = [ (x[i], round(y[i],2)) for i in range(4) if (((i == 0) or (x[i-1],y[i-1]) != (x[i], y[i])) and (x[i], y[i]) != (0.,0.)) ] - if P_COEFF == []: - P_COEFF = [(pmax, 0.0)] - - F_COEFF = [ (x[i], round(f[i],2)) for i in range(4) if (((i == 0) or (x[i-1],f[i-1]) != (x[i], f[i])) and (x[i], f[i]) != (0.,0.)) ] - if F_COEFF == []: - F_COEFF = [(pmax, 0.0)] + # There can be any number of 'Output_pct_' columns. + # Stop at the first one that doesn't exist or doesn't hold a number + def valid_output_pcts(): + for i in range(50): + try: + val = float(row[f'Output_pct_{i}']) + yield (i, val) + except: + return + x = {i: round(val*pmax, 1) + for i,val in valid_output_pcts() + } + fuel_field_count = len(x) + + if fuel_field_count > 0: + ## /1000. from the RTS-GMLC MATPOWER writer -- + ## heat rates are in BTU/kWh, 1BTU == 10^-6 MMBTU, 1kWh == 10^-3 MWh, so MMBTU/MWh == 10^3/10^6 * BTU/kWh + f = {} + f[0] = (float(row['HR_avg_0'])*1000./ 1000000.)*x[0] + for i in range(1,fuel_field_count): + f[i] = (((x[i]-x[i-1])*(float(row[f'HR_incr_{i}'])*1000. 
/ 1000000.))) + f[i-1] + + fuel_price = float(row['Fuel Price $/MMBTU']) + y = {i: fuel_price*f[i] for i in range(fuel_field_count)} + + # only include the cost coeffecients that matter + P_COEFF = [ (x[i], round(y[i],2)) for i in range(fuel_field_count) if (((i == 0) or (x[i-1],y[i-1]) != (x[i], y[i])) and (x[i], y[i]) != (0.,0.)) ] + if P_COEFF == []: + P_COEFF = [(pmax, 0.0)] + gen_dict["p_cost"] = {"data_type": "cost_curve", "cost_curve_type":"piecewise", "values": P_COEFF } + + F_COEFF = [ (x[i], round(f[i],2)) for i in range(fuel_field_count) if (((i == 0) or (x[i-1],f[i-1]) != (x[i], f[i])) and (x[i], f[i]) != (0.,0.)) ] + if F_COEFF == []: + F_COEFF = [(pmax, 0.0)] + gen_dict["p_fuel"] = {"data_type": "fuel_curve", "values": F_COEFF } # UC Data MIN_DN_TIME = float(row['Min Down Time Hr']) @@ -595,8 +610,6 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): gen_dict["ramp_down_60min"] = 60.*ramp_q gen_dict["power_factor"] = 0.0 - gen_dict["p_cost"] = {"data_type": "cost_curve", "cost_curve_type":"piecewise", "values": P_COEFF } - gen_dict["p_fuel"] = {"data_type": "fuel_curve", "values": F_COEFF } gen_dict["fuel_cost"] = fuel_price # these assumptions are the same as prescient-rtsgmlc From 4316c9c2a81a8fbeab51fb154d79ceb3125c54a7 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Fri, 26 Feb 2021 16:14:53 -0700 Subject: [PATCH 07/13] A parsed data cache now allows you to get the model skeleton and populate it in two separate steps. 
--- egret/parsers/rts_gmlc/parsed_cache.py | 39 ++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/egret/parsers/rts_gmlc/parsed_cache.py b/egret/parsers/rts_gmlc/parsed_cache.py index 429fa326..e81db208 100644 --- a/egret/parsers/rts_gmlc/parsed_cache.py +++ b/egret/parsers/rts_gmlc/parsed_cache.py @@ -48,11 +48,44 @@ def __init__(self, model_skeleton:dict, def generate_model(self, simulation_type:str, begin_time:datetime, end_time:datetime) -> ModelData: - md = copy.deepcopy(self.skeleton) - self._process_timeseries_data(md, simulation_type, begin_time, end_time) - self._insert_system_data(md, simulation_type, begin_time, end_time) + """ Create a new model populated with requested data + + Parameters + ---------- + simulation_type:str + Either 'DAY_AHEAD' or 'REAL_TIME' + begin_time:datetime + The earliest time to include in the returned data + end_time:datetime + The earliest time to NOT include in the returned data + """ + md = self.get_new_skeleton() + self.populate_skeleton_with_data(md, simulation_type, begin_time, end_time) return ModelData(md) + def get_new_skeleton(self) -> dict: + """ Get a new model dict with system elements but no time-specific data + """ + return copy.deepcopy(self.skeleton) + + def populate_skeleton_with_data(self, skeleton_dict:dict, simulation_type:str, + begin_time:datetime, end_time:datetime) -> None: + """ Update an existing model dict with requested data + + Parameters + ---------- + skeleton_dict:dict + The skeleton model dict to populate with data + simulation_type:str + Either 'DAY_AHEAD' or 'REAL_TIME' + begin_time:datetime + The earliest time to include in the returned data + end_time:datetime + The earliest time to NOT include in the returned data + """ + self._process_timeseries_data(skeleton_dict, simulation_type, begin_time, end_time) + self._insert_system_data(skeleton_dict, simulation_type, begin_time, end_time) + def _process_timeseries_data(self, md:dict, simulation_type:str, 
begin_time:datetime, end_time:datetime) -> None: df = self.timeseries_df From 17226faa3c3530973d9396393033410928d40d03 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Thu, 8 Apr 2021 17:31:34 -0600 Subject: [PATCH 08/13] Fixes/tweaks to RTS-GMLC parser. * Zone name can be any string, not just integers * Don't include startup_cost or p_cost. Include startup_fuel, p_fuel, and non_fuel_startup_cost instead. * More flexibility in how startup fuel is specified when you have fewer than 3 points. It no longer matters which of the 3 startup fuel columns you leave blank in this case, as long as the provided data is consistent (cold is longer than hot, for example). * More flexibility in how p_fuel fuel curves are specified. You can have any number of columns (up to 50). For a fuel curve with N valid points, you only have to fill in the first N fuel curve columns and leave the rest blank. The number of points in the fuel curve can be different for each generator. * Omit several properties that were in the original RTS-GMLC parser but have no meaning to Egret. 
* Some optional properties are now left out of the JSON if the corresponding cells in the csv are left blank --- egret/parsers/rts_gmlc/parser.py | 86 ++++++++++++++------------------ 1 file changed, 38 insertions(+), 48 deletions(-) diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index 815c41ea..c0b534bb 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -324,6 +324,7 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): data : dict Returns a dict loaded from the RTS-GMLC data """ + from math import isnan base_dir = rts_gmlc_dir @@ -364,7 +365,7 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): "v_min": 0.95, "v_max": 1.05, "area": str(row['Area']), - "zone": str(int(row['Zone'])), + "zone": str(row['Zone']), } if bus_dict["base_kv"] <= 0: @@ -448,6 +449,7 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): name = str(row['UID']) elements["branch"][name] = branch_dict + branch_df = None # add the DC branches if os.path.exists(os.path.join(base_dir,'dc_branch.csv')): @@ -466,6 +468,7 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): name = str(row['UID']) elements["branch"][name] = branch_dict + branch_df = None # add the generators elements["generator"] = {} @@ -500,6 +503,11 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir): "zone": elements['bus'][bus_name]['zone'] } + # Remove optional values if not present + for key in ('p_min', 'p_max', 'q_min', 'q_max', 'ramp_q'): + if isnan(gen_dict[key]): + del gen_dict[key] + UNIT_TYPE = str(row['Unit Type']) if UNIT_TYPE in RENEWABLE_TYPES: gen_dict["generator_type"] = "renewable" @@ -527,6 +535,8 @@ def valid_output_pcts(): for i in range(50): try: val = float(row[f'Output_pct_{i}']) + if isnan(val): + return yield (i, val) except: return @@ -543,15 +553,6 @@ def valid_output_pcts(): for i in range(1,fuel_field_count): f[i] = (((x[i]-x[i-1])*(float(row[f'HR_incr_{i}'])*1000. 
/ 1000000.))) + f[i-1] - fuel_price = float(row['Fuel Price $/MMBTU']) - y = {i: fuel_price*f[i] for i in range(fuel_field_count)} - - # only include the cost coeffecients that matter - P_COEFF = [ (x[i], round(y[i],2)) for i in range(fuel_field_count) if (((i == 0) or (x[i-1],y[i-1]) != (x[i], y[i])) and (x[i], y[i]) != (0.,0.)) ] - if P_COEFF == []: - P_COEFF = [(pmax, 0.0)] - gen_dict["p_cost"] = {"data_type": "cost_curve", "cost_curve_type":"piecewise", "values": P_COEFF } - F_COEFF = [ (x[i], round(f[i],2)) for i in range(fuel_field_count) if (((i == 0) or (x[i-1],f[i-1]) != (x[i], f[i])) and (x[i], f[i]) != (0.,0.)) ] if F_COEFF == []: F_COEFF = [(pmax, 0.0)] @@ -560,66 +561,55 @@ def valid_output_pcts(): # UC Data MIN_DN_TIME = float(row['Min Down Time Hr']) - # Startup types and costs - COLD_HEAT = float(row['Start Heat Cold MBTU']) - WARM_HEAT = float(row['Start Heat Warm MBTU']) - HOT_HEAT = float(row['Start Heat Hot MBTU']) - - COLD_TIME = float(row['Start Time Cold Hr']) - WARM_TIME = float(row['Start Time Warm Hr']) - HOT_TIME = float(row['Start Time Hot Hr']) - - FIXED_START_COST = float(row['Non Fuel Start Cost $']) + # Startup types and costs, from hot to cold + startup_heat = (float(row['Start Heat Hot MBTU']), + float(row['Start Heat Warm MBTU']), + float(row['Start Heat Cold MBTU'])) + startup_time = (float(row['Start Time Hot Hr']), + float(row['Start Time Warm Hr']), + float(row['Start Time Cold Hr'])) + + # Arrange fuel requirements from hottest to coldest, ignoring missing values. 
+ startup_fuel = [] + for i in range(3): + # Skip blank values + if isnan(startup_time[i]) or isnan(startup_heat[i]): + continue - if (COLD_TIME <= MIN_DN_TIME) or (COLD_TIME == WARM_TIME == HOT_TIME): - STARTUP_COSTS = [(MIN_DN_TIME, round(COLD_HEAT*fuel_price + FIXED_START_COST, 2))] - STARTUP_FUEL = [(MIN_DN_TIME, COLD_HEAT)] + t = max(startup_time[i], MIN_DN_TIME) + f = startup_heat[i] - elif WARM_TIME <= MIN_DN_TIME: - STARTUP_COSTS = [(MIN_DN_TIME, round(WARM_HEAT*fuel_price + FIXED_START_COST, 2)),\ - (COLD_TIME, round(COLD_HEAT*fuel_price + FIXED_START_COST, 2))] - STARTUP_FUEL = [(MIN_DN_TIME, WARM_HEAT),\ - (COLD_TIME, COLD_HEAT)] + # For entries with matching times, use to the colder data + if len(startup_fuel) > 0 and startup_fuel[-1][0] == t: + startup_fuel[-1] = (t,f) + else: + startup_fuel.append((t,f)) - else: - STARTUP_COSTS = [(MIN_DN_TIME, round(HOT_HEAT*fuel_price+FIXED_START_COST,2)),\ - (WARM_TIME, round(WARM_HEAT*fuel_price+FIXED_START_COST,2)),\ - (COLD_TIME, round(COLD_HEAT*fuel_price+FIXED_START_COST,2))] - STARTUP_FUEL = [(MIN_DN_TIME, HOT_HEAT),\ - (WARM_TIME, WARM_HEAT),\ - (COLD_TIME, COLD_HEAT)] - gen_dict["startup_cost"] = STARTUP_COSTS - gen_dict["startup_fuel"] = STARTUP_FUEL + gen_dict["startup_fuel"] = startup_fuel + fixed_startup_cost = float(row['Non Fuel Start Cost $']) + if not isnan(fixed_startup_cost): + gen_dict["non_fuel_startup_cost"] = fixed_startup_cost gen_dict["shutdown_cost"] = 0.0 - gen_dict["pc1"] = 0.0 - gen_dict["pc2"] = 0.0 - gen_dict["qc1_min"] = 0.0 - gen_dict["qc1_max"] = 0.0 - gen_dict["qc2_min"] = 0.0 - gen_dict["qc2_max"] = 0.0 gen_dict["agc_capable"] = True gen_dict["p_min_agc"] = gen_dict["p_min"] gen_dict["p_max_agc"] = gen_dict["p_max"] ramp_q = gen_dict['ramp_q'] gen_dict["ramp_agc"] = ramp_q - gen_dict["ramp_10"] = 10.*ramp_q - gen_dict["ramp_30"] = 30.*ramp_q gen_dict["ramp_up_60min"] = 60.*ramp_q gen_dict["ramp_down_60min"] = 60.*ramp_q - gen_dict["power_factor"] = 0.0 - gen_dict["fuel_cost"] = 
fuel_price + gen_dict["fuel_cost"] = float(row['Fuel Price $/MMBTU']) # these assumptions are the same as prescient-rtsgmlc gen_dict["startup_capacity"] = gen_dict['p_min'] gen_dict["shutdown_capacity"] = gen_dict['p_min'] gen_dict["min_up_time"] = float(row['Min Up Time Hr']) gen_dict["min_down_time"] = MIN_DN_TIME - gen_dict["must_run"] = False elements["generator"][name] = gen_dict + gen_df = None # Add the reserves reserve_df = pd.read_csv(os.path.join(base_dir,'reserves.csv')) From f18d48205d8213b10425c6778e100c6beb0363be Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Fri, 9 Apr 2021 13:51:14 -0600 Subject: [PATCH 09/13] Don't include end_time in model instances returned from a parsed_cache --- egret/parsers/rts_gmlc/parsed_cache.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/egret/parsers/rts_gmlc/parsed_cache.py b/egret/parsers/rts_gmlc/parsed_cache.py index e81db208..fb0c52a6 100644 --- a/egret/parsers/rts_gmlc/parsed_cache.py +++ b/egret/parsers/rts_gmlc/parsed_cache.py @@ -16,6 +16,7 @@ from datetime import datetime import copy +from datetime import timedelta from egret.data.model_data import ModelData @@ -83,6 +84,9 @@ def populate_skeleton_with_data(self, skeleton_dict:dict, simulation_type:str, end_time:datetime The earliest time to NOT include in the returned data """ + + #Because pandas includes the end of a range, reduce our end time by one second + end_time = end_time - timedelta(seconds=1) self._process_timeseries_data(skeleton_dict, simulation_type, begin_time, end_time) self._insert_system_data(skeleton_dict, simulation_type, begin_time, end_time) From 43a95a19d9d56e94ba4067502a3982b0b50df489 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Fri, 9 Apr 2021 18:37:08 -0600 Subject: [PATCH 10/13] A few tweaks to rts-gmlc parser to keep prescient happy --- egret/parsers/rts_gmlc/parsed_cache.py | 5 +++-- egret/parsers/rts_gmlc/parser.py | 23 +++++++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git 
a/egret/parsers/rts_gmlc/parsed_cache.py b/egret/parsers/rts_gmlc/parsed_cache.py index fb0c52a6..c59ef8ab 100644 --- a/egret/parsers/rts_gmlc/parsed_cache.py +++ b/egret/parsers/rts_gmlc/parsed_cache.py @@ -136,6 +136,7 @@ def _process_area_timeseries(self, md:dict, begin_time:datetime, assert(param == "MW Load") data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time] + skeleton_loads = self.skeleton['elements']['load'] for bus, load_dict in md['elements']['load'].items(): # Skip loads from other areas if load_dict['area'] != area_name: @@ -146,8 +147,8 @@ def _process_area_timeseries(self, md:dict, begin_time:datetime, # Also replace q_load, if present, with timeseries p_factor = self.load_participation_factors[bus] # save skeleton's scalar p_load - p_load = load_dict['p_load'] if 'p_load' in load_dict else None - # overwrite skeleton's p_load with timeseries + p_load = skeleton_loads[bus]['p_load'] if 'p_load' in skeleton_loads[bus] else None + # overwrite p_load with timeseries load_dict['p_load'] = { 'data_type': 'time_series', 'values' : [v*p_factor for v in data] } if p_load is not None and 'q_load' in load_dict: diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index c0b534bb..a6b7cb4e 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -11,7 +11,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Dict, Union + from typing import Dict, Union, Optional import os.path import pandas as pd @@ -25,7 +25,7 @@ def create_ModelData(rts_gmlc_dir:str, begin_time:Union[datetime,str], end_time:Union[datetime,str], - simulation:str="DAY_AHEAD", t0_state:dict = None): + simulation:str="DAY_AHEAD", t0_state:Optional[dict] = None): """ Create a ModelData object from the RTS-GMLC data. 
@@ -43,11 +43,12 @@ def create_ModelData(rts_gmlc_dir:str, simulation : str Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, default is "DAY_AHEAD". - t0_state : dict or Nonetype + t0_state : dict or None Keys of this dict are thermal generator names, each element of which is another dictionary with keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, default values are loaded. + If t0_state is None, values are read from initial_status.csv in the rts_gmlc_dir. + If that file does not exist, default values are loaded. Returns ------- @@ -58,7 +59,7 @@ def create_ModelData(rts_gmlc_dir:str, def create_model_data_dict(rts_gmlc_dir:str, begin_time:Union[datetime,str], end_time:Union[datetime,str], - simulation:str="DAY_AHEAD", t0_state:dict = None): + simulation:str="DAY_AHEAD", t0_state:Optional[dict]=None): """ Create a model_data dictionary from the RTS-GMLC data. @@ -86,15 +87,15 @@ def create_model_data_dict(rts_gmlc_dir:str, ------- dict : A dictionary in the format required for the ModelData object. 
""" - cache = parse_to_cache(rts_gmlc_dir, begin_time, end_time) + cache = parse_to_cache(rts_gmlc_dir, begin_time, end_time, t0_state) model = cache.generate_model(simulation, begin_time, end_time) - set_t0_data(model.data, rts_gmlc_dir, t0_state) return model.data def parse_to_cache(rts_gmlc_dir:str, begin_time:Union[datetime,str], - end_time:Union[datetime,str]) -> ParsedCache: + end_time:Union[datetime,str], + t0_state:Optional[dict]=None) -> ParsedCache: ''' Parse data in RTS-GMLC format, keeping the portions between a start and end time rts_gmlc_dir : str @@ -129,6 +130,8 @@ def parse_to_cache(rts_gmlc_dir:str, load_participation_factors = _compute_bus_load_participation_factors(model_data) + set_t0_data(model_data, rts_gmlc_dir, None) + return ParsedCache(model_data, begin_time, end_time, minutes_per_period['DAY_AHEAD'], minutes_per_period['REAL_TIME'], timeseries_df, load_participation_factors) @@ -775,7 +778,7 @@ def _parse_datetimes_if_strings(begin_time:Union[datetime,str], end_time:Union[d return begin_time, end_time -def set_t0_data(md:dict, base_dir:str="", t0_state:dict=None): +def set_t0_data(md:dict, base_dir:str="", t0_state:Optional[dict]=None): """ Put t0 information into the passed in mode dict Only t0 data for thermal generators is populated. @@ -788,7 +791,7 @@ def set_t0_data(md:dict, base_dir:str="", t0_state:dict=None): If t0_state is provided, it should be organized as t0_state[name][value], where `name` is the name of a generator, and `value` is 'initial_status', 'initial_p_output', and 'initial_q_output'. For any generator included in - to_state, all three values must be present. + t0_state, all three values must be present. If initial_status.csv is used, it must have a header row and may have from 1 to 3 data rows. Row 1 is 'initial_status'. 
Row 2 is From 689f359f00bf4d1393998ff88272dea534711762 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Thu, 20 May 2021 16:48:48 -0600 Subject: [PATCH 11/13] Address issues found during review --- egret/parsers/rts_gmlc/parser.py | 97 +++++++++++++++++++------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index a6b7cb4e..de4d52d7 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -11,11 +11,13 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Dict, Union, Optional + from typing import Dict, Union, Optional, Tuple +import sys import os.path import pandas as pd from datetime import datetime, timedelta +import dateutil.parser from collections import namedtuple import egret.data.model_data as md @@ -25,7 +27,7 @@ def create_ModelData(rts_gmlc_dir:str, begin_time:Union[datetime,str], end_time:Union[datetime,str], - simulation:str="DAY_AHEAD", t0_state:Optional[dict] = None): + simulation:str="DAY_AHEAD", t0_state:Optional[dict] = None) -> md.ModelData: """ Create a ModelData object from the RTS-GMLC data. @@ -33,7 +35,7 @@ def create_ModelData(rts_gmlc_dir:str, Parameters ---------- rts_gmlc_dir : str - Path to RTS-GMLC directory + Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). begin_time : datetime.datetime or str Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, the later of which assumes a midnight start. @@ -48,7 +50,7 @@ def create_ModelData(rts_gmlc_dir:str, keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the generator is on at t0, the real power output at t0, and the reactive power output at t0. If t0_state is None, values are read from initial_status.csv in the rts_gmlc_dir. - If that file does not exist, default values are loaded. 
+ If that file does not exist, no initial state data is set in the model. Returns ------- @@ -59,7 +61,7 @@ def create_ModelData(rts_gmlc_dir:str, def create_model_data_dict(rts_gmlc_dir:str, begin_time:Union[datetime,str], end_time:Union[datetime,str], - simulation:str="DAY_AHEAD", t0_state:Optional[dict]=None): + simulation:str="DAY_AHEAD", t0_state:Optional[dict]=None) -> dict: """ Create a model_data dictionary from the RTS-GMLC data. @@ -67,7 +69,7 @@ def create_model_data_dict(rts_gmlc_dir:str, Parameters ---------- rts_gmlc_dir : str - Path to RTS-GMLC directory + Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). begin_time : datetime.datetime or str Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, the later of which assumes a midnight start. @@ -77,16 +79,20 @@ def create_model_data_dict(rts_gmlc_dir:str, simulation : str Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, default is "DAY_AHEAD". - t0_state : dict or Nonetype + t0_state : dict or None Keys of this dict are thermal generator names, each element of which is another dictionary with keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, default values are loaded. + If this is None, no initial state data is included in the dict. Returns ------- dict : A dictionary in the format required for the ModelData object. 
""" + + # Convert date string to datetimes, if necessary + begin_time, end_time = _parse_datetimes_if_strings(begin_time, end_time) + cache = parse_to_cache(rts_gmlc_dir, begin_time, end_time, t0_state) model = cache.generate_model(simulation, begin_time, end_time) return model.data @@ -99,7 +105,7 @@ def parse_to_cache(rts_gmlc_dir:str, ''' Parse data in RTS-GMLC format, keeping the portions between a start and end time rts_gmlc_dir : str - Path to RTS-GMLC directory + Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). begin_time : datetime.datetime or str Beginning of time horizon. end_time : datetime.datetime or str @@ -107,6 +113,11 @@ def parse_to_cache(rts_gmlc_dir:str, simulation : str Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, default is "DAY_AHEAD". + t0_state : dict or None + Keys of this dict are thermal generator names, each element of which is another dictionary with + keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the + generator is on at t0, the real power output at t0, and the reactive power output at t0. + If this is None, initial state data is not included in the cache. 
''' if not os.path.exists(rts_gmlc_dir): raise ValueError(f'RTS-GMLC directory "{rts_gmlc_dir}" does not exist') @@ -130,7 +141,7 @@ def parse_to_cache(rts_gmlc_dir:str, load_participation_factors = _compute_bus_load_participation_factors(model_data) - set_t0_data(model_data, rts_gmlc_dir, None) + set_t0_data(model_data, rts_gmlc_dir, t0_state) return ParsedCache(model_data, begin_time, end_time, minutes_per_period['DAY_AHEAD'], minutes_per_period['REAL_TIME'], @@ -147,10 +158,9 @@ def _read_metadata(base_dir:str) -> pd.DataFrame: return metadata_df -def _get_data_date_range(metadata_df): +def _get_data_date_range(metadata_df) -> Tuple[datetime, datetime]: ''' Get the range of dates for which there is data available ''' - import dateutil.parser # Data start time row = metadata_df.loc['Date_From'] @@ -313,14 +323,14 @@ def _read_2D_timeseries_file(file_name:str, minutes_per_period:int, # Create and return a new 1-column DataFrame return pd.DataFrame({column_name: s}) -def _create_rtsgmlc_skeleton(rts_gmlc_dir): +def _create_rtsgmlc_skeleton(rts_gmlc_dir:str): """ Creates a data dictionary from the RTS-GMLC data files, without loading hourly data Parameters ---------- rts_gmlc_dir : str - Path to RTS-GMLC directory + Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). Returns ------- @@ -588,6 +598,11 @@ def valid_output_pcts(): else: startup_fuel.append((t,f)) + # If the warmest fuel requirement has a time longer than the minimum + # down time, extend that warmest requirement down to minimum down time. 
+ if len(startup_fuel) > 0 and startup_fuel[0][0] > MIN_DN_TIME: + startup_fuel[0] = (MIN_DN_TIME, startup_fuel[0][1]) + gen_dict["startup_fuel"] = startup_fuel fixed_startup_cost = float(row['Non Fuel Start Cost $']) if not isnan(fixed_startup_cost): @@ -741,6 +756,32 @@ def _read_timeseries_data(model_data:dict, rts_gmlc_dir:str, return timeseries_pointer_df +def _convert_to_datetime(when:Union[datetime,str]): + ''' + Convert an object to a datetime, if it is not already one. + + Parameters + ---------- + when: datetime or str + The date and time to be returned or parsed + + Returns + ------- + datetime + The passed in object as a datetime, parsing it if necessary + + If `when` is a datetime it is simply returned. + If `when` is a string it is parsed, inferring the format + If `when` is any other type, a TypeError is raised + + ''' + if isinstance(when, datetime): + return when + elif isinstance(when, str): + return dateutil.parser.parse(when) + else: + raise TypeError(f'Invalid argument, expected a datetime or str, got a {type(when)}') + def _parse_datetimes_if_strings(begin_time:Union[datetime,str], end_time:Union[datetime,str]): ''' Ensure both dates are datetimes, parsing date strings if necessary. 
@@ -752,34 +793,14 @@ def _parse_datetimes_if_strings(begin_time:Union[datetime,str], end_time:Union[d end_time:datetime The end_time as a datetime, parsing it if necessary ''' - - datetime_format = "%Y-%m-%d %H:%M:%S" - - datestr = "YYYY-DD-MM" - midnight = " 00:00:00" - - if isinstance(begin_time,datetime): - pass - elif isinstance(begin_time,str): - if len(begin_time) == len(datestr): - begin_time += midnight - begin_time = datetime.strptime(begin_time,datetime_format) - else: - raise ValueError("Unable to parse begin_time") - - if isinstance(end_time,datetime): - pass - elif isinstance(end_time,str): - if len(end_time) == len(datestr): - end_time += midnight - end_time = datetime.strptime(end_time,datetime_format) - else: - raise ValueError("Unable to parse end_time") + + begin_time = _convert_to_datetime(begin_time) + end_time = _convert_to_datetime(end_time) return begin_time, end_time def set_t0_data(md:dict, base_dir:str="", t0_state:Optional[dict]=None): - """ Put t0 information into the passed in mode dict + """ Put t0 information into the passed in model dict Only t0 data for thermal generators is populated. 
From f29bd46c2cfbae1face8b9eebe13307fbcaa370a Mon Sep 17 00:00:00 2001 From: bknueven <30801372+bknueven@users.noreply.github.com> Date: Fri, 21 May 2021 11:34:55 -0600 Subject: [PATCH 12/13] ignoring dc_branches and adding default initial status (#1) --- egret/parsers/rts_gmlc/parser.py | 105 +++++++++++++++++-------------- 1 file changed, 58 insertions(+), 47 deletions(-) diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index de4d52d7..b2ed836d 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -20,6 +20,7 @@ import dateutil.parser from collections import namedtuple +from egret.common.log import logger import egret.data.model_data as md from .parsed_cache import ParsedCache @@ -465,23 +466,25 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir:str): branch_df = None # add the DC branches - if os.path.exists(os.path.join(base_dir,'dc_branch.csv')): - branch_df = pd.read_csv(os.path.join(base_dir,'dc_branch.csv')) - for idx,row in branch_df.iterrows(): - - # TODO: I have no idea what field names Egrets expects or supports for DC branches. - # The code below is just a placeholder. - branch_dict = { - "from_bus": bus_id_to_name[str(row['From Bus'])], - "to_bus": bus_id_to_name[str(row['To Bus'])], - "in_service": True, - "branch_type": "dc", - "resistance": float(row['R Line']) - } - - name = str(row['UID']) - elements["branch"][name] = branch_dict - branch_df = None + # TODO: see issue #229 + #if os.path.exists(os.path.join(base_dir,'dc_branch.csv')): + # elements["dc_branch"] = {} + # branch_df = pd.read_csv(os.path.join(base_dir,'dc_branch.csv')) + # for idx,row in branch_df.iterrows(): + + # # TODO: I have no idea what field names Egrets expects or supports for DC branches. + # # The code below is just a placeholder. 
+ # branch_dict = { + # "from_bus": bus_id_to_name[str(row['From Bus'])], + # "to_bus": bus_id_to_name[str(row['To Bus'])], + # "in_service": True, + # "branch_type": "dc", + # "resistance": float(row['R Line']) + # } + + # name = str(row['UID']) + # elements["dc_branch"][name] = branch_dict + # branch_df = None # add the generators elements["generator"] = {} @@ -822,38 +825,46 @@ def set_t0_data(md:dict, base_dir:str="", t0_state:Optional[dict]=None): Any generators not mentioned in the data source are left untouched. """ if t0_state is not None: - for name, gen in md['elements']['generator']: + for name, gen in md['elements']['generator'].items(): if gen['generator_type']=='thermal' and name in t0_state: gen['initial_status'] = t0_state[name]['initial_status'] gen['initial_p_output'] = t0_state[name]['initial_p_output'] gen['initial_q_output'] = t0_state[name]['initial_q_output'] - else: - state_fname = os.path.join(base_dir, 'initial_status.csv') - if os.path.exists(state_fname): - import csv - with open(state_fname, 'r') as f: - reader = csv.DictReader(f) - rows = list(reader) - - # We now have a list of rows, from 1 to 3 rows long. - # Row 1 is 'initial_status', row 2 is 'initial_p_output', and row 3 is 'initial_q_output'. - # Any missing row uses defaults - row_count = len(rows) - for name, gen in md['elements']['generator'].items(): - if gen['generator_type'] != 'thermal': - continue - if name not in reader.fieldnames: - continue - gen['initial_status'] = float(rows[0][name]) - if gen['initial_status'] < 0: - gen['initial_p_output'] = 0.0 - gen['initial_q_output'] = 0.0 + return + + state_fname = os.path.join(base_dir, 'initial_status.csv') + if os.path.exists(state_fname): + import csv + with open(state_fname, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + + # We now have a list of rows, from 1 to 3 rows long. + # Row 1 is 'initial_status', row 2 is 'initial_p_output', and row 3 is 'initial_q_output'. 
+ # Any missing row uses defaults + row_count = len(rows) + for name, gen in md['elements']['generator'].items(): + if gen['generator_type'] != 'thermal': + continue + if name not in reader.fieldnames: + continue + gen['initial_status'] = float(rows[0][name]) + if gen['initial_status'] < 0: + gen['initial_p_output'] = 0.0 + gen['initial_q_output'] = 0.0 + else: + if row_count >= 2: + gen['initial_p_output'] = float(rows[1][name]) else: - if row_count >= 2: - gen['initial_p_output'] = float(rows[1][name]) - else: - gen["initial_p_output"] = gen["p_min"] - if row_count >= 3: - gen['initial_q_output'] = float(rows[2][name]) - else: - gen["initial_q_output"] = max(0., gen["q_min"]) + gen["initial_p_output"] = gen["p_min"] + if row_count >= 3: + gen['initial_q_output'] = float(rows[2][name]) + else: + gen["initial_q_output"] = max(0., gen["q_min"]) + else: + logger.warning("Setting default t0 state in RTS-GMLC parser") + for name, gen in md['elements']['generator'].items(): + if gen['generator_type']=='thermal': + gen['initial_status'] = gen['min_up_time']+1 + gen['initial_p_output'] = gen['p_min'] + gen['initial_q_output'] = 0. From dece0ee6c7905408d62150cdc71b8879b6dafd31 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Fri, 21 May 2021 11:36:52 -0600 Subject: [PATCH 13/13] Cleaning up a comment --- egret/parsers/rts_gmlc/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index b2ed836d..c1938c6b 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -472,7 +472,7 @@ def _create_rtsgmlc_skeleton(rts_gmlc_dir:str): # branch_df = pd.read_csv(os.path.join(base_dir,'dc_branch.csv')) # for idx,row in branch_df.iterrows(): - # # TODO: I have no idea what field names Egrets expects or supports for DC branches. + # # TODO: The fields below don't match what Egrets expects or supports for DC branches. # # The code below is just a placeholder. 
# branch_dict = { # "from_bus": bus_id_to_name[str(row['From Bus'])],