Skip to content

Commit

Permalink
Merge pull request #124 from darrylmelander/custom-data-provider
Browse files Browse the repository at this point in the history
Custom data provider
  • Loading branch information
darrylmelander authored Nov 18, 2021
2 parents 42bc691 + 4e4ef0d commit ce1069a
Show file tree
Hide file tree
Showing 9 changed files with 5,220 additions and 11 deletions.
2 changes: 1 addition & 1 deletion prescient/data/providers/dat_data_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class DatDataProvider():

def __init__(self, options:Options):
self._uc_model_template = get_uc_model()
self._instance_directory_name = os.path.join(os.path.expanduser(options.data_directory),
self._instance_directory_name = os.path.join(os.path.expanduser(options.data_path),
"pyspdir_twostage")
self._actuals_by_date = {}
self._forecasts_by_date = {}
Expand Down
2 changes: 1 addition & 1 deletion prescient/data/providers/gmlc_data_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def __init__(self, options:Options):
# midnight start
self._start_time = datetime.combine(options.start_date, datetime.min.time())
self._end_time = self._start_time + timedelta(days=options.num_days)
self._cache = parser.parse_to_cache(options.data_directory, self._start_time, self._end_time)
self._cache = parser.parse_to_cache(options.data_path, self._start_time, self._end_time)

def negotiate_data_frequency(self, desired_frequency_minutes:int):
''' Get the number of minutes between each timestep of actuals data this provider will supply,
Expand Down
4 changes: 2 additions & 2 deletions prescient/data/providers/shortcut_data_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ def __init__(self, options:Options):
# TODO: option-drive
self._virtual_bus_capacity = 1e6

self._generator_characteristics = _load_generator_characteristics(options.data_directory)
self._generator_characteristics = _load_generator_characteristics(options.data_path)
self._historical_prices, self._frequency_minutes = \
_load_historical_prices(options.data_directory, self._start_time, self._end_time)
_load_historical_prices(options.data_path, self._start_time, self._end_time)

self._initial_model = { 'elements' : { 'bus' : {'virtual_bus':{}},
'generator' : self._generator_characteristics,
Expand Down
2 changes: 1 addition & 1 deletion prescient/engine/egret/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def initialize(self, options:Options) -> None:
self._ptdf_manager = PTDFManager()
self._last_sced_pyo_model = None
self._last_sced_pyo_solver = None
self._data_provider = data_provider_factory.get_data_provider(options)
self._data_provider = options.data_provider.get_data_provider(options)
self._actuals_step_frequency = 60 if not options.simulate_out_of_sample \
else self._data_provider.negotiate_data_frequency(options.sced_frequency_minutes)

Expand Down
13 changes: 10 additions & 3 deletions prescient/simulator/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from prescient.plugins import PluginRegistrationContext
from prescient.data.data_provider_factory import InputFormats
from prescient.engine.modeling_engine import PricingType
from prescient.data import data_provider_factory

prescient_persistent_solvers = ("cplex", "gurobi", "xpress")
prescient_solvers = [ s+sa for sa in ["", "_direct", "_persistent"] for s in prescient_persistent_solvers ]
Expand Down Expand Up @@ -120,17 +121,23 @@ def register_plugin(key, value):
"associated data are written.",
)).declare_as_argument()

self.declare("data_provider", ConfigValue(
domain=Module(),
default=data_provider_factory,
description="Python module that supplies a data provider implementation"
)).declare_as_argument()

#############################
# PRESCIENT ONLY OPTIONS #
#############################

# # PRESCIENT_INPUT_OPTIONS

self.declare("data_directory", ConfigValue(
self.declare("data_path", ConfigValue(
domain=Path(),
default="input_data",
description="Specifies the directory to pull data from",
)).declare_as_argument()
description="Specifies the file or directory to pull data from",
)).declare_as_argument('--data-path', '--data-directory')

self.declare("input_format", ConfigValue(
domain=_InEnumStr(InputFormats),
Expand Down
86 changes: 86 additions & 0 deletions tests/simulator_tests/custom_data_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from prescient.data.data_provider import DataProvider
from prescient.simulator.options import Options

from datetime import datetime, timedelta
from dateutil import parser as date_parser
import pandas as pd

from prescient.data.providers.gmlc_data_provider import GmlcDataProvider
from egret.parsers.rts_gmlc.parsed_cache import ParsedCache, ScalarReserveData

def get_data_provider(options:Options) -> DataProvider:
    ''' Create the data provider for this simulation run.

    This is the hook Prescient calls on a custom data provider module;
    it returns a CustomDataProvider configured from the supplied options.
    '''
    provider = CustomDataProvider(options)
    return provider

class CustomDataProvider(GmlcDataProvider):
    ''' A GMLC-style data provider that loads its data from a custom JSON file.

    Instead of parsing an RTS-GMLC input directory, this provider reads a
    previously serialized ParsedCache (see parsed_cache_to_json) from the
    file named by options.data_path.
    '''

    def __init__(self, options:Options):
        # The simulation always begins at midnight of the start date.
        self._start_time = datetime.combine(options.start_date, datetime.min.time())
        self._end_time = self._start_time + timedelta(days=options.num_days)
        self._cache = self.read_data_to_cache(options.data_path)

    def read_data_to_cache(self, filepath):
        ''' Read a JSON file produced by parsed_cache_to_json.

        Arguments
        ---------
        filepath: str
            Path of the JSON file to read.

        Returns
        -------
        A ParsedCache holding the deserialized data.
        '''
        import json

        # Read the JSON file
        with open(filepath) as f:
            data = json.load(f)

        # Rebuild the pd.Series objects: series values are stored as plain
        # lists, and the (shared) datetime index for each simulation type
        # ('DAY_AHEAD'/'REAL_TIME') is stored once under 'timeseries_indices'.
        # Use the module-level date_parser throughout rather than re-importing
        # dateutil.parser locally.
        series_indices = {key: [date_parser.parse(val) for val in vals]
                          for key, vals in data['timeseries_indices'].items()}
        series_data = data['timeseries_data']['Series']
        sim_types = data['timeseries_data']['Simulation']
        for i, sim in enumerate(sim_types):
            series_data[i] = pd.Series(series_data[i], index=series_indices[sim])

        # Convert the whole set of timeseries data to a DataFrame
        ts_df = pd.DataFrame(data['timeseries_data'])

        return ParsedCache(data['skeleton'],
                           date_parser.parse(data['begin_time']),
                           date_parser.parse(data['end_time']),
                           data['minutes_per_day_ahead_period'],
                           data['minutes_per_real_time_period'],
                           ts_df,
                           data['load_participation_factors'],
                           ScalarReserveData(data['scalar_reserve_data']['da_scalars'],
                                             data['scalar_reserve_data']['rt_scalars'])
                           )



def parsed_cache_to_json(f, cache):
    ''' Serialize a ParsedCache to JSON, writing the result to a file object.

    The output can be read back by CustomDataProvider.read_data_to_cache.
    Each pd.Series in the cache's timeseries DataFrame is stored as a plain
    list of values; because all series belonging to one simulation type
    ('DAY_AHEAD'/'REAL_TIME') share the same time index, that index is
    stored only once per simulation type under 'timeseries_indices'.

    Arguments
    ---------
    f: file-like object
        Open text-mode file the JSON is written to.
    cache: ParsedCache
        The parsed data cache to serialize.
    '''
    import json
    import numpy as np

    ts_df = cache.timeseries_df

    # Find the row where each simulation type's run of rows begins, so a
    # representative Series per type can supply that type's time index.
    sims = ts_df['Simulation']
    first_indices = {sims.iat[0]: 0}
    for i in range(1, len(sims)):
        if sims.iat[i] != sims.iat[i - 1]:
            first_indices[sims.iat[i]] = i

    s = {'skeleton': cache.skeleton,
         'begin_time': str(cache.begin_time),
         'end_time': str(cache.end_time),
         'minutes_per_day_ahead_period': cache.minutes_per_period['DAY_AHEAD'],
         'minutes_per_real_time_period': cache.minutes_per_period['REAL_TIME'],
         'timeseries_data': ts_df.to_dict('list'),
         # 'm' -> minute resolution timestamps, e.g. '2020-07-01T00:00'
         'timeseries_indices': {key: np.datetime_as_string(
                                        ts_df['Series'].iat[idx].index.values, 'm'
                                     ).tolist()
                                for key, idx in first_indices.items()},
         'load_participation_factors': cache.load_participation_factors,
         'scalar_reserve_data': {'da_scalars': cache.scalar_reserve_data.da_scalars,
                                 'rt_scalars': cache.scalar_reserve_data.rt_scalars}
         }
    # Replace each pd.Series with its raw values; the indices were saved above.
    s['timeseries_data']['Series'] = [series.values.tolist()
                                      for series in s['timeseries_data']['Series']]
    json.dump(s, f)
Loading

0 comments on commit ce1069a

Please sign in to comment.