Skip to content

Commit

Permalink
Adding Transformer for ERCOT System Level Data to Meters Data Model (#…
Browse files Browse the repository at this point in the history
…610)

* Adding ERCOT to MDM Transfomer class

Signed-off-by: Shivam Saxena <[email protected]>

* Adding ERCOT Raw to MDM Transformer

Signed-off-by: Shivam Saxena <[email protected]>

* Adding ERCOT to MDM Transformer related docs

Signed-off-by: Shivam Saxena <[email protected]>

* Improving ERCOT Raw to MDM Transformer test cases

Signed-off-by: Shivam Saxena <[email protected]>

* Improving ERCOT to MDM Transformer Test Code Structure

Signed-off-by: Shivam Saxena <[email protected]>

---------

Signed-off-by: Shivam Saxena <[email protected]>
  • Loading branch information
IW-SS authored Dec 13, 2023
1 parent aa37d75 commit 611536d
Show file tree
Hide file tree
Showing 10 changed files with 223 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.iso.ercot_to_mdm
1 change: 1 addition & 0 deletions docs/sdk/pipelines/components.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ Transformers are components that perform transformations on data. These will tar
|[MISO To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|
|[Raw Forecast to Weather Data Model](../code-reference/pipelines/transformers/spark/the_weather_company/raw_forecast_to_weather_data_model.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|
|[PJM To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|
|[ERCOT To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|

!!! note "Note"
This list will dynamically change as the framework is further developed and new components are added.
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ nav:
- ISO:
- MISO To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md
- PJM To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md
- ERCOT To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md
- The Weather Company:
- Raw Forecast To Weather Data Model: sdk/code-reference/pipelines/transformers/spark/the_weather_company/raw_forecast_to_weather_data_model.md
- ECMWF:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .ercot_to_mdm import ERCOTToMDMTransformer
from .miso_to_mdm import MISOToMDMTransformer
from .pjm_to_mdm import PJMToMDMTransformer
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyspark.sql import DataFrame, SparkSession, functions as F

from ..base_raw_to_mdm import BaseRawToMDMTransformer
from ...._pipeline_utils.iso import ERCOT_SCHEMA, melt
from .....data_models.timeseries import SeriesType, ModelType, ValueType


class ERCOTToMDMTransformer(BaseRawToMDMTransformer):
"""
Converts ERCOT Raw data into Meters Data Model.
Please check the BaseRawToMDMTransformer for the required arguments and methods.
Example
--------
```python
from rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer
from rtdip_sdk.pipelines.utilities import SparkSessionUtility
# Not required if using Databricks
spark = SparkSessionUtility(config={}).execute()
ercot_to_mdm_transformer = ERCOTToMDMTransformer(
spark=spark,
data=df,
output_type="usage",
name=None,
description=None,
value_type=None,
version=None,
series_id=None,
series_parent_id=None
)
result = ercot_to_mdm_transformer.transform()
```
"""

spark: SparkSession
data: DataFrame
input_schema = ERCOT_SCHEMA
uid_col = "variable"
series_id_col = "'series_std_001'"
timestamp_col = "to_utc_timestamp(StartTime, 'America/Chicago')"
interval_timestamp_col = "Timestamp + INTERVAL 1 HOURS"
value_col = "value"
series_parent_id_col = "'series_parent_std_001'"
name_col = "'ERCOT API'"
uom_col = "'mwh'"
description_col = "'ERCOT data pulled from ERCOT ISO API'"
timestamp_start_col = "StartTime"
timestamp_end_col = "StartTime + INTERVAL 1 HOURS"
time_zone_col = "'America/Chicago'"
version_col = "'1'"
series_type = SeriesType.Hour
model_type = ModelType.Default
value_type = ValueType.Usage
properties_col = "null"

def _pre_process(self) -> DataFrame:
df: DataFrame = super(ERCOTToMDMTransformer, self)._pre_process()
df = melt(
df,
id_vars=["Date", "HourEnding", "DstFlag"],
value_vars=[
"Coast",
"East",
"FarWest",
"North",
"NorthCentral",
"SouthCentral",
"Southern",
"West",
"SystemTotal",
],
)
df = df.withColumn(
"StartTime",
F.expr(
"Date + MAKE_INTERVAL(0,0,0,0,cast(split(HourEnding,':')[0] as integer),0,0)"
),
)
return df
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Date,HourEnding,Coast,East,FarWest,North,NorthCentral,SouthCentral,Southern,West,SystemTotal,DstFlag
2023-12-10T00:00:00,1:00,10173.7002,1384.3101,6210.73,1213.63,12670.0996,6350.9702,3160.3601,1292.41,42456.2102,N
2023-12-10T00:00:00,2:00,9950.5098,1389.42,6276.6201,1215.9399,12627.5,6270.8701,3046.97,1288.1801,42066.01,N
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{"Uid":"Coast","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"East","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"FarWest","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"North","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"NorthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"SouthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"Southern","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"West","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"SystemTotal","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"Coast","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"East","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"FarWest","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"North","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"NorthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"SouthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"Southern","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"West","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"SystemTotal","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Date,HourEnding,Coast,East,FarWest,North,NorthCentral,SouthCentral,Southern,West,SystemTotal,DstFlag
2023-12-10T00:00:00,1:00,10173.7002,1384.3101,6210.73,1213.63,12670.0996,6350.9702,3160.3601,1292.41,42456.2102,N
2023-12-10T00:00:00,2:00,9950.5098,1389.42,6276.6201,1215.9399,12627.5,6270.8701,3046.97,1288.1801,42066.01,N
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Uid,SeriesId,Timestamp,IntervalTimestamp,Value
Coast,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,10173.7002
East,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1384.3101
FarWest,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,6210.73
North,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1213.63
NorthCentral,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,12670.0996
SouthCentral,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,6350.9702
Southern,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,3160.3601
West,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1292.41
SystemTotal,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,42456.2102
Coast,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,9950.5098
East,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1389.42
FarWest,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,6276.6201
North,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1215.9399
NorthCentral,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,12627.5
SouthCentral,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,6270.8701
Southern,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,3046.97
West,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1288.1801
SystemTotal,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,42066.01
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from pyspark.sql import SparkSession, DataFrame

from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.iso import ERCOT_SCHEMA
from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.mdm import (
MDM_USAGE_SCHEMA,
MDM_META_SCHEMA,
)
from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import (
Libraries,
SystemType,
)
from src.sdk.python.rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer

parent_base_path: str = os.path.join(
os.path.dirname(os.path.realpath(__file__)), "test_data"
)


def ercot_to_mdm_test(
spark_session: SparkSession,
output_type: str,
expected_df: DataFrame,
base_path: str,
):
input_df: DataFrame = spark_session.read.csv(
f"{base_path}/input.csv", header=True, schema=ERCOT_SCHEMA
)

transformer = ERCOTToMDMTransformer(
spark_session, input_df, output_type=output_type
)
assert transformer.system_type() == SystemType.PYSPARK
assert isinstance(transformer.libraries(), Libraries)
assert transformer.settings() == dict()

actual_df = transformer.transform()

cols = list(
filter(
lambda column: column in expected_df.columns,
["Uid", "Timestamp", "TimestampStart"],
)
)
assert actual_df.orderBy(cols).collect() == expected_df.orderBy(cols).collect()


def test_ercot_to_mdm_usage(spark_session: SparkSession):
usage_path = os.path.join(parent_base_path, "ercot_usage")
usage_expected_df: DataFrame = spark_session.read.csv(
f"{usage_path}/output.csv", header=True, schema=MDM_USAGE_SCHEMA
)

ercot_to_mdm_test(spark_session, "usage", usage_expected_df, usage_path)


def test_ercot_to_mdm_meta(spark_session: SparkSession):
meta_path: str = os.path.join(parent_base_path, "ercot_meta")

meta_expected_df: DataFrame = spark_session.read.json(
f"{meta_path}/output.json", schema=MDM_META_SCHEMA
)

ercot_to_mdm_test(spark_session, "meta", meta_expected_df, meta_path)

0 comments on commit 611536d

Please sign in to comment.