diff --git a/docs/sdk/code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md b/docs/sdk/code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md new file mode 100644 index 000000000..7b01b5ec5 --- /dev/null +++ b/docs/sdk/code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md @@ -0,0 +1 @@ +::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.iso.ercot_to_mdm diff --git a/docs/sdk/pipelines/components.md b/docs/sdk/pipelines/components.md index 40f7bd98f..4d06716ff 100644 --- a/docs/sdk/pipelines/components.md +++ b/docs/sdk/pipelines/components.md @@ -69,6 +69,7 @@ Transformers are components that perform transformations on data. These will tar |[MISO To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| |[Raw Forecast to Weather Data Model](../code-reference/pipelines/transformers/spark/the_weather_company/raw_forecast_to_weather_data_model.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| |[PJM To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| +|[ERCOT To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:| !!! note "Note" This list will dynamically change as the framework is further developed and new components are added. diff --git a/mkdocs.yml b/mkdocs.yml index b27f43d5e..4e7c97054 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -199,6 +199,7 @@ nav: - ISO: - MISO To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md - PJM To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md + - ERCOT To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md - The Weather Company: - Raw Forecast To Weather Data Model: sdk/code-reference/pipelines/transformers/spark/the_weather_company/raw_forecast_to_weather_data_model.md - ECMWF: diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/__init__.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/__init__.py index fb1d22d4a..583a5bd99 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/__init__.py +++ b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/__init__.py @@ -1,2 +1,3 @@ +from .ercot_to_mdm import ERCOTToMDMTransformer from .miso_to_mdm import MISOToMDMTransformer from .pjm_to_mdm import PJMToMDMTransformer diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/ercot_to_mdm.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/ercot_to_mdm.py new file mode 100644 index 000000000..99f01285e --- /dev/null +++ b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/ercot_to_mdm.py @@ -0,0 +1,97 @@ +# Copyright 2022 RTDIP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pyspark.sql import DataFrame, SparkSession, functions as F + +from ..base_raw_to_mdm import BaseRawToMDMTransformer +from ...._pipeline_utils.iso import ERCOT_SCHEMA, melt +from .....data_models.timeseries import SeriesType, ModelType, ValueType + + +class ERCOTToMDMTransformer(BaseRawToMDMTransformer): + """ + Converts ERCOT Raw data into Meters Data Model. + + Please check the BaseRawToMDMTransformer for the required arguments and methods. + + Example + -------- + ```python + from rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer + from rtdip_sdk.pipelines.utilities import SparkSessionUtility + + # Not required if using Databricks + spark = SparkSessionUtility(config={}).execute() + + ercot_to_mdm_transformer = ERCOTToMDMTransformer( + spark=spark, + data=df, + output_type="usage", + name=None, + description=None, + value_type=None, + version=None, + series_id=None, + series_parent_id=None + ) + + result = ercot_to_mdm_transformer.transform() + ``` + """ + + spark: SparkSession + data: DataFrame + input_schema = ERCOT_SCHEMA + uid_col = "variable" + series_id_col = "'series_std_001'" + timestamp_col = "to_utc_timestamp(StartTime, 'America/Chicago')" + interval_timestamp_col = "Timestamp + INTERVAL 1 HOURS" + value_col = "value" + series_parent_id_col = "'series_parent_std_001'" + name_col = "'ERCOT API'" + uom_col = "'mwh'" + description_col = "'ERCOT data pulled from ERCOT ISO API'" + timestamp_start_col = "StartTime" + timestamp_end_col = "StartTime + INTERVAL 1 HOURS" + time_zone_col = "'America/Chicago'" + version_col = "'1'" + series_type = SeriesType.Hour + model_type = ModelType.Default + value_type = ValueType.Usage + properties_col = "null" + + def _pre_process(self) -> DataFrame: + df: DataFrame = super(ERCOTToMDMTransformer, self)._pre_process() + df = melt( + df, + id_vars=["Date", "HourEnding", "DstFlag"], + value_vars=[ + "Coast", + "East", + "FarWest", + "North", + "NorthCentral", + "SouthCentral", + "Southern", + "West", + "SystemTotal", + ], + ) + df = df.withColumn( + "StartTime", + F.expr( + "Date + MAKE_INTERVAL(0,0,0,0,cast(split(HourEnding,':')[0] as integer),0,0)" + ), + ) + return df diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_meta/input.csv b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_meta/input.csv new file mode 100644 index 000000000..9ff017a08 --- /dev/null +++ b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_meta/input.csv @@ -0,0 +1,3 @@ +Date,HourEnding,Coast,East,FarWest,North,NorthCentral,SouthCentral,Southern,West,SystemTotal,DstFlag +2023-12-10T00:00:00,1:00,10173.7002,1384.3101,6210.73,1213.63,12670.0996,6350.9702,3160.3601,1292.41,42456.2102,N +2023-12-10T00:00:00,2:00,9950.5098,1389.42,6276.6201,1215.9399,12627.5,6270.8701,3046.97,1288.1801,42066.01,N diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_meta/output.json b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_meta/output.json new file mode 100644 index 000000000..e019308e6 --- /dev/null +++ b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_meta/output.json @@ -0,0 +1,18 @@ +{"Uid":"Coast","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"East","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"FarWest","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"North","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"NorthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"SouthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"Southern","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"West","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"SystemTotal","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"Coast","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"East","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"FarWest","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"North","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"NorthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"SouthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"Southern","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"West","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} +{"Uid":"SystemTotal","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16} diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_usage/input.csv b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_usage/input.csv new file mode 100644 index 000000000..9ff017a08 --- /dev/null +++ b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_usage/input.csv @@ -0,0 +1,3 @@ +Date,HourEnding,Coast,East,FarWest,North,NorthCentral,SouthCentral,Southern,West,SystemTotal,DstFlag +2023-12-10T00:00:00,1:00,10173.7002,1384.3101,6210.73,1213.63,12670.0996,6350.9702,3160.3601,1292.41,42456.2102,N +2023-12-10T00:00:00,2:00,9950.5098,1389.42,6276.6201,1215.9399,12627.5,6270.8701,3046.97,1288.1801,42066.01,N diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_usage/output.csv b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_usage/output.csv new file mode 100644 index 000000000..7fd1770a3 --- /dev/null +++ b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_data/ercot_usage/output.csv @@ -0,0 +1,19 @@ +Uid,SeriesId,Timestamp,IntervalTimestamp,Value +Coast,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,10173.7002 +East,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1384.3101 +FarWest,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,6210.73 +North,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1213.63 +NorthCentral,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,12670.0996 +SouthCentral,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,6350.9702 +Southern,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,3160.3601 +West,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1292.41 +SystemTotal,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,42456.2102 +Coast,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,9950.5098 +East,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1389.42 +FarWest,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,6276.6201 +North,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1215.9399 +NorthCentral,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,12627.5 +SouthCentral,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,6270.8701 +Southern,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,3046.97 +West,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1288.1801 +SystemTotal,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,42066.01 diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_ercot_to_mdm.py b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_ercot_to_mdm.py new file mode 100644 index 000000000..b0db47eab --- /dev/null +++ b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/iso/test_ercot_to_mdm.py @@ -0,0 +1,79 @@ +# Copyright 2022 RTDIP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from pyspark.sql import SparkSession, DataFrame + +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.iso import ERCOT_SCHEMA +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.mdm import ( + MDM_USAGE_SCHEMA, + MDM_META_SCHEMA, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) +from src.sdk.python.rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer + +parent_base_path: str = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "test_data" +) + + +def ercot_to_mdm_test( + spark_session: SparkSession, + output_type: str, + expected_df: DataFrame, + base_path: str, +): + input_df: DataFrame = spark_session.read.csv( + f"{base_path}/input.csv", header=True, schema=ERCOT_SCHEMA + ) + + transformer = ERCOTToMDMTransformer( + spark_session, input_df, output_type=output_type + ) + assert transformer.system_type() == SystemType.PYSPARK + assert isinstance(transformer.libraries(), Libraries) + assert transformer.settings() == dict() + + actual_df = transformer.transform() + + cols = list( + filter( + lambda column: column in expected_df.columns, + ["Uid", "Timestamp", "TimestampStart"], + ) + ) + assert actual_df.orderBy(cols).collect() == expected_df.orderBy(cols).collect() + + +def test_ercot_to_mdm_usage(spark_session: SparkSession): + usage_path = os.path.join(parent_base_path, "ercot_usage") + usage_expected_df: DataFrame = spark_session.read.csv( + f"{usage_path}/output.csv", header=True, schema=MDM_USAGE_SCHEMA + ) + + ercot_to_mdm_test(spark_session, "usage", usage_expected_df, usage_path) + + +def test_ercot_to_mdm_meta(spark_session: SparkSession): + meta_path: str = os.path.join(parent_base_path, "ercot_meta") + + meta_expected_df: DataFrame = spark_session.read.json( + f"{meta_path}/output.json", schema=MDM_META_SCHEMA + ) + + ercot_to_mdm_test(spark_session, "meta", meta_expected_df, meta_path)