add ANALYSIS_TIMEBASE_S to smarteole_example.py
aclerc committed May 7, 2024
1 parent 6c0431a commit c8eb23d
Showing 1 changed file with 27 additions and 21 deletions.
examples/smarteole_example.py (48 changes: 27 additions, 21 deletions)
@@ -17,7 +17,11 @@
from wind_up.models import PlotConfig, WindUpConfig
from wind_up.reanalysis_data import ReanalysisDataset

-CACHE_FLD = PROJECTROOT_DIR / "cache" / "smarteole_example_data"
+CACHE_DIR = PROJECTROOT_DIR / "cache" / "smarteole_example_data"
+
+ANALYSIS_TIMEBASE_S = 600
+CACHE_SUBDIR = CACHE_DIR / f"timebase_{ANALYSIS_TIMEBASE_S}"
+CACHE_SUBDIR.mkdir(exist_ok=True, parents=True)

ENSURE_DOWNLOAD = 1
CHECK_RESULTS = 1
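
Note: caching the preprocessed data under a timebase-specific subdirectory matters because each cached parquet file bakes in the resample frequency; without CACHE_SUBDIR, a result cached at one timebase would be silently reused for another. As an illustration, here is a minimal sketch of how a parquet-cache decorator in the spirit of wind_up's with_parquet_cache can work (the real implementation may differ):

from functools import wraps
from pathlib import Path

import pandas as pd

def with_parquet_cache(fpath: Path):
    """Reuse a cached parquet file if present, else run the function and cache the result."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs) -> pd.DataFrame:
            if fpath.exists():
                return pd.read_parquet(fpath)  # cache hit: skip the expensive unpack
            result = func(*args, **kwargs)
            fpath.parent.mkdir(exist_ok=True, parents=True)
            result.to_parquet(fpath)
            return result
        return wrapper
    return decorator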
@@ -26,8 +30,8 @@
MINIMUM_DATA_COUNT_COVERAGE = 0.5 # 50% of the data must be present


-@with_parquet_cache(CACHE_FLD / "_smarteole_scada.parquet")
-def _unpack_scada(timebase_minutes: int = 10) -> pd.DataFrame:
+@with_parquet_cache(CACHE_SUBDIR / "smarteole_scada.parquet")
+def _unpack_scada(timebase_s: int) -> pd.DataFrame:
"""
Function that translates 1-minute SCADA data to x minute data in the wind-up expected format
"""
@@ -50,7 +54,7 @@ def _make_turbine_id_a_column(df: pd.DataFrame) -> pd.DataFrame:
return df.stack(level=0, future_stack=True).reset_index(DataColumns.turbine_name) # noqa: PD013

def _map_and_mask_cols(df: pd.DataFrame) -> pd.DataFrame:
-x_minutes_count_lower_limit = 60 * timebase_minutes * MINIMUM_DATA_COUNT_COVERAGE
+x_minutes_count_lower_limit = timebase_s * MINIMUM_DATA_COUNT_COVERAGE
mask_active_power = df["active_power_count"] < x_minutes_count_lower_limit
mask_wind_speed = df["wind_speed_count"] < x_minutes_count_lower_limit
mask_pitch_angle = df["blade_1_pitch_angle_count"] < x_minutes_count_lower_limit
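
Note: assuming the *_count columns count 1 Hz samples, as the old 60 * timebase_minutes formula implies, a fully covered interval contains timebase_s samples, so the two thresholds agree at the default 10-minute timebase:

MINIMUM_DATA_COUNT_COVERAGE = 0.5
old_limit = 60 * 10 * MINIMUM_DATA_COUNT_COVERAGE  # 60 samples/min * 10 min * 50%
new_limit = 600 * MINIMUM_DATA_COUNT_COVERAGE      # timebase_s * 50%
assert old_limit == new_limit == 300.0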
@@ -76,12 +80,12 @@ def _map_and_mask_cols(df: pd.DataFrame) -> pd.DataFrame:
# unzipping the data in memory and only reading the relevant files
scada_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_SCADA_1minData.csv"
circular_mean = partial(circmean, low=0, high=360)
-with zipfile.ZipFile(CACHE_FLD / ZIP_FILENAME) as zf:
+with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
return (
pd.read_csv(zf.open(scada_fpath), parse_dates=[0], index_col=0)
.pipe(_make_turbine_id_a_column)
.groupby(DataColumns.turbine_name)
.resample(f"{timebase_minutes}min")
.resample(f"{timebase_s}s")
.aggregate(
{
"active_power_avg": "mean",
@@ -110,10 +114,10 @@ def _map_and_mask_cols(df: pd.DataFrame) -> pd.DataFrame:
)


-@with_parquet_cache(CACHE_FLD / "_smarteole_metadata.parquet")
+@with_parquet_cache(CACHE_DIR / "smarteole_metadata.parquet")
def _unpack_metadata() -> pd.DataFrame:
md_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_Coordinates_staticData.csv"
-with zipfile.ZipFile(CACHE_FLD / ZIP_FILENAME) as zf:
+with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
return (
pd.read_csv(zf.open(md_fpath), index_col=0)
.reset_index()
@@ -124,13 +128,13 @@ def _unpack_metadata() -> pd.DataFrame:
)


-@with_parquet_cache(CACHE_FLD / "_smarteole_toggle.parquet")
-def _unpack_toggle_data(timebase_minutes: int = 10) -> pd.DataFrame:
-ten_minutes_count_lower_limit = 60 * timebase_minutes * MINIMUM_DATA_COUNT_COVERAGE
+@with_parquet_cache(CACHE_SUBDIR / "smarteole_toggle.parquet")
+def _unpack_toggle_data(timebase_s: int) -> pd.DataFrame:
+ten_minutes_count_lower_limit = timebase_s * MINIMUM_DATA_COUNT_COVERAGE
toggle_value_threshold: float = 0.95

_fpath = "SMARTEOLE-WFC-open-dataset/SMARTEOLE_WakeSteering_ControlLog_1minData.csv"
-with zipfile.ZipFile(CACHE_FLD / ZIP_FILENAME) as zf:
+with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
raw_df = pd.read_csv(zf.open(_fpath), parse_dates=[0], index_col=0)

required_in_cols = [
@@ -139,7 +143,7 @@ def _unpack_toggle_data(timebase_s: int) -> pd.DataFrame:
]
toggle_df = (
raw_df[required_in_cols]
.resample(f"{timebase_minutes}min")
.resample(f"{timebase_s}s")
.agg({"control_log_offset_active_avg": "mean", "control_log_offset_active_count": "sum"})
)
toggle_df["toggle_on"] = (toggle_df["control_log_offset_active_avg"] >= toggle_value_threshold) & (
@@ -162,14 +166,14 @@ def _unpack_toggle_data(timebase_s: int) -> pd.DataFrame:
logger = logging.getLogger(__name__)

logger.info("Downloading example data from Zenodo")
-download_zenodo_data(record_id="7342466", output_dir=CACHE_FLD, filenames={ZIP_FILENAME})
+download_zenodo_data(record_id="7342466", output_dir=CACHE_DIR, filenames={ZIP_FILENAME})

logger.info("Preprocessing (and caching) turbine SCADA data")
-scada_df = _unpack_scada()
+scada_df = _unpack_scada(ANALYSIS_TIMEBASE_S)
logger.info("Preprocessing (and caching) turbine metadata")
metadata_df = _unpack_metadata()
logger.info("Preprocessing (and caching) toggle data")
-toggle_df = _unpack_toggle_data()
+toggle_df = _unpack_toggle_data(ANALYSIS_TIMEBASE_S)

logger.info("Loading reference reanalysis data")
reanalysis_dataset = ReanalysisDataset(
@@ -204,11 +208,13 @@ def _unpack_toggle_data(timebase_s: int) -> pd.DataFrame:
("SMV7", pd.Timestamp("2020-02-17 16:30:00+0000"), 4.605999999999972),
]

+wd_filter_margin = 3 + 7 * ANALYSIS_TIMEBASE_S / 600
cfg = WindUpConfig(
assessment_name="smarteole_example",
+timebase_s=ANALYSIS_TIMEBASE_S,
require_ref_wake_free=True,
detrend_min_hours=12,
-ref_wd_filter=[197.0, 246.0],
+ref_wd_filter=[207 - wd_filter_margin, 236 + wd_filter_margin],  # steer is from 207-236
filter_all_test_wtgs_together=True,
use_lt_distribution=False,
out_dir=analysis_output_dir,
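
Note: the parametrized ref_wd_filter reproduces the previously hard-coded [197.0, 246.0] at the default 600 s timebase, while widening the margin for longer averaging periods (a reasonable reading of the intent; the commit itself does not state the rationale):

ANALYSIS_TIMEBASE_S = 600
wd_filter_margin = 3 + 7 * ANALYSIS_TIMEBASE_S / 600  # = 10.0 at 600 s
ref_wd_filter = [207 - wd_filter_margin, 236 + wd_filter_margin]
assert ref_wd_filter == [197.0, 246.0]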
@@ -218,11 +224,11 @@ def _unpack_toggle_data(timebase_s: int) -> pd.DataFrame:
non_wtg_ref_names=[],
analysis_first_dt_utc_start=pd.Timestamp("2020-02-17 16:30:00+0000"),
upgrade_first_dt_utc_start=pd.Timestamp("2020-02-17 16:30:00+0000"),
analysis_last_dt_utc_start=pd.Timestamp("2020-05-24 23:50:00+0000"),
analysis_last_dt_utc_start=pd.Timestamp("2020-05-25 00:00:00+0000") - pd.Timedelta(seconds=ANALYSIS_TIMEBASE_S),
lt_first_dt_utc_start=pd.Timestamp("2020-02-17 16:30:00+0000"),
lt_last_dt_utc_start=pd.Timestamp("2020-05-24 23:50:00+0000"),
lt_last_dt_utc_start=pd.Timestamp("2020-05-25 00:00:00+0000") - pd.Timedelta(seconds=ANALYSIS_TIMEBASE_S),
detrend_first_dt_utc_start=pd.Timestamp("2020-02-17 16:30:00+0000"),
detrend_last_dt_utc_start=pd.Timestamp("2020-05-24 23:50:00+0000"),
detrend_last_dt_utc_start=pd.Timestamp("2020-05-25 00:00:00+0000") - pd.Timedelta(seconds=ANALYSIS_TIMEBASE_S),
years_for_lt_distribution=0,
years_for_detrend=0,
ws_bin_width=1.0,
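
Note: the end-of-period timestamps are *_dt_utc_start values, i.e. the start of the last interval, so they are computed as midnight on 2020-05-25 minus one timebase. At 600 s this reproduces the old hard-coded 23:50:00:

import pandas as pd

ANALYSIS_TIMEBASE_S = 600
last_start = pd.Timestamp("2020-05-25 00:00:00+0000") - pd.Timedelta(seconds=ANALYSIS_TIMEBASE_S)
assert last_start == pd.Timestamp("2020-05-24 23:50:00+0000")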
@@ -251,7 +257,7 @@ def _unpack_toggle_data(timebase_s: int) -> pd.DataFrame:
scada_df=scada_df,
metadata_df=metadata_df,
reanalysis_datasets=[reanalysis_dataset],
-cache_dir=CACHE_FLD,
+cache_dir=CACHE_SUBDIR,
)
results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)
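
Note: passing cache_dir=CACHE_SUBDIR (rather than CACHE_DIR) keeps wind-up's own intermediate caches separated per timebase as well, consistent with the preprocessing caches above.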
