Skip to content

Commit

Permalink
speeding up pre_post_pp_analysis_with_reversal from 143ms to ~100ms b…
Browse files Browse the repository at this point in the history
…y improving `pp_raw_df`
  • Loading branch information
gabrielecalvo committed Jan 14, 2025
1 parent c7e17bb commit 7d05834
Showing 1 changed file with 24 additions and 21 deletions.
45 changes: 24 additions & 21 deletions wind_up/pp_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,34 +27,37 @@ def pp_raw_df(
pw_col: str,
timebase_s: int,
) -> pd.DataFrame:
pp_df = (
pre_or_post_df.dropna(subset=[pw_col, ws_col])
return (
pre_or_post_df.loc[:, [pw_col, ws_col]]
.dropna()
.groupby(
by=pd.cut(pre_or_post_df[ws_col], bins=ws_bin_edges, retbins=False),
observed=False,
)
.agg(
count=pd.NamedAgg(column=pw_col, aggfunc=lambda x: len(x)),
ws_mean=pd.NamedAgg(column=ws_col, aggfunc=lambda x: x.mean()),
ws_std=pd.NamedAgg(column=ws_col, aggfunc=lambda x: x.std()),
pw_mean=pd.NamedAgg(column=pw_col, aggfunc=lambda x: x.mean()),
pw_std=pd.NamedAgg(column=pw_col, aggfunc=lambda x: x.std()),
count=pd.NamedAgg(column=pw_col, aggfunc=len),
ws_mean=pd.NamedAgg(column=ws_col, aggfunc="mean"),
ws_std=pd.NamedAgg(column=ws_col, aggfunc="std"),
pw_mean=pd.NamedAgg(column=pw_col, aggfunc="mean"),
pw_std=pd.NamedAgg(column=pw_col, aggfunc="std"),
)
.assign(
ws_std=lambda x: x["ws_std"].fillna(0),
pw_std=lambda x: x["pw_std"].fillna(0),
hours=lambda x: x["count"] * timebase_s / 3600,
ws_sem=lambda x: x["ws_std"] / np.sqrt(x["count"].clip(lower=1)),
pw_sem=lambda x: x["pw_std"] / np.sqrt(x["count"].clip(lower=1)),
)
.pipe(lambda d: d.set_axis(d.columns.map(lambda x: f"{x}_{pre_or_post}"), axis="columns"))
.assign(
bin_left=lambda x: [i.left for i in x.index],
bin_mid=lambda x: [i.mid for i in x.index],
bin_right=lambda x: [i.right for i in x.index],
bin_closed_right=lambda x: [i.closed_right for i in x.index],
)
.set_index("bin_mid", drop=False, verify_integrity=True)
.rename_axis(f"{ws_col}_bin_mid", axis=0)
)
pp_df["ws_std"] = pp_df["ws_std"].fillna(0)
pp_df["pw_std"] = pp_df["pw_std"].fillna(0)
rows_per_hour = 3600 / timebase_s
pp_df["hours"] = pp_df["count"] / rows_per_hour
pp_df["ws_sem"] = pp_df["ws_std"] / np.sqrt(pp_df["count"].clip(lower=1))
pp_df["pw_sem"] = pp_df["pw_std"] / np.sqrt(pp_df["count"].clip(lower=1))
pp_df.columns = [x + f"_{pre_or_post}" for x in pp_df.columns]
pp_df["bin_left"] = [x.left for x in pp_df.index]
pp_df["bin_mid"] = [x.mid for x in pp_df.index]
pp_df["bin_right"] = [x.right for x in pp_df.index]
pp_df["bin_closed_right"] = [x.closed_right for x in pp_df.index]
pp_df = pp_df.set_index("bin_mid", drop=False, verify_integrity=True)
pp_df.index.name = f"{ws_col}_bin_mid"
return pp_df


def _calc_rated_ws(*, pp_df: pd.DataFrame, pw_col: str, rated_power: float) -> float:
Expand Down

0 comments on commit 7d05834

Please sign in to comment.