Skip to content

Commit

Permalink
Merge pull request #70 from Nixtla/feat/historic_arg
Browse files Browse the repository at this point in the history
feat/historic arg
  • Loading branch information
AzulGarza authored Aug 8, 2023
2 parents bda293f + b68327b commit 6a7bcc4
Show file tree
Hide file tree
Showing 3 changed files with 791 additions and 179 deletions.
781 changes: 658 additions & 123 deletions nbs/timegpt.ipynb

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions nixtlats/_modidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,21 @@
'lib_path': 'nixtlats'},
'syms': { 'nixtlats.timegpt': { 'nixtlats.timegpt.TimeGPT': ('timegpt.html#timegpt', 'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT.__init__': ('timegpt.html#timegpt.__init__', 'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._get_to_dict_args': ( 'timegpt.html#timegpt._get_to_dict_args',
'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._hit_multi_series_endpoint': ( 'timegpt.html#timegpt._hit_multi_series_endpoint',
'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._hit_multi_series_historic_endpoint': ( 'timegpt.html#timegpt._hit_multi_series_historic_endpoint',
'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._infer_freq': ('timegpt.html#timegpt._infer_freq', 'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._model_params': ('timegpt.html#timegpt._model_params', 'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._multi_series': ('timegpt.html#timegpt._multi_series', 'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._parse_response': ( 'timegpt.html#timegpt._parse_response',
'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._preprocess_inputs': ( 'timegpt.html#timegpt._preprocess_inputs',
'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._preprocess_dataframes': ( 'timegpt.html#timegpt._preprocess_dataframes',
'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._transform_dataframes': ( 'timegpt.html#timegpt._transform_dataframes',
'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._validate_inputs': ( 'timegpt.html#timegpt._validate_inputs',
'nixtlats/timegpt.py'),
'nixtlats.timegpt.TimeGPT._validate_outputs': ( 'timegpt.html#timegpt._validate_outputs',
Expand Down
177 changes: 123 additions & 54 deletions nixtlats/timegpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ def _validate_inputs(
time_col: str,
target_col: str,
):
renamer = {id_col: "unique_id", time_col: "ds", target_col: "y"}
renamer = {
id_col: "unique_id",
time_col: "ds",
target_col: "y",
}
df = df.rename(columns=renamer)
if df.dtypes.ds != "object":
df["ds"] = df["ds"].astype(str)
Expand Down Expand Up @@ -128,44 +132,19 @@ def _infer_freq(self, df: pd.DataFrame):
)
return freq

def _preprocess_inputs(
def _preprocess_dataframes(
self,
df: pd.DataFrame,
h: int,
freq: str,
X_df: Optional[pd.DataFrame] = None,
level: Optional[List] = None,
finetune_steps: int = 0,
):
# restrict input only if we dont want
# to finetune the model
restrict_input = finetune_steps == 0
if restrict_input:
model_params = self._model_params(freq)
input_size, model_horizon = (
model_params["input_size"],
model_params["horizon"],
)
if level is not None:
# add sufficient info to compute
# conformal interval
input_size = 2 * input_size + model_horizon
else:
input_size = model_horizon = None
if restrict_input:
df = df.groupby("unique_id").tail(input_size)
"""Returns Y_df and X_df dataframes in the structure expected by the endpoints."""
y_cols = ["unique_id", "ds", "y"]
y = df[y_cols]
to_dict_args = {"orient": "split"}
if "index" in inspect.signature(pd.DataFrame.to_dict).parameters:
to_dict_args["index"] = False
if y["y"].isna().any():
Y_df = df[y_cols]
if Y_df["y"].isna().any():
raise Exception("Your target variable contains NA, please check")
y = y.to_dict(**to_dict_args)
x_cols = []
if X_df is None:
x = None
else:
if X_df is not None:
x_cols = X_df.drop(columns=["unique_id", "ds"]).columns.to_list()
if not all(col in df.columns for col in x_cols):
raise Exception(
Expand All @@ -177,36 +156,63 @@ def _preprocess_inputs(
f"You have to pass the {h} future values of your "
"exogenous variables for each time series"
)
x = df[["unique_id", "ds"] + x_cols]
x = pd.concat([x, X_df])
if x[x_cols].isna().any().any():
X_df_history = df[["unique_id", "ds"] + x_cols]
X_df = pd.concat([X_df_history, X_df])
if X_df[x_cols].isna().any().any():
raise Exception(
"Some of your exogenous variables contain NA, please check"
)
x = x.sort_values(["unique_id", "ds"]).reset_index(drop=True)
x = x.to_dict(**to_dict_args)
return y, x, x_cols
X_df = X_df.sort_values(["unique_id", "ds"]).reset_index(drop=True)
return Y_df, X_df, x_cols

def _multi_series(
def _get_to_dict_args(self):
to_dict_args = {"orient": "split"}
if "index" in inspect.signature(pd.DataFrame.to_dict).parameters:
to_dict_args["index"] = False
return to_dict_args

def _transform_dataframes(self, Y_df: pd.DataFrame, X_df: pd.DataFrame):
    """Serialize the target and exogenous frames into payload-ready dicts.

    Returns a tuple ``(y, x)`` where ``y`` is ``Y_df`` converted via the
    version-appropriate ``to_dict`` arguments, and ``x`` is the analogous
    conversion of ``X_df`` — or ``None`` when no exogenous frame is given.
    """
    dict_kwargs = self._get_to_dict_args()
    y_payload = Y_df.to_dict(**dict_kwargs)
    if X_df is None:
        x_payload = None
    else:
        x_payload = X_df.to_dict(**dict_kwargs)
    return y_payload, x_payload

def _hit_multi_series_endpoint(
self,
df: pd.DataFrame,
Y_df: pd.DataFrame,
X_df: pd.DataFrame,
x_cols: List[str],
h: int,
freq: str,
X_df: Optional[pd.DataFrame] = None,
level: Optional[List[int]] = None,
finetune_steps: int = 0,
clean_ex_first: bool = True,
finetune_steps: int,
clean_ex_first: bool,
level: Optional[List[Union[int, float]]],
):
if freq is None:
freq = self._infer_freq(df)
y, x, x_cols = self._preprocess_inputs(
df=df,
h=h,
freq=freq,
X_df=X_df,
level=level,
finetune_steps=finetune_steps,
)
# restrict input only if we dont want
# to finetune the model
restrict_input = finetune_steps == 0
if restrict_input:
model_params = self._model_params(freq)
input_size, model_horizon = (
model_params["input_size"],
model_params["horizon"],
)
if level is not None:
# add sufficient info to compute
# conformal interval
input_size = 2 * input_size + model_horizon
else:
input_size = model_horizon = None
# restricting the inputs if necessary
if restrict_input:
Y_df = Y_df.groupby("unique_id").tail(input_size)
if X_df is not None:
X_df = X_df.groupby("unique_id").tail(
input_size + h
) # history plus exogenous
y, x = self._transform_dataframes(Y_df, X_df)
# Contruct payload
payload = dict(
y=y,
x=x,
Expand All @@ -231,6 +237,65 @@ def _multi_series(
)
return pd.DataFrame(**response_timegpt["data"]["forecast"])

def _hit_multi_series_historic_endpoint(
    self,
    Y_df: pd.DataFrame,
    freq: str,
    level: Optional[List[Union[int, float]]],
):
    """Call the in-sample (historic) multi-series endpoint.

    Sends ``Y_df`` to ``timegpt_multi_series_historic`` and returns the
    fitted values as a DataFrame. This endpoint takes no exogenous
    variables, so only the serialized target is placed in the payload.
    """
    y_payload, _ = self._transform_dataframes(Y_df, None)
    # Construct the request payload for the historic endpoint.
    request_body = dict(
        y=y_payload,
        freq=freq,
        level=level,
    )
    raw_response = requests.post(
        f"{self.api_url}/timegpt_multi_series_historic",
        json=request_body,
        headers=self.request_headers,
    )
    parsed = self._parse_response(raw_response)
    return pd.DataFrame(**parsed["data"]["forecast"])

def _multi_series(
    self,
    df: pd.DataFrame,
    h: int,
    freq: str,
    X_df: Optional[pd.DataFrame],
    level: Optional[List[Union[int, float]]],
    finetune_steps: int,
    clean_ex_first: bool,
    add_history: bool,
):
    """Forecast multiple series, optionally prepending in-sample fitted values.

    Infers ``freq`` from ``df`` when not provided, preprocesses the input
    frames, hits the forecast endpoint, and — when ``add_history`` is set —
    also fetches fitted values from the historic endpoint and concatenates
    them ahead of the forecasts, sorted by series and timestamp.
    """
    if freq is None:
        freq = self._infer_freq(df)
    Y_df, X_df, x_cols = self._preprocess_dataframes(
        df=df,
        h=h,
        X_df=X_df,
    )
    forecast_df = self._hit_multi_series_endpoint(
        Y_df=Y_df,
        X_df=X_df,
        h=h,
        freq=freq,
        clean_ex_first=clean_ex_first,
        finetune_steps=finetune_steps,
        x_cols=x_cols,
        level=level,
    )
    if add_history:
        history_df = self._hit_multi_series_historic_endpoint(
            Y_df=Y_df,
            freq=freq,
            level=level,
        )
        # Drop the observed target so only fitted values are merged in.
        history_df = history_df.drop(columns="y")
        forecast_df = pd.concat([history_df, forecast_df]).sort_values(
            ["unique_id", "ds"]
        )
    return forecast_df

def forecast(
self,
df: pd.DataFrame,
Expand All @@ -244,6 +309,7 @@ def forecast(
finetune_steps: int = 0,
clean_ex_first: bool = True,
validate_token: bool = False,
add_history: bool = False,
):
"""Forecast your time series using TimeGPT.
Expand Down Expand Up @@ -282,9 +348,11 @@ def forecast(
clean_ex_first : bool (default=True)
Clean exogenous signal before making forecasts
using TimeGPT.
validate_token: bool (default=False)
validate_token : bool (default=False)
If True, validates token before
sending requests.
add_history : bool (default=False)
Return fitted values of the model.
Returns
-------
Expand Down Expand Up @@ -312,6 +380,7 @@ def forecast(
level=level,
finetune_steps=finetune_steps,
clean_ex_first=clean_ex_first,
add_history=add_history,
)
fcst_df = self._validate_outputs(
fcst_df=fcst_df,
Expand Down

0 comments on commit 6a7bcc4

Please sign in to comment.