Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/auto arima #77

Merged
merged 5 commits into from
Jun 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions arbitragelab/cointegration_approach/johansen.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,13 @@ def fit(self, price_data: pd.DataFrame, dependent_variable: str = None, det_orde
hedge_ratios = cointegration_vectors.iloc[vector].to_dict()
for ticker, ratio in hedge_ratios.items():
if ticker != dependent_variable:
hedge_ratios[ticker] = -ratio / hedge_ratios[dependent_variable]
hedge_ratios[dependent_variable] = 1.0
# Set value to be list to make it easier to read into pandas DataFrame
hedge_ratios[ticker] = [-ratio / hedge_ratios[dependent_variable]]
# Set value to be list to make it easier to read into pandas DataFrame
hedge_ratios[dependent_variable] = [1.0]

# Add all to one dataframe
all_hedge_ratios = all_hedge_ratios.append(hedge_ratios, ignore_index=True)
all_hedge_ratios = all_hedge_ratios[price_data.columns]
# Concat together in one DataFrame
all_hedge_ratios = pd.concat([all_hedge_ratios, pd.DataFrame(hedge_ratios)])

self.hedge_ratios = all_hedge_ratios

Expand Down
90 changes: 60 additions & 30 deletions arbitragelab/time_series_approach/arima_predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_trend_order(y_train: pd.Series, max_order: int = 10) -> int:
stationarity_flag = True

# Avoiding infinite loop
if (order >= max_order):
if order >= max_order:
stationarity_flag = True

order += 1
Expand All @@ -56,7 +56,9 @@ class AutoARIMAForecast:
Auto ARIMA forecast generator function.
"""

def __init__(self, start_p: int = 0, start_q: int = 0, max_p: int = 5, max_q: int = 5):
def __init__(
self, start_p: int = 0, start_q: int = 0, max_p: int = 5, max_q: int = 5
):
"""
Init AutoARIMA (p, i, q) prediction class.

Expand All @@ -74,11 +76,13 @@ def __init__(self, start_p: int = 0, start_q: int = 0, max_p: int = 5, max_q: in
self.arima_model = None
self.y_train = None

segment.track('AutoARIMAForecast')
segment.track("AutoARIMAForecast")

def get_best_arima_model(self, y_train: pd.Series, verbose: bool = False, silence_warnings: bool = True):
def get_best_arima_model(
self, y_train: pd.Series, verbose: bool = False, silence_warnings: bool = True
):
"""
Using the AIC approach from pmdarima library, choose the best fir ARIMA(d, p, q) parameters.
Using the AIC approach from pmdarima library, choose the best fit ARIMA(p, d, q) parameters.

:param y_train: (pd.Series) Training series.
:param verbose: (bool) Flag to print model fit logs.
Expand All @@ -95,17 +99,25 @@ def get_best_arima_model(self, y_train: pd.Series, verbose: bool = False, silenc
context = nullcontext()

with context: # Silencing Warnings

if silence_warnings:
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore")

# Fitting the ARIMA model without warnings
self.arima_model = auto_arima(y=y_train, d=trend_order, start_p=self.start_p, start_q=self.start_q,
max_p=self.max_p, max_q=self.max_q,
max_order=self.max_q + self.max_p + trend_order, trace=verbose)
self.arima_model = auto_arima(
y=y_train,
d=trend_order,
start_p=self.start_p,
start_q=self.start_q,
max_p=self.max_p,
max_q=self.max_q,
max_order=self.max_q + self.max_p + trend_order,
trace=verbose,
)

@staticmethod
def _print_progress(iteration, max_iterations, prefix='', suffix='', decimals=1, bar_length=50):
def _print_progress(
iteration, max_iterations, prefix="", suffix="", decimals=1, bar_length=50
):
# pylint: disable=expression-not-assigned
"""
Calls in a loop to create a terminal progress bar.
Expand All @@ -126,17 +138,22 @@ def _print_progress(iteration, max_iterations, prefix='', suffix='', decimals=1,
filled_length = int(round(bar_length * iteration / float(max_iterations)))

# Fill the bar
block = '█' * filled_length + '-' * (bar_length - filled_length)
block = "█" * filled_length + "-" * (bar_length - filled_length)
# Redraw the bar in place (carriage return, no newline until the final iteration)
sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, block, percents, '%', suffix)),
sys.stdout.write("\r%s |%s| %s%s %s" % (prefix, block, percents, "%", suffix)),

if iteration == max_iterations:
sys.stdout.write('\n')
sys.stdout.write("\n")
sys.stdout.flush()

# pylint: disable=invalid-name
def predict(self, y: pd.Series, retrain_freq: int = 1, train_window: int = None,
silence_warnings: bool = True) -> pd.Series:
def predict(
self,
y: pd.Series,
retrain_freq: int = 1,
train_window: int = None,
silence_warnings: bool = True,
) -> pd.Series:
"""
Predict out-of-sample series using already fit ARIMA model. The algorithm retrains the model with `retrain_freq`
either by appending new observations to train data (`train_window` = None) or by using the latest `train_window`
Expand All @@ -160,31 +177,44 @@ def predict(self, y: pd.Series, retrain_freq: int = 1, train_window: int = None,
context = nullcontext()

with context: # Silencing Warnings

if silence_warnings:
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore")

# Iterating through observations
for i in range(1, y.shape[0]):
if retrain_idx >= retrain_freq: # Retraining model
for i in range(1, len(y)):
if retrain_idx >= retrain_freq: # Retraining model
retrain_idx = 0

if train_window is None: # If no window provided, fitting to all previous observations
if (
train_window is None
): # If no training window, fit to all previous observations
# i-1 to avoid look-ahead bias.
prediction.loc[y.index[i]] = \
self.arima_model.fit_predict(self.y_train.append(y.iloc[:i - 1]), n_periods=1)[0]
out_of_sample_y_train = pd.concat(
[self.y_train, y.iloc[: i - 1]]
)
prediction.loc[y.index[i]] = self.arima_model.fit_predict(
out_of_sample_y_train, n_periods=1
).values[0]

else:
prediction.loc[y.index[i]] = \
self.arima_model.fit_predict(self.y_train.iloc[(-1 * train_window):].append(y.iloc[:i - 1]),
n_periods=1)[0]

else: # Using trained model
prediction.loc[y.index[i]] = self.arima_model.predict(n_periods=1)[0]
out_of_sample_y_train = pd.concat(
[self.y_train.iloc[-1 * train_window :], y.iloc[: i - 1]]
)
prediction.loc[y.index[i]] = self.arima_model.fit_predict(
out_of_sample_y_train,
n_periods=1,
).values[0]

else: # Using trained model
prediction.loc[y.index[i]] = self.arima_model.predict(
n_periods=1
).values[0]

retrain_idx += 1

# Print progress to inform user
self._print_progress(i + 1, y.shape[0], prefix='Progress:', suffix='Complete')
self._print_progress(
i + 1, y.shape[0], prefix="Progress:", suffix="Complete"
)

return prediction
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ analytics-python>=1.2.7, <2.0.0
arch==5.5.0
cvxpy==1.3.1
cython==0.29.28
dash>=1.0.0, <2.0.0
dash==2.10.2
getmac>=0.8.0, <1.0.0
jupyter-dash>=0.2.0, <1.0.0
keras==2.12.0
Expand All @@ -25,7 +25,7 @@ seaborn==0.12.2
statsmodels>=0.9.0, <1.0.0
tensorflow-macos==2.12.0; sys_platform == 'darwin' and platform_machine == 'arm64'
tensorflow==2.12.0; sys_platform != 'darwin' or platform_machine != 'arm64'
werkzeug==2.0.0
werkzeug==2.2.3
yahoo-fin==0.8.6
yfinance==0.2.18

Expand Down