use latest stable_baselines3 #7

Open · wants to merge 7 commits into base: master
126 changes: 91 additions & 35 deletions env/StockTradingEnv.py
@@ -17,30 +17,55 @@

LOOKBACK_WINDOW_SIZE = 40


def factor_pairs(val):
    # integer division keeps both factors as ints (val / i would produce floats)
    return [(i, val // i) for i in range(1, int(val**0.5) + 1) if val % i == 0]
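# e.g. factor_pairs(12) -> [(1, 12), (2, 6), (3, 4)]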

# np.seterr(all='raise')

class StockTradingEnv(gym.Env):
"""A stock trading environment for OpenAI gym"""
metadata = {'render.modes': ['live', 'file', 'none']}
visualization = None

def __init__(self, df):
super(StockTradingEnv, self).__init__()

"""stock trading gym environment

"""
metadata = {'render.modes':['live','human','file','none']}

def __init__(self, df) -> None:
super(StockTradingEnv,self).__init__()

self.df = self._adjust_prices(df)
self.reward_range = (0, MAX_ACCOUNT_BALANCE)

# Actions of the format Buy x%, Sell x%, Hold, etc.
self.action_space = spaces.Box(
low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)

# Prices contains the OHLC values for the lookback window
self.observation_space = spaces.Box(
low=0, high=1, shape=(5, LOOKBACK_WINDOW_SIZE + 2), dtype=np.float16)

self.visualization = None

"""
Reward, we want to incentivize profit that is sustained over long periods of time.
At each step, we will set the reward to the account balance multiplied by
some fraction of the number of time steps so far.
The purpose of this is to delay rewarding the agent too fast in the early stages
and allow it to explore sufficiently before optimizing a single strategy too deeply.
It will also reward agents that maintain a higher balance for longer,
rather than those who rapidly gain money using unsustainable strategies.
"""
self.reward_range = (0, MAX_ACCOUNT_BALANCE)
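# Worked example (illustration, not part of this diff): with a hypothetical
# MAX_STEPS = 20000, a balance of 10000 at step 100 earns
# reward = 10000 * (100 / 20000) = 50, while the same balance at
# step 10000 earns 10000 * 0.5 = 5000.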

"""
Action space that has a discrete number of action types (buy, sell, and hold),
as well as a continuous spectrum of amounts to buy/sell
(0-100% of the account balance/position size respectively).
You’ll notice the amount is not necessary for the hold action,
but will be provided anyway. Our agent does not initially know this,
but over time should learn that the amount is extraneous for this action.
"""
self.action_space = spaces.Box(low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)
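# How an action decodes (illustration, not part of this diff), following the
# _take_action() logic below:
#   action = [0.7, 0.25] -> action_type < 1: buy 25% of what the balance affords
#   action = [1.4, 0.50] -> 1 <= action_type < 2: sell 50% of shares held
#   action = [2.9, 0.10] -> action_type >= 2: hold; the amount is ignored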

"""
Observation_space contains all of the input variables we want our agent to consider before making,
or not making a trade. We want our agent to “see” the forex data points
(open price, high, low, close, and daily volume) for the last five days,
as well a couple other data points like its account balance, current stock positions, and current profit.
The intuition here is that for each time step, we want our agent to consider the price action
leading up to the current price, as well as their own portfolio’s status in order to make
an informed decision for the next action.
"""
self.observation_space = spaces.Box(low=0, high=1, shape=(5, LOOKBACK_WINDOW_SIZE + 2), dtype=np.float16)
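# Shape sketch (illustration, not part of this diff): _next_observation() fills
# a 5 x (LOOKBACK_WINDOW_SIZE + 1) frame -- one row per series (open, high,
# low, close, volume) -- then appends one more column of scaled account state,
# giving the (5, LOOKBACK_WINDOW_SIZE + 2) space declared here.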

def _adjust_prices(self, df):
adjust_ratio = df['Adjusted_Close'] / df['Close']

@@ -50,7 +75,7 @@ def _adjust_prices(self, df):
df['Close'] = df['Close'] * adjust_ratio

return df

def _next_observation(self):
frame = np.zeros((5, LOOKBACK_WINDOW_SIZE + 1))

@@ -80,6 +105,7 @@ def _next_observation(self):
return obs

def _take_action(self, action):
# Set the current price to a random price within the time step
current_price = random.uniform(
self.df.loc[self.current_step, "Open"], self.df.loc[self.current_step, "Close"])
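# (illustration) e.g. Open = 100.0, Close = 104.0 -> the fill price is drawn
# uniformly from [100.0, 104.0]; random.uniform also accepts Open > Close.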

@@ -94,28 +120,30 @@ def _take_action(self, action):
additional_cost = shares_bought * current_price

self.balance -= additional_cost
self.cost_basis = (
prev_cost + additional_cost) / (self.shares_held + shares_bought)
if (self.shares_held + shares_bought) != 0:
self.cost_basis = (prev_cost + additional_cost) / (self.shares_held + shares_bought)
else:
self.cost_basis = 0
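# Worked example (illustration, not part of this diff): holding 10 shares at a
# cost basis of 5.00 (prev_cost = 50.00) and buying 10 more at 7.00
# (additional_cost = 70.00) gives a new cost basis of (50 + 70) / 20 = 6.00.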

self.shares_held += shares_bought

if shares_bought > 0:
self.trades.append({'step': self.current_step,
'shares': shares_bought, 'total': additional_cost,
'type': "buy"})

elif action_type < 2:
# Sell amount % of shares held
shares_sold = int(self.shares_held * amount)
self.balance += shares_sold * current_price
self.shares_held -= shares_sold
self.total_shares_sold += shares_sold
self.total_sales_value += shares_sold * current_price

if shares_sold > 0:
self.trades.append({'step': self.current_step,
'shares': shares_sold, 'total': shares_sold * current_price,
'type': "sell"})

self.net_worth = self.balance + self.shares_held * current_price

if self.net_worth > self.max_net_worth:
@@ -124,6 +152,24 @@ def _take_action(self, action):
if self.shares_held == 0:
self.cost_basis = 0

def step(self, action):
# Execute one time step within the environment
self._take_action(action)

self.current_step += 1

if self.current_step > len(self.df.loc[:, 'Open'].values) - 6:
self.current_step = 0
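# (note) wrapping current_step back to 0 loops training over the dataset
# instead of terminating the episode when the data runs out.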

delay_modifier = (self.current_step / MAX_STEPS)

reward = self.balance * delay_modifier
done = self.net_worth <= 0

obs = self._next_observation()

return obs, reward, done, {}
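# NOTE (descriptive, not a change): the pre-existing step() below is left in
# place by this diff; Python keeps the last definition in a class body, so the
# later method shadows this one unless the duplicate is removed.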

def step(self, action):
# Execute one time step within the environment
self._take_action(action)
@@ -151,7 +197,7 @@ def reset(self):
self.total_sales_value = 0
self.current_step = 0
self.trades = []

self.visualization = None
return self._next_observation()

def _render_to_file(self, filename='render.txt'):
@@ -171,21 +217,31 @@ def _render_to_file(self, filename='render.txt'):

file.close()

def render(self, mode='live', **kwargs):
def render(self, mode='live', title=None, **kwargs):
# Render the environment to the screen
if mode == 'human':
profit = self.net_worth - INITIAL_ACCOUNT_BALANCE

print(f'Step: {self.current_step}')
print(f'Balance: {self.balance}')
print(
f'Shares held: {self.shares_held} (Total sold: {self.total_shares_sold})')
print(
f'Avg cost for held shares: {self.cost_basis} (Total sales value: {self.total_sales_value})')
print(
f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})')
print(f'Profit: {profit}')
if mode == 'file':
self._render_to_file(kwargs.get('filename', 'render.txt'))

elif mode == 'live':
if self.visualization == None:
self.visualization = StockTradingGraph(
self.df, kwargs.get('title', None))

if self.current_step > LOOKBACK_WINDOW_SIZE:
self.visualization.render(
self.current_step, self.net_worth, self.trades, window_size=LOOKBACK_WINDOW_SIZE)

self.visualization = StockTradingGraph(self.df, title)

if self.current_step > LOOKBACK_WINDOW_SIZE:
self.visualization.render(self.current_step, self.net_worth,
self.trades, window_size=LOOKBACK_WINDOW_SIZE)

def close(self):
if self.visualization != None:
self.visualization.close()
self.visualization = None
1 change: 1 addition & 0 deletions env/__init__.py
@@ -0,0 +1 @@
__version__="0.0.2"
13 changes: 6 additions & 7 deletions main.py
@@ -1,8 +1,6 @@
import gym

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import PPO

from env.StockTradingEnv import StockTradingEnv

@@ -14,11 +12,12 @@
# The algorithms require a vectorized environment to run
env = DummyVecEnv([lambda: StockTradingEnv(df)])

model = PPO2(MlpPolicy, env, verbose=1)
model = PPO(MlpPolicy, env, verbose=1)
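# (aside, assuming stable-baselines3 >= 1.0) the policy may also be passed as
# a string, e.g. model = PPO("MlpPolicy", env, verbose=1)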
model.learn(total_timesteps=50)

obs = env.reset()

for i in range(len(df['Date'])):
action, _states = model.predict(obs)
obs, rewards, done, info = env.step(action)
env.render(title="MSFT")
env.render(mode='live')
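A note on the vectorized API: stable_baselines3's DummyVecEnv batches inputs and outputs even for a single environment, so the loop above receives arrays rather than scalars. A minimal sketch of the shapes, assuming one wrapped env:

obs = env.reset()                              # obs shape: (1, 5, LOOKBACK_WINDOW_SIZE + 2)
action, _states = model.predict(obs)           # action shape: (1, 2)
obs, rewards, dones, infos = env.step(action)  # rewards: (1,); dones: (1,); infos: list of dicts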
19 changes: 8 additions & 11 deletions render/StockTradingGraph.py
@@ -1,15 +1,11 @@


import numpy as np
import matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.dates as dates
from matplotlib import style

# finance module is no longer part of matplotlib
# see: https://github.com/matplotlib/mpl_finance
from mpl_finance import candlestick_ochl as candlestick

from datetime import datetime
# import mplfinance as mpf
from mplfinance.original_flavor import candlestick_ohlc as candlestick
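# (porting note) the old helper was candlestick_ochl, which took quote rows
# ordered (t, open, close, high, low); candlestick_ohlc expects
# (t, open, high, low, close), so call sites must build tuples in the new order.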
style.use('dark_background')

VOLUME_CHART_HEIGHT = 0.33
@@ -21,15 +17,16 @@


def date2num(date):
converter = mdates.strpdate2num('%Y-%m-%d')
return converter(date)
    # datestr2num parses a 'YYYY-MM-DD' string directly, so the
    # strptime/strftime round-trip is unnecessary
    return dates.datestr2num(date)


class StockTradingGraph:
"""A stock trading visualization using matplotlib made to render OpenAI gym environments"""

def __init__(self, df, title=None):
self.df = df
df['dt'] = pd.to_datetime(df['Date'])
self.net_worths = np.zeros(len(df['Date']))

# Create a figure on screen and set the title
1 change: 1 addition & 0 deletions render/__init__.py
@@ -0,0 +1 @@
__version__="0.0.2"