Update APIs to Tensorflow 1.0 #2

Open · wants to merge 4 commits into base: master
ddqn.py: 24 changes (12 additions, 12 deletions)
@@ -11,7 +11,7 @@
 from keras.models import Sequential
 from keras.layers import Convolution2D, Flatten, Dense

-ENV_NAME = 'Breakout-v0' # Environment name
+ENV_NAME = 'Pong-v0' # Environment name
 FRAME_WIDTH = 84 # Resized frame width
 FRAME_HEIGHT = 84 # Resized frame height
 NUM_EPISODES = 12000 # Number of episodes the agent plays
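
The first hunk swaps the target environment from Breakout-v0 to Pong-v0. A quick sanity check of what the new id provides, assuming a classic gym release that still registers the '-v0' Atari environments (the variable names below are illustrative, not part of the patch):

import gym

# Confirm the renamed environment exists and inspect what the Agent will see.
env = gym.make('Pong-v0')
print(env.observation_space.shape)  # raw RGB frames, typically (210, 160, 3)
print(env.action_space.n)           # discrete action count passed to Agent(num_actions)
env.close()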
@@ -30,7 +30,7 @@
 MIN_GRAD = 0.01 # Constant added to the squared gradient in the denominator of the RMSProp update
 SAVE_INTERVAL = 300000 # The frequency with which the network is saved
 NO_OP_STEPS = 30 # Maximum number of "do nothing" actions to be performed by the agent at the start of an episode
-LOAD_NETWORK = False
+LOAD_NETWORK = True
 TRAIN = True
 SAVE_NETWORK_PATH = 'saved_networks/' + ENV_NAME
 SAVE_SUMMARY_PATH = 'summary/' + ENV_NAME
@@ -71,12 +71,12 @@ def __init__(self, num_actions):
         self.sess = tf.InteractiveSession()
         self.saver = tf.train.Saver(q_network_weights)
         self.summary_placeholders, self.update_ops, self.summary_op = self.setup_summary()
-        self.summary_writer = tf.train.SummaryWriter(SAVE_SUMMARY_PATH, self.sess.graph)
+        self.summary_writer = tf.summary.FileWriter(SAVE_SUMMARY_PATH, self.sess.graph)

         if not os.path.exists(SAVE_NETWORK_PATH):
             os.makedirs(SAVE_NETWORK_PATH)

-        self.sess.run(tf.initialize_all_variables())
+        self.sess.run(tf.global_variables_initializer())

         # Load network
         if LOAD_NETWORK:
@@ -105,7 +105,7 @@ def build_training_op(self, q_network_weights):

         # Convert action to one hot vector
         a_one_hot = tf.one_hot(a, self.num_actions, 1.0, 0.0)
-        q_value = tf.reduce_sum(tf.mul(self.q_values, a_one_hot), reduction_indices=1)
+        q_value = tf.reduce_sum(tf.multiply(self.q_values, a_one_hot), reduction_indices=1)

         # Clip the error, the loss is quadratic when the error is in (-1, 1), and linear outside of that region
         error = tf.abs(y - q_value)
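
The tf.mul to tf.multiply rename above is purely mechanical, but the masked sum it sits in is easy to get wrong, so here is a minimal standalone sketch of the same Q-value selection under TensorFlow 1.x (the placeholder names and the batch of fake values are illustrative only):

import numpy as np
import tensorflow as tf

num_actions = 4  # illustrative; the real value comes from the gym action space
q_values = tf.placeholder(tf.float32, [None, num_actions])
actions = tf.placeholder(tf.int64, [None])

# One-hot mask picks out the Q-value of the action actually taken in each transition.
a_one_hot = tf.one_hot(actions, num_actions, 1.0, 0.0)
q_value = tf.reduce_sum(tf.multiply(q_values, a_one_hot), reduction_indices=1)

with tf.Session() as sess:
    print(sess.run(q_value, feed_dict={
        q_values: np.array([[0.1, 0.9, 0.3, 0.2]], dtype=np.float32),
        actions: np.array([1]),
    }))  # -> [0.9]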
@@ -120,7 +120,7 @@ def build_training_op(self, q_network_weights):

     def get_initial_state(self, observation, last_observation):
         processed_observation = np.maximum(observation, last_observation)
-        processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT)) * 255)
+        processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT), mode='reflect') * 255)
         state = [processed_observation for _ in range(STATE_LENGTH)]
         return np.stack(state, axis=0)

@@ -234,17 +234,17 @@ def train_network(self):

     def setup_summary(self):
         episode_total_reward = tf.Variable(0.)
-        tf.scalar_summary(ENV_NAME + '/Total Reward/Episode', episode_total_reward)
+        tf.summary.scalar(ENV_NAME + '/Total Reward/Episode', episode_total_reward)
         episode_avg_max_q = tf.Variable(0.)
-        tf.scalar_summary(ENV_NAME + '/Average Max Q/Episode', episode_avg_max_q)
+        tf.summary.scalar(ENV_NAME + '/Average Max Q/Episode', episode_avg_max_q)
         episode_duration = tf.Variable(0.)
-        tf.scalar_summary(ENV_NAME + '/Duration/Episode', episode_duration)
+        tf.summary.scalar(ENV_NAME + '/Duration/Episode', episode_duration)
         episode_avg_loss = tf.Variable(0.)
-        tf.scalar_summary(ENV_NAME + '/Average Loss/Episode', episode_avg_loss)
+        tf.summary.scalar(ENV_NAME + '/Average Loss/Episode', episode_avg_loss)
         summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss]
         summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))]
         update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))]
-        summary_op = tf.merge_all_summaries()
+        summary_op = tf.summary.merge_all()
         return summary_placeholders, update_ops, summary_op

     def load_network(self):
@@ -268,7 +268,7 @@ def get_action_at_test(self, state):

 def preprocess(observation, last_observation):
     processed_observation = np.maximum(observation, last_observation)
-    processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT)) * 255)
+    processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT), mode='reflect') * 255)
     return np.reshape(processed_observation, (1, FRAME_WIDTH, FRAME_HEIGHT))

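Taken together, the summary-related renames in this file follow the TensorFlow 1.0 migration: tf.scalar_summary -> tf.summary.scalar, tf.merge_all_summaries -> tf.summary.merge_all, tf.train.SummaryWriter -> tf.summary.FileWriter, and tf.initialize_all_variables -> tf.global_variables_initializer. A minimal end-to-end sketch of that pipeline, assuming TensorFlow 1.x; the 'summary/demo' log directory and variable names are illustrative, not taken from the patch:

import tensorflow as tf

# Build a scalar summary around a plain variable, as setup_summary() does.
episode_total_reward = tf.Variable(0.)
tf.summary.scalar('Total Reward/Episode', episode_total_reward)

reward_placeholder = tf.placeholder(tf.float32)
update_op = episode_total_reward.assign(reward_placeholder)
summary_op = tf.summary.merge_all()

sess = tf.InteractiveSession()
summary_writer = tf.summary.FileWriter('summary/demo', sess.graph)
sess.run(tf.global_variables_initializer())

# Push one value and write it out for TensorBoard.
sess.run(update_op, feed_dict={reward_placeholder: 21.0})
summary_writer.add_summary(sess.run(summary_op), global_step=1)
summary_writer.close()
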
dqn.py: 24 changes (12 additions, 12 deletions)
@@ -11,7 +11,7 @@
 from keras.models import Sequential
 from keras.layers import Convolution2D, Flatten, Dense

-ENV_NAME = 'Breakout-v0' # Environment name
+ENV_NAME = 'Pong-v0' # Environment name
 FRAME_WIDTH = 84 # Resized frame width
 FRAME_HEIGHT = 84 # Resized frame height
 NUM_EPISODES = 12000 # Number of episodes the agent plays
@@ -30,7 +30,7 @@
 MIN_GRAD = 0.01 # Constant added to the squared gradient in the denominator of the RMSProp update
 SAVE_INTERVAL = 300000 # The frequency with which the network is saved
 NO_OP_STEPS = 30 # Maximum number of "do nothing" actions to be performed by the agent at the start of an episode
-LOAD_NETWORK = False
+LOAD_NETWORK = True
 TRAIN = True
 SAVE_NETWORK_PATH = 'saved_networks/' + ENV_NAME
 SAVE_SUMMARY_PATH = 'summary/' + ENV_NAME
@@ -71,12 +71,12 @@ def __init__(self, num_actions):
         self.sess = tf.InteractiveSession()
         self.saver = tf.train.Saver(q_network_weights)
         self.summary_placeholders, self.update_ops, self.summary_op = self.setup_summary()
-        self.summary_writer = tf.train.SummaryWriter(SAVE_SUMMARY_PATH, self.sess.graph)
+        self.summary_writer = tf.summary.FileWriter(SAVE_SUMMARY_PATH, self.sess.graph)

         if not os.path.exists(SAVE_NETWORK_PATH):
             os.makedirs(SAVE_NETWORK_PATH)

-        self.sess.run(tf.initialize_all_variables())
+        self.sess.run(tf.global_variables_initializer())

         # Load network
         if LOAD_NETWORK:
@@ -105,7 +105,7 @@ def build_training_op(self, q_network_weights):

         # Convert action to one hot vector
         a_one_hot = tf.one_hot(a, self.num_actions, 1.0, 0.0)
-        q_value = tf.reduce_sum(tf.mul(self.q_values, a_one_hot), reduction_indices=1)
+        q_value = tf.reduce_sum(tf.multiply(self.q_values, a_one_hot), reduction_indices=1)

         # Clip the error, the loss is quadratic when the error is in (-1, 1), and linear outside of that region
         error = tf.abs(y - q_value)
@@ -120,7 +120,7 @@ def build_training_op(self, q_network_weights):

     def get_initial_state(self, observation, last_observation):
         processed_observation = np.maximum(observation, last_observation)
-        processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT)) * 255)
+        processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT), mode='reflect') * 255)
         state = [processed_observation for _ in range(STATE_LENGTH)]
         return np.stack(state, axis=0)

@@ -232,17 +232,17 @@ def train_network(self):

     def setup_summary(self):
         episode_total_reward = tf.Variable(0.)
-        tf.scalar_summary(ENV_NAME + '/Total Reward/Episode', episode_total_reward)
+        tf.summary.scalar(ENV_NAME + '/Total Reward/Episode', episode_total_reward)
         episode_avg_max_q = tf.Variable(0.)
-        tf.scalar_summary(ENV_NAME + '/Average Max Q/Episode', episode_avg_max_q)
+        tf.summary.scalar(ENV_NAME + '/Average Max Q/Episode', episode_avg_max_q)
         episode_duration = tf.Variable(0.)
-        tf.scalar_summary(ENV_NAME + '/Duration/Episode', episode_duration)
+        tf.summary.scalar(ENV_NAME + '/Duration/Episode', episode_duration)
         episode_avg_loss = tf.Variable(0.)
-        tf.scalar_summary(ENV_NAME + '/Average Loss/Episode', episode_avg_loss)
+        tf.summary.scalar(ENV_NAME + '/Average Loss/Episode', episode_avg_loss)
         summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss]
         summary_placeholders = [tf.placeholder(tf.float32) for _ in range(len(summary_vars))]
         update_ops = [summary_vars[i].assign(summary_placeholders[i]) for i in range(len(summary_vars))]
-        summary_op = tf.merge_all_summaries()
+        summary_op = tf.summary.merge_all()
         return summary_placeholders, update_ops, summary_op

     def load_network(self):
@@ -266,7 +266,7 @@ def get_action_at_test(self, state):

 def preprocess(observation, last_observation):
     processed_observation = np.maximum(observation, last_observation)
-    processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT)) * 255)
+    processed_observation = np.uint8(resize(rgb2gray(processed_observation), (FRAME_WIDTH, FRAME_HEIGHT), mode='reflect') * 255)
     return np.reshape(processed_observation, (1, FRAME_WIDTH, FRAME_HEIGHT))

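dqn.py receives the same mechanical renames as ddqn.py, plus the explicit mode='reflect' argument to skimage.transform.resize in both preprocessing paths. The explicit mode keeps the resize deterministic across scikit-image versions and silences the warning newer releases emit when mode is not given; only boundary handling at the frame edges changes. A standalone sketch of that preprocessing step (random arrays stand in for gym observations; the 210x160x3 frame shape is the usual Atari size, assumed here):

import numpy as np
from skimage.color import rgb2gray
from skimage.transform import resize

FRAME_WIDTH = 84
FRAME_HEIGHT = 84

def preprocess_frame(observation, last_observation):
    # Pixel-wise max over the two most recent frames removes Atari sprite flicker.
    frame = np.maximum(observation, last_observation)
    # rgb2gray returns floats in [0, 1]; resize with an explicit mode, then scale to uint8.
    gray = rgb2gray(frame)
    resized = resize(gray, (FRAME_WIDTH, FRAME_HEIGHT), mode='reflect')
    return np.uint8(resized * 255)

obs = np.random.randint(0, 256, (210, 160, 3), dtype=np.uint8)
prev = np.random.randint(0, 256, (210, 160, 3), dtype=np.uint8)
print(preprocess_frame(obs, prev).shape)  # (84, 84)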