Model.py
import glob
import os
import sys
import random
import time
import numpy as np
import cv2
import math
import matplotlib.pyplot as plt
from collections import deque
from keras.applications.xception import Xception
from keras.layers import Dense, GlobalAveragePooling2D, Input, Concatenate, Conv2D, AveragePooling2D, Activation, Flatten
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
import tensorflow as tf
import keras.backend.tensorflow_backend as backend
from threading import Thread
from Environment import *
class DQNAgent:
    def __init__(self):
        # Main network (trained every step) and target network (synced periodically).
        self.model = self.create_model()
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        # Experience replay buffer of (state, action, reward, new_state, done) transitions.
        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)

        # ModifiedTensorBoard and the capitalized constants are provided by Environment.
        self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")
        self.target_update_counter = 0
        self.graph = tf.compat.v1.get_default_graph()

        self.terminate = False
        self.last_logged_episode = 0
        self.training_initialized = False
    def create_model(self):
        IM_WIDTH = 640
        IM_HEIGHT = 480

        model = Sequential()

        model.add(Conv2D(64, (3, 3), input_shape=(IM_HEIGHT, IM_WIDTH, 3), padding='same'))
        model.add(Activation('relu'))
        model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same'))

        model.add(Conv2D(64, (3, 3), padding='same'))
        model.add(Activation('relu'))
        model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same'))

        model.add(Conv2D(64, (3, 3), padding='same'))
        model.add(Activation('relu'))
        model.add(AveragePooling2D(pool_size=(5, 5), strides=(3, 3), padding='same'))

        model.add(Flatten())

        # Output layer and compile step added here so fit()/predict() below work;
        # train_in_loop() targets have shape (1, 3), i.e. one Q-value per action.
        # The mse loss and Adam(lr=0.001) settings are an assumption.
        model.add(Dense(3, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=0.001), metrics=['accuracy'])
        return model
    def update_replay_memory(self, transition):
        # transition = (current_state, action, reward, new_state, done)
        self.replay_memory.append(transition)
    def train(self):
        # Only train once enough transitions have been collected.
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        # Normalize image states to [0, 1] before feeding them to the networks.
        current_states = np.array([transition[0] for transition in minibatch]) / 255
        with self.graph.as_default():
            current_qs_list = self.model.predict(current_states, PREDICTION_BATCH_SIZE)

        new_current_states = np.array([transition[3] for transition in minibatch]) / 255
        with self.graph.as_default():
            future_qs_list = self.target_model.predict(new_current_states, PREDICTION_BATCH_SIZE)

        X = []
        y = []

        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
            # Standard Q-learning target: r + gamma * max_a' Q_target(s', a').
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            current_qs = current_qs_list[index]
            current_qs[action] = new_q

            X.append(current_state)
            y.append(current_qs)

        log_this_step = False
        if self.tensorboard.step > self.last_logged_episode:
            log_this_step = True
            self.last_logged_episode = self.tensorboard.step

        with self.graph.as_default():
            self.model.fit(np.array(X) / 255, np.array(y), batch_size=TRAINING_BATCH_SIZE, verbose=0, shuffle=False, callbacks=[self.tensorboard] if log_this_step else None)

        if log_this_step:
            self.target_update_counter += 1

        # Periodically copy the online weights into the target network.
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0
    def get_qs(self, state):
        # Predict Q-values for a single state (adds a batch dimension and normalizes).
        return self.model.predict(np.array(state).reshape(-1, *state.shape) / 255)[0]
    def train_in_loop(self):
        # Warm-up fit on random data so the first real training step does not pay
        # the TensorFlow initialization cost; IM_HEIGHT/IM_WIDTH come from Environment.
        X = np.random.uniform(size=(1, IM_HEIGHT, IM_WIDTH, 3)).astype(np.float32)
        y = np.random.uniform(size=(1, 3)).astype(np.float32)
        with self.graph.as_default():
            self.model.fit(X, y, verbose=False, batch_size=1)

        self.training_initialized = True

        # Keep training from the replay buffer until asked to terminate.
        while True:
            if self.terminate:
                return
            self.train()
            time.sleep(0.01)
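

# ---------------------------------------------------------------------------
# Usage sketch (an illustration, not part of the original file): one way this
# agent could be driven from a training script. The environment/driving loop
# is only hinted at in comments; the real loop lives elsewhere in the project.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    agent = DQNAgent()

    # Train on a background thread while the main thread gathers experience.
    trainer_thread = Thread(target=agent.train_in_loop, daemon=True)
    trainer_thread.start()

    # Wait until the warm-up fit in train_in_loop() has completed.
    while not agent.training_initialized:
        time.sleep(0.01)

    # Main loop (sketched): pick actions with agent.get_qs(state), step the
    # environment, then store each transition:
    # agent.update_replay_memory((current_state, action, reward, new_state, done))

    # Signal the background trainer to stop and wait for it to exit.
    agent.terminate = True
    trainer_thread.join()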