intelligent_agent.py
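"""Train a Deep Q-Network (DQN) agent on the CartPole-v1 environment.

A minimal sketch using keras-rl2 on top of TensorFlow/Keras and OpenAI Gym.
Assumes gym==0.25.2 (keras-rl2 expects the old step/reset API) and a
TensorFlow version whose Keras optimizers still expose the legacy API
(e.g. tensorflow<=2.10, or swap in tensorflow.keras.optimizers.legacy.Adam).
"""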
import gym  # pip install gym==0.25.2 -- keras-rl2 expects the old Gym API
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent  # pip install keras-rl2
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
# Build the environment without a render mode so nothing is displayed during training.
env = gym.make("CartPole-v1")
states = env.observation_space.shape[0]  # 4 observations: cart position/velocity, pole angle/angular velocity
actions = env.action_space.n             # 2 discrete actions: push cart left or right
print(states)
print(actions)
# Simple feed-forward Q-network: the Flatten layer collapses the
# (window_length, states) input that SequentialMemory produces.
model = Sequential()
model.add(Flatten(input_shape=(1, states)))
model.add(Dense(24, activation="relu"))
model.add(Dense(24, activation="relu"))
model.add(Dense(actions, activation="linear"))  # one Q-value per action
agent = DQNAgent(
    model=model,
    memory=SequentialMemory(limit=50000, window_length=1),
    policy=BoltzmannQPolicy(),  # samples actions from a softmax over Q-values
    nb_actions=actions,
    nb_steps_warmup=10,
    target_model_update=0.01,  # < 1 means soft target-network updates by this factor
)
print(env)
print(env.observation_space)

# Keras deprecated the `lr` argument in favour of `learning_rate`.
# On TensorFlow >= 2.11, keras-rl2 needs the legacy optimizer instead:
# tensorflow.keras.optimizers.legacy.Adam.
agent.compile(Adam(learning_rate=0.001), metrics=["mae"])
# Train for 10,000 steps, then evaluate for 10 episodes with rendering on.
agent.fit(env, nb_steps=10000, visualize=False, verbose=1)
results = agent.test(env, nb_episodes=10, visualize=True)
print(np.mean(results.history["episode_reward"]))  # average return over the test episodes
env.close()
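
# Optional: persist the trained weights so testing does not require retraining.
# keras-rl2 agents expose save_weights()/load_weights(); the filename here is
# a hypothetical example, not part of the original script.
# agent.save_weights("dqn_cartpole_weights.h5f", overwrite=True)
# agent.load_weights("dqn_cartpole_weights.h5f")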