-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
56 lines (46 loc) · 1.22 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from random import random
from env import Backgammon
from random import randint
import random
import tensorflow as tf
import numpy as np
# Player identifiers; passed to the environment and appended to the features.
RED = 1
BLACK = 0


def _choose_action(scores, is_valid, top_k=3):
    """Return a legal action index, preferring the best-scored candidates.

    A candidate is drawn at random from the ``top_k`` highest-scored actions.
    If none of those is legal, the remaining actions are tried from the
    highest score downwards, so the search always terminates.

    Args:
        scores: 1-D sequence of per-action scores; higher means preferred.
        is_valid: Callable that takes an action index and returns a truthy
            value when that action is legal.
        top_k: Number of best-scored actions to sample from first.

    Returns:
        An element of ``np.argsort(scores)`` accepted by ``is_valid``.

    Raises:
        RuntimeError: If ``is_valid`` rejects every action.
    """
    ranked = np.argsort(scores)  # ascending: best-scored actions come last
    split = max(len(ranked) - top_k, 0)

    # Shuffling and taking the first legal candidate gives a uniform draw over
    # the legal top-k actions, without re-sampling rejected ones forever.
    preferred = list(ranked[split:])
    random.shuffle(preferred)
    for candidate in preferred:
        if is_valid(candidate):
            return candidate

    # None of the preferred actions is legal: walk down the rest of the ranking.
    for candidate in ranked[:split][::-1]:
        if is_valid(candidate):
            return candidate

    raise RuntimeError("policy produced no valid action for the current state")


model = tf.keras.models.load_model('model/policy.model')
env = Backgammon()
agent = randint(BLACK, RED)
env.reset()

done = False
score = 0
roll = []

while not done:
    if not roll:
        # Fresh dice; a double is stored as four identical dice.
        roll = [randint(1, 6), randint(1, 6)]
        if roll[0] == roll[1]:
            roll.append(roll[0])
            roll.append(roll[0])
        # A new roll hands the move to the other side.
        agent = BLACK if agent == RED else RED
    # NOTE(review): nothing in this script removes dice from `roll`, so after
    # the first iteration the dice are never re-rolled and `agent` never
    # changes again -- confirm whether spent dice should be dropped per step.

    # Flat feature vector: points, bar, home, the first two dice, side to move.
    board = [
        *env.state.points,
        *env.state.barCheckers,
        *env.state.homeCheckers,
        roll[0],
        roll[1],
        agent,
    ]
    print(len(board))

    # `features` rather than `input`, to avoid shadowing the builtin.
    features = tf.keras.utils.normalize(board)
    predictions = model.predict([features])

    # predictions[0] holds the per-action scores of the single sample.
    action = _choose_action(
        predictions[0],
        lambda candidate: env.state.isValidMove(agent, candidate),
    )

    _next_state, reward, done, _info = env.step(agent, action)
    score += reward

print(score)