#!/bin/bash
#
# This script generates a number of games/playouts/rollouts using worker.py and then
# trains a neural network on those games using optimize_nn.py.
# Playouts are stored in game_dir. Neural network checkpoints for each iteration/loop
# step are stored in nn_model_dir.
#
# Example call of this script: source train.sh params log.txt
#
# Consumes a params file and a log file.
# params are defined in pommerman/agents/nn_model/optimize_nn.py
# The params file should contain one "key:value" pair per line, with these keys:
#   n_workers
#   start_iteration -- if not blank/0, nn_model_dir must already contain a checkpoint
#                      file (.pt) for iteration start_iteration - 1; list this key
#                      before max_loop_step, since max_loop_step is offset by it when read
#   max_loop_step
#   game_dir
#   nn_model_dir
#   env_id
#   opponent
# Note that 1v1 games are currently not supported by worker.py, as the code assumes
# each agent has a teammate.
# If a key is missing, its default below is used (this will likely fail if the default
# directory paths do not exist).
# If games fail to generate, you will get an assert(len(gameBuffer) == buffer_size) error.
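#
# A minimal example params file (illustrative placeholder values, not tuned settings;
# n_games_per_worker, buffer_size and n_epochs may also be set and otherwise fall back
# to the defaults below):
#   n_workers:4
#   start_iteration:0
#   max_loop_step:10
#   game_dir:GAMES
#   nn_model_dir:NN_MODELS
#   env_id:PommeTeamCompetition-v0
#   opponent:static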
if [ $# -lt 2 ] ; then
    echo "Usage: $0 param_config_file log_file"
    exit 1
fi
param_config_file=$1
log_file=$2
# clear the log_file
echo ' ' > "$log_file"
# Default parameter values; each may be overridden by a matching key in the params file
n_workers=0
max_loop_step=0
game_dir="GAMES"
nn_model_dir="NN_MODELS"
env_id="PommeTeamCompetition-v0"
start_iteration=0
opponent="static"
n_games_per_worker=10
buffer_size=100
n_epochs=1
save_param=true
params=''
# Parse the params file: each "key:value" line overrides the defaults above and
# (except for start_iteration) is appended to $params as a --key=value flag that is
# passed through to worker.py and optimize_nn.py.
while IFS=":" read -r key data
do
    echo "${key} : ${data}"
    save_param=true
    case ${key} in
        n_workers)          n_workers=${data} ;;
        start_iteration)    start_iteration=${data}
                            save_param=false ;;   # passed to optimize_nn.py as --iteration instead
        max_loop_step)      max_loop_step=$((data + start_iteration)) ;;
        game_dir)           game_dir=${data} ;;
        nn_model_dir)       nn_model_dir=${data} ;;
        env_id)             env_id=${data} ;;
        opponent)           opponent=${data} ;;
        n_games_per_worker) n_games_per_worker=${data} ;;
        buffer_size)        buffer_size=${data} ;;
        n_epochs)           n_epochs=${data} ;;
    esac
    if [ "$save_param" = true ] ; then
        params="${params}--${key}=${data} "
    fi
done < "$param_config_file"
echo "---all params: $params"
echo "n_workers: $n_workers"
echo "start_iteration: $start_iteration"
echo "max_loop_step: $max_loop_step"
echo "game_dir: $game_dir"
echo "nn_model_dir: $nn_model_dir"
echo "env_id: $env_id"
echo "opponent: $opponent"
echo "n_games_per_worker: $n_games_per_worker"
echo "buffer_size: $buffer_size"
echo "n_epochs: $n_epochs"
# Count the GPUs listed in CUDA_VISIBLE_DEVICES; workers are assigned device ids
# round-robin over this count in play_games below.
visible_gpus="$CUDA_VISIBLE_DEVICES"
IFS=',' read -r -a ARR <<< "$visible_gpus"
cnt=${#ARR[@]}
if [[ $cnt == 0 ]] ; then
    # nothing listed; fall back to a single device
    cnt=$((cnt+1))
fi
echo "cnt == $cnt"
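# For example (illustrative values only): with CUDA_VISIBLE_DEVICES=0,1 and
# n_workers=4, cnt is 2 and the workers below receive --device_id values 0, 1, 0, 1.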
# Run multiple workers in parallel; each is pinned to a device id chosen round-robin
# over the visible GPU count.
play_games() {
    echo -e "play games: $1 processes in parallel"
    hname=$(hostname)    # currently unused
    N=$1
    for ((i=0; i<N; i++))
    do
        SEED=$RANDOM     # currently unused
        k=$((i%cnt))
        # append both stdout and stderr of each worker to the log file
        python pommerman/agents/worker.py ${params} --device_id="$k" >>"$log_file" 2>&1 &
    done
    wait
}
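# For illustration only: with the example params file near the top of this script,
# ${params} expands to
#   --n_workers=4 --max_loop_step=10 --game_dir=GAMES --nn_model_dir=NN_MODELS
#   --env_id=PommeTeamCompetition-v0 --opponent=static
# (start_iteration is deliberately excluded; optimize_nn.py receives it through
# --iteration in the loop below), so a single worker invocation looks roughly like
#   python pommerman/agents/worker.py --n_workers=4 ... --device_id=0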
# Main loop: generate games with the workers, then train the network on them.
for ((ite=start_iteration; ite<max_loop_step; ite++)) ; do
    echo "iteration $ite"
    play_games $n_workers
    wait
    echo "finished working, optimize nn"
    # append both stdout and stderr of the training step to the log file
    python pommerman/agents/optimize_nn.py ${params} --iteration=$ite >>"$log_file" 2>&1
    wait
done
echo "Done"