parser.py
""" This file contains all input flags used by experiment_network.
"""
# ----- generic imports -----
import argparse
def parse_flags():
parser = argparse.ArgumentParser()
# ----- related to modelling the game -----
    parser.add_argument('--N',
                        help='Number of nodes',
                        type=int,
                        default=100)
    parser.add_argument('--clow',
                        help='Punishment for underflow',
                        type=int,
                        default=0)
    parser.add_argument('--chigh',
                        help='Punishment for overflow',
                        type=int,
                        default=100)
    parser.add_argument('--utility',
                        help='Reward for being alive (neither over-flown nor '
                             'under-flown)',
                        type=int,
                        default=8)
    parser.add_argument('--capacity',
                        help='Capacity of nodes',
                        type=int,
                        default=3)
    parser.add_argument('--K',
                        help='Number of adversaries',
                        type=int,
                        default=1)
    parser.add_argument('--topology',
                        help='The network topology. Choose between ring and '
                             'star.',
                        type=str,
                        default="ring")
    parser.add_argument('--network_type',
                        help='The type of network determines the modelling of '
                             'nodes. Choose between A, B and C.',
                        type=str,
                        default="A")
    # ----- related to learning parameters -----
    parser.add_argument('--learning_rate',
                        help='Learning rate for temporal difference learning.',
                        type=float,
                        default=0.01)
    parser.add_argument('--discount_factor',
                        help='Discount factor for temporal difference learning.',
                        type=float,
                        default=0.9)
    parser.add_argument('--epsilon',
                        help='Exploration rate for temporal difference learning.',
                        type=float,
                        default=0.1)
    parser.add_argument('--algorithm',
                        help='Indicates the learning algorithm used. Choose '
                             'between Qlearning, minimaxQ and RomQ.',
                        type=str,
                        default="Qlearning")
    parser.add_argument('--adversary',
                        help='Choose the adversarial policy. Choices are '
                             'Qlearning, minimaxQ and RomQ.',
                        type=str,
                        default="Qlearning")
    parser.add_argument('--horizon',
                        help='Number of iterations in an episode',
                        type=int,
                        default=50)
    # ----- configuring the simulation -----
    parser.add_argument('--project',
                        help='Name of the project',
                        type=str,
                        default="temp")
    parser.add_argument('--trials',
                        help='Number of Monte Carlo trials.',
                        type=int,
                        default=5)
    parser.add_argument('--epochs',
                        help='Number of epochs (for saving intermediate '
                             'results).',
                        type=int,
                        default=5)
    parser.add_argument('--train_samples',
                        help='Number of training samples',
                        type=int,
                        default=1000000)
    parser.add_argument('--eval_samples',
                        help='Number of evaluation samples',
                        type=int,
                        default=20000)
    parser.add_argument('--evaluate',
                        help='Evaluate existing policies.',
                        default=False,
                        action="store_true")
    parser.add_argument('--train',
                        help='Train new policies.',
                        default=False,
                        action="store_true")
    parser.add_argument('--evaluate_interm',
                        help='Indicates whether all intermediate trained '
                             'policies will be evaluated. Otherwise, only the '
                             'policy after convergence is evaluated.',
                        default=False,
                        action="store_true")
    parser.add_argument('--adversarial_interm',
                        help='Indicates whether intermediate adversarial '
                             'policies will be computed, stored and used for '
                             'evaluation.',
                        default=False,
                        action="store_true")
    parser.add_argument('--attack_type',
                        help='Choose between rand, rand_nodes, rand_actions '
                             'and worst.',
                        type=str,
                        default="worst")
    parser.add_argument('--eval_attack_prob',
                        help='Probability of an attack during evaluation.',
                        type=float,
                        default=1.0)

    args = parser.parse_args()
    return args
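

# Example usage: a minimal sketch of how a caller such as experiment_network
# might consume these flags. The __main__ guard below is illustrative only and
# is an assumption, not part of the original experiment script; running
# `python parser.py --N 50 --train` would simply print the parsed namespace.
if __name__ == "__main__":
    flags = parse_flags()
    print(flags)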