-
Notifications
You must be signed in to change notification settings - Fork 1
/
q-learning.py
168 lines (138 loc) · 9.63 KB
/
q-learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import numpy as np
import random
print("We are looking for a maze!")
adjacence= np.array([[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])
"""
This function will generate a random grid 4*4.
It also process an adjacence matrix for the next step of resolution
"""
def generate_grid_and_adjacence():
grid_barrier = np.array([[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],
[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],
[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]],
[[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1],[-1,-1,-1,-1]]])
#We construct a random map
#O is index of possible wall to the left.
#1 is index of possible wall to the Top.
#2 is index of possible wall to the right.
#3 is index of possible wall to the bottom.
for i in range (0,len(grid_barrier)):
for j in range (0,len(grid_barrier[0])):
for k in range (0,len(grid_barrier[0][0])):
if (grid_barrier[i][j][k] == -1):
grid_barrier[i][j][k] = random.randint(0, 1) # random number between 0 and 1 : generate a barrier
# propagation of wall
if ( k == 0 ) & ( i > 0 ) :
grid_barrier[i-1][j][2] = grid_barrier[i][j][k]
if ( k == 1 ) & ( j > 0 ) :
grid_barrier[i][j-1][3] = grid_barrier[i][j][k]
if ( k == 2 ) & ( i < 3 ) :
grid_barrier[i+1][j][0] = grid_barrier[i][j][k]
if ( k == 3 ) & ( j < 3 ) :
grid_barrier[i][j+1][1] = grid_barrier[i][j][k]
else :
print("nothing")
#All the points of the grid/maze are reshape with the following order
# 0 | 4 | 8 | 12
# 1 | 5 | 9 | 13
# 2 | 6 | 10 | 14
# 3 | 7 | 11 | 15
# We process Adjacency matrix: our matrix is symetrical ( non-oriented graph )
# (Adjacency_matrix^N indicates all the path of size N in the map )
for i in range (0,len(grid_barrier)):
for j in range (0,len(grid_barrier[0])):
for k in range (0,len(grid_barrier[0][0])):
if ( (grid_barrier[i][j][0] == 0) & ( 4 * (i) + j -4 > 0 ) & (abs( 4* i + j - (4 * (i) + j -4) ) < 6 ) & ( i > 0) ) :
adjacence[4* i + j] [ 4 * (i) + j -4] = 1 # these 2 case are linked
adjacence[ 4 * (i) + j -4][4* i + j] = 1 # these 2 case are linked
if ( (grid_barrier[i][j][1] == 0) & ( 4 * (i) + j -1 > 0 ) & (abs( 4* i + j - ( 4 * i-1 + j )) < 6 ) & ( j > 0) ):
adjacence[4* i + j] [ 4 * (i) + j -1 ] = 1 # these 2 case are linked
adjacence[ 4 * i-1 + j ][4* i + j] = 1 # these 2 case are linked
if ( (grid_barrier[i][j][2] == 0) & ( 4 * (i) + j + 4 < 16 ) & (abs( 4* i + j - ( 4 * (i) + j + 4 ) ) < 6 ) & ( j < 3 )) :
adjacence[4* i + j] [ 4 * (i) + j + 4] = 1 # these 2 case are linked
adjacence[ 4 * (i) + j + 4][4* i + j] = 1 # these 2 case are linked
if ( (grid_barrier[i][j][3] == 0) & ( 4 * i+1 + j < 16 ) & (abs( 4* i + j - ( 4 * (i) + j + 1 ) ) < 6 ) & (i < 3) ):
adjacence[4* i + j] [ 4 * i+1 + j] = 1 # these 2 case are linked
adjacence[ 4 *i + j + 1] [4* i + j] = 1 # these 2 case are linked
if ( (grid_barrier[i][j][0] == 1) & (grid_barrier[i][j][1] == 1) & ( 4 * i+ j -5 > 0 ) ) : # no line between i and i-1
adjacence[4* i + j] [ 4 * i+ j -5] = 0 # these 2 case are not linked
adjacence[ 4 * i+ j -5] [4* i + j] = 0 # these 2 case are not linked
if ( (grid_barrier[i][j][1] == 1) & (grid_barrier[i][j][2] == 1) & ( 4 * i + j + 3 < 16 ) ) : # no line between i and j+1
adjacence[4* i + j] [ 4 * i + j + 3] = 0 # these 2 case are not linked
adjacence[ 4 * i + j + 3] [4* i + j] = 0
if ( (grid_barrier[i][j][2] == 1) & (grid_barrier[i][j][3] == 1) & ( 4 * i + j + 5 < 16 ) ) : # no line between i and i-1
adjacence[4* i + j] [ 4 * i + j + 5] = 0 # these 2 case are not linked
adjacence[ 4 * i + j + 5 ][4* i + j] = 0
if ( (grid_barrier[i][j][3] == 1) & (grid_barrier[i][j][0] == 1) & ( 4 *i + j -3 > 0 ) ) : # no line between i and i-1
adjacence[4* i + j] [ 4 *i + j -3 ] = 0 # these 2 case are not linked
adjacence[4 *i + j -3 ][4* i + j] = 0
#refilter diag : we check 4 configurations / per diagonal where two case of maze are not linked
#diag sup right
if ( (4 * (i) + j +3 < 16) & (i < 3) & (j > 0) ):
if ( ( (grid_barrier[i+1][j-1][0] == 1) & ( grid_barrier[i+1][j-1][3] == 1 ) ) | ( (grid_barrier[i][j][2] == 1) & ( grid_barrier[i][j][1] == 1 ) ) | ( (grid_barrier[i][j][1] == 1) & ( grid_barrier[i+1][j][1] == 1 ) ) | ( (grid_barrier[i][j][2] == 1) & ( grid_barrier[i+1][j-1][2] == 1 ) ) ):
adjacence[4* i + j] [ 4 * i + j +3] = 0 # these 2 case are not linked
adjacence[ 4 * i + j +3] [4* i + j] = 0 # these 2 case are not linked
#diag infer right
if ( 4 * i + j + 5 < 15 ) & (i < 3) & (j < 3) :
if ( (grid_barrier[i][j][2] == 1) & (grid_barrier[i][j][3] == 1 ) ) | ( (grid_barrier[i][j][3] == 1) & (grid_barrier[i+1][j][3] == 1 ) ) | ( (grid_barrier[i][j][2] == 1) & (grid_barrier[i][j+1][2] == 1 )) | ( (grid_barrier[i+1][j+1][0] == 1) & (grid_barrier[i+1][j+1][1] == 1 ) ) :
adjacence[4* i + j] [ 4 * i + j + 5] = 0 # these 2 case are not linked
adjacence[ 4 * i + j +5] [4* i + j] = 0 # these 2 case are not linked
#diag sup left
if ( (4 * (i) + j -5) > 0 ) & (i > 0) & (j > 0) :
if ( (grid_barrier[i][j][0] == 1) & (grid_barrier[i][j][1] == 1) ) | ( (grid_barrier[i-1][j-1][3] == 1) & (grid_barrier[i][j-1][3] == 1) ) | ( (grid_barrier[i][j][0] == 1) & (grid_barrier[i][j-1][0] == 1) ) | ( (grid_barrier[i-1][j-1][3] == 1) & (grid_barrier[i-1][j-1][2] == 1) ) :
adjacence[4* i + j] [ 4 * (i) + j -5] = 0 # these 2 case are not linked
adjacence[ 4 * (i) + j -5] [4* i + j] = 0 # these 2 case are not linked
#diag inf left
if ( (4 * (i) + j -3) > 0 ) & (i > 0) & (j < 3):
if ( (grid_barrier[i][j][0] == 1) & (grid_barrier[i][j][3] == 1 ) ) | ( (grid_barrier[i][j][3] == 1) & (grid_barrier[i-1][j +1][3] == 1 ) ) | ( (grid_barrier[i][j][0] == 1) & (grid_barrier[i][j+1][0] == 1 ) ) | ( (grid_barrier[i-1][j+1][0] == 1) & (grid_barrier[i][j][3] == 1 )) | ( (grid_barrier[i][j][0] == 1) & (grid_barrier[i][j][3] == 1 ) ) :
adjacence[4* i + j] [ 4 * (i-1) + j +1] = 0 # these 2 case are not linked
adjacence[ 4 * (i-1) + j +1] [4* i + j] = 0 # these 2 case are not linked
print("Display grid")
print(grid_barrier)
print("Display Adjacence")
print(adjacence)
"""
This function will solve the problem of maze.
The kpi process is number of path which could solve the problem.
So we introduce basically the following "loss" ;
more the grid is difficult : less the number of path to solve the issue is large.
"""
def resolve_maze_process_kpi(start, end, treasure_grid):
print("Resolve maze and find treasure")
maze = adjacence
puissance = adjacence
# we process all the paths of different length
for i in range (2,15):
maze = maze + adjacence.dot(puissance)
puissance = adjacence.dot(puissance)
print(maze)
# With a starting point a treasure point and an exit point we process our KPI 'difficulty'
print("search" , maze[start][treasure_grid])
print("exit" , maze[treasure_grid][end] )
if ( (maze[start][treasure_grid] > 0) & (maze[treasure_grid][end] > 0 ) ) :
print("Loss/difficulty of generated grid" , maze[start][treasure_grid]+maze[treasure_grid][end] )
else :
print("Impossible to solve")
"""
Start program
"""
def main():
generate_grid_and_adjacence()
resolve_maze_process_kpi(0,10,4)
if __name__ == "__main__":
main()