"""
File for the base Recurrent-Neural-Net model. NO IMPORTS ALLOWED!
"""
import numpy as np
from loss_functions import MSE_loss
from nonlinearity import Softmax
HIDDEN_LAYER_SIZE = 100
RANDOMIZER_CONSTANT = 1250
LEARNING_RATE = 0.001
class Base_RNN:
    def __init__(self, input_dims, output_dims, hidden_layer_size=HIDDEN_LAYER_SIZE):
        """
        Initializes the RNN, given input and output dims and the hidden layer size.
        Weights are initialized to small random values, and bias vectors are all zeros.
        """
        # divide by RANDOMIZER_CONSTANT to make sure the initial weights aren't too large
        self.U = np.random.randn(hidden_layer_size, input_dims) / RANDOMIZER_CONSTANT
        # intermediary (hidden-to-hidden) weights
        self.W = np.random.randn(hidden_layer_size, hidden_layer_size) / RANDOMIZER_CONSTANT
        # final (hidden-to-output) weights
        self.V = np.random.randn(output_dims, hidden_layer_size) / RANDOMIZER_CONSTANT
        # biases
        self.b1 = np.zeros((hidden_layer_size, 1))
        self.b2 = np.zeros((output_dims, 1))
        self._hidden_layer_size = hidden_layer_size
        self._input_dims = input_dims
        self.hidden_state = np.zeros((self._hidden_layer_size, 1))

    def forward_pass(self, input):
        """
        Forward pass for the RNN.
        Each input is a single word from the sequence, given as a column vector.
        """
        # cache the input and previous hidden state for use in backward()
        self._last_input = input
        self._prev_hidden_state = self.hidden_state
        self.hidden_state = np.tanh(self.U @ input + self.W @ self.hidden_state + self.b1)
        z = self.V @ self.hidden_state + self.b2
        return z, self.hidden_state

    def backward(self, y, t):
        """
        Backward pass for a single time step (truncated backprop of length 1),
        followed by a gradient-descent update. Minimal sketch: assumes MSE_loss
        is the mean squared error; full BPTT would accumulate over all time steps.
        """
        L = MSE_loss(y, t)
        # gradient of the (assumed) mean-squared-error loss w.r.t. the output y
        dy = 2 * (y - t) / y.size
        # output-layer gradients
        dV = dy @ self.hidden_state.T
        db2 = dy
        # backpropagate through the tanh hidden layer (one step only)
        dh = self.V.T @ dy
        dtanh = (1 - self.hidden_state ** 2) * dh
        dU = dtanh @ self._last_input.T
        dW = dtanh @ self._prev_hidden_state.T
        db1 = dtanh
        # only have gradient updates below
        self.U = self.U - LEARNING_RATE * dU
        self.W = self.W - LEARNING_RATE * dW
        self.V = self.V - LEARNING_RATE * dV
        self.b1 = self.b1 - LEARNING_RATE * db1
        self.b2 = self.b2 - LEARNING_RATE * db2
        return L
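
# Minimal usage sketch: the dimensions, sequence length, and random data below are
# illustrative assumptions only; dummy targets are used just to show the expected
# call pattern of forward_pass() followed by backward().
if __name__ == "__main__":
    np.random.seed(0)
    input_dims, output_dims, seq_len = 8, 8, 5
    rnn = Base_RNN(input_dims, output_dims)
    # dummy sequence and targets as column vectors, matching forward_pass()
    sequence = [np.random.randn(input_dims, 1) for _ in range(seq_len)]
    targets = [np.random.randn(output_dims, 1) for _ in range(seq_len)]
    for x, t in zip(sequence, targets):
        y, _ = rnn.forward_pass(x)
        loss = rnn.backward(y, t)
        print("loss:", loss)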