-
Notifications
You must be signed in to change notification settings - Fork 1
/
statoil.q
168 lines (140 loc) · 6.17 KB
/
statoil.q
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/ Statoil competition at Kaggle - https://www.kaggle.com/c/statoil-iceberg-classifier-challenge/
\l p.q
np:.p.import `numpy
/ npar - call numpy.array on its argument; `<` asks embedPy to hand the result back as q data
npar:{[data] np[`array;<;data]};
/ fnpar - call numpy.array on its argument; `>` keeps the result as a foreign (Python-side) object
fnpar:{[data] np[`array;>;data]};
/ Data is in json
show "Reading data..."
/ Join every line of the file before parsing, so the whole document is decoded
/ even when the JSON is spread over more than one line (a one-line file is unaffected).
train:.j.k raze read0 `:train.json;
/ Use HH and HV data, and create a 3rd column, for RGB - 3 channels
/ Each band_1/band_2 entry is cut into rows of 75 items by (75,75)#/: before going through npar.
band1:(npar (75,75)#/:) train`band_1;
band2:(npar (75,75)#/:) train`band_2;
/ band_3 is the element-wise mean of the two bands
Xtrain:flip `band_1`band_2`band_3!(npar band1;npar band2;npar (band1+band2)%2);
/ This code adds extra training data, by flipping the existing images
/band1:(npar (75,75)#/:) train`band_1;
/fband1:npar flip each band1;
/band2:(npar (75,75)#/:) train`band_2;
/fband2:npar flip each band2;
/band3:(band1+band2)%2;
/fband3:npar flip each band3;
/Xtrain:flip `band_1`band_2`band_3!(npar band1,fband1;npar band2,fband2;npar band3,fband3);
show "Data reading done...";
/ Python modules imported through embedPy and bound to q names.
/ NOTE(review): pyplot, models and init are not referenced again in this file as shown.
pyplot:.p.import `matplotlib.pyplot;
models:.p.import `keras.models;
layers:.p.import `keras.layers;
init:.p.import `keras.initializers;
opti:.p.import `keras.optimizers;
ms:.p.import `sklearn.model_selection;
/ Names imported on the Python side; Sequential, Flatten, EarlyStopping and ModelCheckpoint
/ are referred to later inside Python expression strings.
p)from keras.models import Sequential;
p)from keras.layers import Flatten;
p)from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
/ Python list holding the metric name 'accuracy', fetched back into q as l.
/ NOTE(review): l is not passed to compile or used anywhere else in this file as shown.
p)l = list(['accuracy']);
l:.p.get`l;
/ Callback stuff - still testing ***
/ getCallbacks - build the two Keras callbacks for fitting.
/   filepath - passed to ModelCheckpoint as its first argument
/   patience - passed to EarlyStopping as patience
/ Both arguments are first published as Python variables of the same names,
/ because the constructor strings below refer to them by name.
/ Returns a two-item list: (EarlyStopping object; ModelCheckpoint object).
getCallbacks:{[filepath;patience]
  .p.set[`patience;patience];
  .p.set[`filepath;filepath];
  stopper:.p.pyeval"EarlyStopping('val_loss', patience=patience, mode=\"min\")";
  saver:.p.pyeval"ModelCheckpoint(filepath, save_best_only=True)";
  (stopper;saver)};
/ getModel - build and compile the convolutional network.
/ Takes no meaningful argument (call as getModel[]); uses the globals layers and opti
/ and the Python names Sequential and Flatten imported earlier with p) lines.
/ Six Conv2D/MaxPooling2D/Dropout blocks are followed by Flatten, two Dense+relu+Dropout
/ stages and a single sigmoid unit. Compiled with binary_crossentropy and Adam; no metrics are passed.
/ Returns the compiled model object.
getModel:{
  / Start with Sequential(), then keep add[]ing to it.
  gmodel:.p.eval"Sequential()";
  / Conv block 1 - the only layer that declares input_shape (75 75 3)
  gmodel[`add;<;layers[`Conv2D;<;64;pykwargs `kernel_size`activation`input_shape!(3 3;`relu;75 75 3)]];
  / Pooling and dropout are separate statements: q evaluates right to left, so writing the two
  / calls on one line without a semicolon would add the Dropout layer before the pooling layer.
  gmodel[`add;<;layers[`MaxPooling2D;<;pykwargs `pool_size`strides!(3 3;2 2)]];
  gmodel[`add;<;layers[`Dropout;<;0.2]];
  / Conv block 2
  gmodel[`add;<;layers[`Conv2D;<;128;pykwargs `kernel_size`activation!(3 3;`relu)]];
  gmodel[`add;<;layers[`MaxPooling2D;<;pykwargs `pool_size`strides!(2 2;2 2)]];
  gmodel[`add;<;layers[`Dropout;<;0.2]];
  / Conv block 3
  gmodel[`add;<;layers[`Conv2D;<;128;pykwargs `kernel_size`activation!(3 3;`relu)]];
  gmodel[`add;<;layers[`MaxPooling2D;<;pykwargs `pool_size`strides!(2 2;2 2)]];
  gmodel[`add;<;layers[`Dropout;<;0.2]];
  / Conv block 4 - 2x2 kernel, pooling with pool_size and strides of 1 1
  gmodel[`add;<;layers[`Conv2D;<;128;pykwargs `kernel_size`activation!(2 2;`relu)]];
  gmodel[`add;<;layers[`MaxPooling2D;<;pykwargs `pool_size`strides!(1 1;1 1)]];
  gmodel[`add;<;layers[`Dropout;<;0.2]];
  / Conv block 5 - same settings as block 4
  gmodel[`add;<;layers[`Conv2D;<;128;pykwargs `kernel_size`activation!(2 2;`relu)]];
  gmodel[`add;<;layers[`MaxPooling2D;<;pykwargs `pool_size`strides!(1 1;1 1)]];
  gmodel[`add;<;layers[`Dropout;<;0.2]];
  / Conv block 6
  gmodel[`add;<;layers[`Conv2D;<;64;pykwargs `kernel_size`activation!(3 3;`relu)]];
  gmodel[`add;<;layers[`MaxPooling2D;<;pykwargs `pool_size`strides!(2 2;2 2)]];
  gmodel[`add;<;layers[`Dropout;<;0.2]];
  / Flatten
  / Have to .p.eval or I get TypeError: "The added layer must be an instance of class Layer. Found: <class 'keras.layers.core.Flatten'>"
  gmodel[`add;<;.p.eval"Flatten()"];
  / Dense layer 1
  gmodel[`add;<;layers[`Dense;<;512]];
  gmodel[`add;<;layers[`Activation;<;`relu]];
  gmodel[`add;<;layers[`Dropout;<;0.2]];
  / Dense layer 2
  gmodel[`add;<;layers[`Dense;<;256]];
  gmodel[`add;<;layers[`Activation;<;`relu]];
  gmodel[`add;<;layers[`Dropout;<;0.2]];
  / Sigmoid output layer - one unit
  gmodel[`add;<;layers[`Dense;<;1]];
  gmodel[`add;<;layers[`Activation;<;`sigmoid]];
  adamopti:opti[`Adam;<;pykwargs `lr`beta_1`beta_2`epsilon`decay!(0.001;0.9;0.999;1e-8;0.0)];
  gmodel[`compile;<;pykwargs `loss`optimizer!(`binary_crossentropy;adamopti)];
  / NOTE(review): nothing follows `<` here; confirm that this form actually invokes summary
  / under the embedPy version in use rather than only building the callable.
  gmodel[`summary;<];
  :gmodel};
y:train`is_iceberg;
/y:y,y; /extra data now
/ toTensor - turn a table with columns band_1, band_2, band_3 (one 75x75 matrix per cell)
/ into an n x 75 x 75 x 3 numpy array, returned as a foreign object via fnpar.
/ Each row starts as three matrices (3 x 75 x 75). The model is declared with input_shape 75 75 3,
/ so the band axis has to come last: flip gives 75 x 3 x 75, flip each then gives 75 x 75 x 3.
/ A plain reshape of the flattened data would put neighbouring band_1 pixels on the last axis instead.
toTensor:{[tbl] fnpar {flip each flip x} each flip tbl[`band_1`band_2`band_3]};
/ Split into train and validate sets
splitdata:ms[`train_test_split;<;Xtrain;y;pykwargs `random_state`train_size`test_size!(1;0.75;0.25)];
/ splitdata holds, in order: training images, validation images, training labels, validation labels
Xtraincv:toTensor splitdata 0;
Xvalid:toTensor splitdata 1;
Ytraincv:fnpar splitdata 2;
Yvalid:fnpar splitdata 3;
/ Training here
show "Training commences ...";
gmodel:getModel[];
/ Callback stuff - still testing ***
/filepath:raze string "model_weights.hdf5";
/callbacks:getCallbacks[filepath;5];
/.p.set[`es;callbacks 0 ];
/.p.set[`msave;callbacks 1];
/ Callback stuff - still testing ***
/ gmodel[`fit;<;Xtraincv;Ytraincv;`callbacks pykw .p.pyeval "list(callbacks)";pykwargs `batch_size`epochs`verbose`validation_data!(24;50;1;(Xvalid;Yvalid))];
/ Fit data and train here
gmodel[`fit;<;Xtraincv;Ytraincv;pykwargs `batch_size`epochs`verbose`validation_data!(24;50;1;(Xvalid;Yvalid))];
/.p.set[`gmodel;gmodel];
/.p.set[`Xtraincv;Xtraincv];
/.p.set[`Ytraincv;Ytraincv];
/.p.set[`Xvalid;Xvalid];
/.p.set[`Yvalid;Yvalid];
/p)gmodel.fit(Xtraincv,Ytraincv,callbacks=list([es,msave]),batch_size=24,epochs=50,verbose=1,validation_data=(Xvalid,Yvalid));
/ Callback stuff - still testing ***
/ Load the best weights
/ gmodel[`load_weights;`filepath pykw filepath]
/ Evaluate
scores:gmodel[`evaluate;<;Xvalid;Yvalid;`verbose pykw 1];
show "Scores:";
show scores;
/ Test data preprocessing here
show "Preprocessing test data now...";
/ Join every line of the file before parsing, so multi-line JSON is decoded in full
test:.j.k raze read0 `:test.json;
show "json read...";
testband1:(npar (75,75)#/:) test`band_1;
testband2:(npar (75,75)#/:) test`band_2;
Xtest:flip `band_1`band_2`band_3!(npar testband1;npar testband2;npar (testband1+testband2)%2);
/ Same layout code as the training and validation images, so all three sets reach the model
/ in one format (the table itself is never razed - the columns are pulled out first).
Xtest:toTensor Xtest;
/ Predictions here
show "Running test, predicting...";
/ predict is used rather than predict_proba: newer Keras releases no longer provide
/ predict_proba on Sequential, and with a sigmoid output layer predict yields the same values.
preds:gmodel[`predict;<;Xtest;`verbose pykw 1];
/ Generate submission file for Kaggle
show "Generating submission file...";
/ Publish the predictions and ids to Python as numpy arrays.
/ The ids go under the name ids so the Python builtin id is not shadowed.
.p.set[`preds;fnpar preds];
.p.set[`ids;fnpar test`id];
p)import pandas as pd
p)submission = pd.DataFrame();
p)submission['id']=ids;
/ reshape drops the trailing axis so each row gets a single value
p)submission['is_iceberg']=preds.reshape((preds.shape[0]));
p)submission.to_csv('sub.csv', index=False)
show "Done !";