# # BUILDING CHAINS FROM HYPERPARAMETERS + INPUT/OUTPUT SHAPE

# We introduce chain builders as a way of exposing neural network
# hyperparameters (describing architecture, dropout rates, etc.) to the MLJ
# user. These hyperparameters generally exclude the input/output
# dimensions/shapes, as the MLJ fit methods determine these from the training
# data. A `Builder` object stores the hyperparameters, and an associated
# `build` method constructs a corresponding chain, given the input/output
# dimensions/shapes.

# Below, `n` or `(n1, n2)`, etc, refers to network input size, while `m` or
# `(m1, m2)`, etc, refers to output size.
abstract type Builder <: MLJModelInterface.MLJType end
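
# To illustrate the contract (a hedged sketch, not a builder shipped with the
# library): a custom builder subtypes `Builder` and implements a `build`
# method returning a `Flux.Chain` mapping `n` inputs to `m` outputs. The name
# `MyShallow` is hypothetical:
#
#     mutable struct MyShallow <: Builder
#         width::Int
#     end
#
#     build(b::MyShallow, rng, n, m) = Flux.Chain(
#         Flux.Dense(n, b.width, Flux.relu, init=Flux.glorot_uniform(rng)),
#         Flux.Dense(b.width, m, init=Flux.glorot_uniform(rng)))
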
"""
Linear(; σ=Flux.relu)
MLJFlux builder that constructs a fully connected two layer network with activation
function `σ`. The number of input and output nodes is determined from the data. Weights
are initialized using `Flux.glorot_uniform(rng)`, where `rng` is inferred from the `rng`
field of the MLJFlux model.
"""
mutable struct Linear <: Builder
    σ
end

Linear(; σ=Flux.relu) = Linear(σ)

build(builder::Linear, rng, n::Integer, m::Integer) =
    Flux.Chain(Flux.Dense(n, m, builder.σ, init=Flux.glorot_uniform(rng)))
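
# Usage sketch (assumes the standard library `Random` is loaded; the sizes
# are arbitrary):
#
#     using Random
#     chain = build(Linear(σ=Flux.tanh), Random.default_rng(), 4, 2)
#     # returns Chain(Dense(4 => 2, tanh))
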
"""
Short(; n_hidden=0, dropout=0.5, σ=Flux.sigmoid)
MLJFlux builder that constructs a full-connected three-layer network
using `n_hidden` nodes in the hidden layer and the specified `dropout`
(defaulting to 0.5). An activation function `σ` is applied between the
hidden and final layers. If `n_hidden=0` (the default) then `n_hidden`
is the geometric mean of the number of input and output nodes. The
number of input and output nodes is determined from the data.
Each layer is initialized using `Flux.glorot_uniform(rng)`, where `rng` is inferred from
the `rng` field of the MLJFlux model.
"""
mutable struct Short <: Builder
    n_hidden::Int    # if zero, use geometric mean of input/output
    dropout::Float64
    σ
end

Short(; n_hidden=0, dropout=0.5, σ=Flux.sigmoid) = Short(n_hidden, dropout, σ)

function build(builder::Short, rng, n, m)
    n_hidden =
        builder.n_hidden == 0 ? round(Int, sqrt(n*m)) : builder.n_hidden
    init = Flux.glorot_uniform(rng)
    Flux.Chain(
        Flux.Dense(n, n_hidden, builder.σ, init=init),
        Flux.Dropout(builder.dropout; rng),
        Flux.Dense(n_hidden, m, init=init))
end
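
# Usage sketch (assumes `Random` is loaded): with the default `n_hidden=0`,
# the hidden width for the sizes below is round(Int, sqrt(4*9)) == 6:
#
#     using Random
#     chain = build(Short(), Random.default_rng(), 4, 9)
#     # returns Chain(Dense(4 => 6, σ), Dropout(0.5), Dense(6 => 9))
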
"""
MLP(; hidden=(100,), σ=Flux.relu)
MLJFlux builder that constructs a Multi-layer perceptron network. The ith element of
`hidden` represents the number of neurons in the ith hidden layer. An activation function
`σ` is applied between each layer.
Each layer is initialized using `Flux.glorot_uniform(rng)`, where `rng` is inferred from
the `rng` field of the MLJFlux model.
"""
mutable struct MLP{N} <: MLJFlux.Builder
    hidden::NTuple{N, Int}  # sizes of the hidden layers only; the input and
                            # output layers are not counted
    σ
end
MLP(; hidden=(100,), σ=Flux.relu) = MLP(hidden, σ)
function MLJFlux.build(mlp::MLP, rng, n_in, n_out)
    init = Flux.glorot_uniform(rng)
    hidden = Any[Flux.Dense(n_in, mlp.hidden[1], mlp.σ, init=init)]
    for i ∈ 2:length(mlp.hidden)
        push!(hidden, Flux.Dense(mlp.hidden[i-1],
                                 mlp.hidden[i],
                                 mlp.σ,
                                 init=init))
    end
    push!(hidden, Flux.Dense(mlp.hidden[end], n_out, init=init))
    return Flux.Chain(hidden...)
end
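
# Usage sketch (assumes `Random` is loaded): `hidden=(16, 8)` yields three
# `Dense` layers, chained as n_in => 16 => 8 => n_out:
#
#     using Random
#     chain = build(MLP(hidden=(16, 8)), Random.default_rng(), 4, 2)
#     # returns Chain(Dense(4 => 16, relu), Dense(16 => 8, relu), Dense(8 => 2))
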
# # BUILDER MACRO

# Wraps a user-supplied function of the form
# `(rng, n_in, n_out, n_channels) -> <some chain>`, as constructed by the
# `@builder` macro below:
struct GenericBuilder{F} <: Builder
    apply::F
end
"""
@builder neural_net
Creates a builder for `neural_net`. The variables `rng`, `n_in`, `n_out` and
`n_channels` can be used to create builders for any random number generator `rng`,
input and output sizes `n_in` and `n_out` and number of input channels `n_channels`.
# Examples
```jldoctest
julia> import MLJFlux: @builder;
julia> nn = NeuralNetworkRegressor(builder = @builder(Chain(Dense(n_in, 64, relu),
Dense(64, 32, relu),
Dense(32, n_out))));
julia> conv_builder = @builder begin
front = Chain(Conv((3, 3), n_channels => 16), Flux.flatten)
d = Flux.outputsize(front, (n_in..., n_channels, 1)) |> first
Chain(front, Dense(d, n_out));
end
julia> conv_nn = NeuralNetworkRegressor(builder = conv_builder);
```
"""
macro builder(ex)
    esc(quote
        MLJFlux.GenericBuilder((rng, n_in, n_out, n_channels) -> $ex)
    end)
end

build(b::GenericBuilder, rng, n_in, n_out, n_channels=1) =
    b.apply(rng, n_in, n_out, n_channels)
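
# Usage sketch (assumes `Random` is loaded): the wrapped closure receives
# `(rng, n_in, n_out, n_channels)`, with `n_channels` defaulting to 1:
#
#     using Random
#     b = @builder Flux.Chain(Flux.Dense(n_in, 8, Flux.relu), Flux.Dense(8, n_out))
#     chain = build(b, Random.default_rng(), 5, 2)  # 5 inputs, 2 outputs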