Print the state of Dropout etc. #2222

Merged: 10 commits, Apr 25, 2023
Changes from 1 commit
34 changes: 25 additions & 9 deletions src/functor.jl
@@ -7,32 +7,48 @@ using SparseArrays: AbstractSparseArray
"""
testmode!(m, mode = true)

Set a layer or model's test mode (see below).
Using `:auto` mode will treat any gradient computation as training.
Set a layer, or all layers in a model, to test mode.
This disables the effect of [`Dropout`](@ref), and similar layers.

_Note_: if you manually set a model into test mode, you need to manually place
it back into train mode during the training phase.

Possible values include:
- `false` for training
Possible values of optional 2nd argument `mode` are:
- `true` for testing
- `:auto` or `nothing` for Flux to detect the mode automatically
- `false` for training, same as [`trainmode!`](@ref)
- `:auto` or `nothing` for Flux to detect training automatically.

# Example

```jldoctest
julia> d = Dropout(0.3)
Dropout(0.3)

julia> testmode!(d) # dropout is now always disabled
Dropout(0.3, active=false)

julia> trainmode!(d) # dropout is now always enabled
Dropout(0.3, active=true)

julia> trainmode!(d, :auto) # back to default
Dropout(0.3)
```
"""
testmode!(m, mode = true) = (foreach(x -> testmode!(x, mode), trainable(m)); m)

"""
trainmode!(m, mode = true)

Set a layer of model's train mode (see below).
Symmetric to [`testmode!`](@ref) (i.e. `trainmode!(m, mode) == testmode!(m, !mode)`).
Set a layer, or all layers in a model, to training mode.
Opposite to [`testmode!`](@ref) (i.e. `trainmode!(m, mode) == testmode!(m, !mode)`).

_Note_: if you manually set a model into train mode, you need to manually place
it back into test mode during the testing phase.

Possible values include:
Possible values of optional 2nd argument `mode` are:
- `true` for training
- `false` for testing
- `:auto` or `nothing` for Flux to detect the mode automatically
- `:auto` or `nothing` for Flux to detect training automatically
"""
trainmode!(m, mode = true) = mode isa Bool ? testmode!(m, !mode) : testmode!(m, mode)
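A minimal sketch of the intended symmetry, assuming this branch is loaded; the `Dropout` layer and variable name `d` are only for illustration:

```julia
julia> using Flux

julia> d = Dropout(0.2);

julia> Flux.trainmode!(d);          # same as testmode!(d, false): force training behaviour

julia> d.active
true

julia> Flux.testmode!(d, :auto);    # hand control back to automatic detection

julia> d.active === nothing         # `nothing` means "decide per call"
true
```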

54 changes: 34 additions & 20 deletions src/layers/normalise.jl
@@ -1,8 +1,13 @@
# Internal function, used only for layers defined in this file.
_isactive(m, x) = isnothing(m.active) ? NNlib.within_gradient(x) : m.active

# Internal function, used only in this file.
_tidy_active(mode::Bool) = mode
_tidy_active(::Nothing) = nothing
_tidy_active(mode) = mode === :auto ? nothing : throw(ArgumentError("active = $(repr(mode)) is not accepted, must be true/false/nothing or :auto"))
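Roughly, the helper collapses `:auto` to `nothing`, passes Booleans through unchanged, and rejects anything else; an illustrative REPL session, assuming this branch (the helper is internal, so the qualified name is used):

```julia
julia> Flux._tidy_active(true), Flux._tidy_active(nothing), Flux._tidy_active(:auto)
(true, nothing, nothing)

julia> Flux._tidy_active(:always)
ERROR: ArgumentError: active = :always is not accepted, must be true/false/nothing or :auto
```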

"""
Dropout(p; [dims, rng])
Dropout(p; [dims, rng, active])

Layer implementing [dropout](https://arxiv.org/abs/1207.0580) with the given probability.
This is used as a regularisation, i.e. to reduce overfitting.
@@ -12,7 +17,8 @@ or else scales it by `1 / (1 - p)`, using the [`NNlib.dropout`](@ref) function.
While testing, it has no effect.

By default the mode will switch automatically, but it can also
be controlled manually via [`Flux.testmode!`](@ref).
be controlled manually via [`Flux.testmode!`](@ref),
or by passing keyword `active=true` for training mode.

By default every input is treated independently. With the `dims` keyword,
instead it takes a random choice only along that dimension.
@@ -36,7 +42,11 @@ julia> m(ones(2, 7)) # test mode, no effect
2.0 2.0 2.0 2.0 2.0 2.0 2.0
2.0 2.0 2.0 2.0 2.0 2.0 2.0

julia> Flux.trainmode!(m); # equivalent to use within gradient
julia> Flux.trainmode!(m) # equivalent to use within a gradient call
Chain(
Dense(2 => 3), # 9 parameters
Dropout(0.4, active=true),
)

julia> m(ones(2, 7))
3×7 Matrix{Float64}:
@@ -63,9 +73,9 @@ mutable struct Dropout{F<:Real,D,R<:AbstractRNG}
end
Dropout(p::Real, dims, active) = Dropout(p, dims, active, default_rng_value())

function Dropout(p::Real; dims=:, rng = default_rng_value())
function Dropout(p::Real; dims=:, active = nothing, rng = default_rng_value())
0 ≤ p ≤ 1 || throw(ArgumentError("Dropout expects 0 ≤ p ≤ 1, got p = $p"))
Dropout(p, dims, nothing, rng)
Dropout(p, dims, _tidy_active(active), rng)
end

@functor Dropout
@@ -74,16 +84,17 @@ trainable(a::Dropout) = (;)
(a::Dropout)(x) = dropout(a.rng, x, a.p * _isactive(a, x); dims=a.dims)

testmode!(m::Dropout, mode=true) =
(m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
(m.active = isnothing(_tidy_active(mode)) ? nothing : !mode; m)

function Base.show(io::IO, d::Dropout)
print(io, "Dropout(", d.p)
d.dims != (:) && print(io, ", dims = $(repr(d.dims))")
d.dims != (:) && print(io, ", dims=", d.dims)
d.active == nothing || print(io, ", active=", d.active)
print(io, ")")
end
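Putting the new keyword and the extended `show` together, an illustrative REPL session (output written out from the method above, not a doctest):

```julia
julia> Dropout(0.3)                         # default: automatic mode, nothing extra printed
Dropout(0.3)

julia> Dropout(0.3; dims=1, active=true)    # forced training mode is now visible
Dropout(0.3, dims=1, active=true)
```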

"""
AlphaDropout(p; rng = default_rng_value())
AlphaDropout(p; [rng, active])

A dropout layer. Used in
[Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515).
@@ -114,11 +125,11 @@ mutable struct AlphaDropout{F,R<:AbstractRNG}
rng::R
function AlphaDropout(p, active, rng)
@assert 0 ≤ p ≤ 1
new{typeof(p), typeof(rng)}(p, active, rng)
new{typeof(p), typeof(rng)}(p, _tidy_active(active), rng)
end
end
AlphaDropout(p, active) = AlphaDropout(p, active, default_rng_value())
AlphaDropout(p; rng = default_rng_value()) = AlphaDropout(p, nothing, rng)
AlphaDropout(p; rng = default_rng_value(), active=nothing) = AlphaDropout(p, active, rng)

@functor AlphaDropout
trainable(a::AlphaDropout) = (;)
@@ -138,7 +149,7 @@ function (a::AlphaDropout)(x::AbstractArray{T}) where T
end

testmode!(m::AlphaDropout, mode=true) =
(m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
(m.active = isnothing(_tidy_active(mode)) ? nothing : !mode; m)

"""
    LayerNorm(size..., λ=identity; affine=true, ϵ=1f-5)
@@ -310,7 +321,7 @@ end

function BatchNorm(chs::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine=true, track_stats=true,
affine=true, track_stats=true, active=nothing,
ϵ=1f-5, momentum=0.1f0)

β = affine ? initβ(chs) : nothing
@@ -321,7 +332,7 @@ function BatchNorm(chs::Int, λ=identity;
return BatchNorm(λ, β, γ,
μ, σ², ϵ, momentum,
affine, track_stats,
nothing, chs)
_tidy_active(active), chs)
end

@functor BatchNorm
@@ -335,12 +346,13 @@ function (BN::BatchNorm)(x::AbstractArray{T,N}) where {T,N}
end

testmode!(m::BatchNorm, mode=true) =
(m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
(m.active = isnothing(_tidy_active(mode)) ? nothing : !mode; m)

function Base.show(io::IO, l::BatchNorm)
print(io, "BatchNorm($(l.chs)")
(l.λ == identity) || print(io, ", $(l.λ)")
hasaffine(l) || print(io, ", affine=false")
l.active == nothing || print(io, ", active=", l.active)
print(io, ")")
end
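The same applies to the norm layers. Here is a sketch using `repr`, which calls the two-argument `show` above and so sidesteps the REPL's parameter-count summary; the variable `bn` is illustrative:

```julia
julia> bn = BatchNorm(3; active=true);   # force training-mode behaviour (batch statistics)

julia> repr(bn)
"BatchNorm(3, active=true)"

julia> repr(Flux.testmode!(bn))
"BatchNorm(3, active=false)"
```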

@@ -399,7 +411,7 @@ end

function InstanceNorm(chs::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine=false, track_stats=false,
affine=false, track_stats=false, active=nothing,
ϵ=1f-5, momentum=0.1f0)

β = affine ? initβ(chs) : nothing
@@ -410,7 +422,7 @@ function InstanceNorm(chs::Int, λ=identity;
return InstanceNorm(λ, β, γ,
μ, σ², ϵ, momentum,
affine, track_stats,
nothing, chs)
_tidy_active(active), chs)
end

@functor InstanceNorm
@@ -424,12 +436,13 @@ function (l::InstanceNorm)(x::AbstractArray{T,N}) where {T,N}
end

testmode!(m::InstanceNorm, mode=true) =
(m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
(m.active = isnothing(_tidy_active(mode)) ? nothing : !mode; m)

function Base.show(io::IO, l::InstanceNorm)
print(io, "InstanceNorm($(l.chs)")
l.λ == identity || print(io, ", $(l.λ)")
hasaffine(l) || print(io, ", affine=false")
l.active == nothing || print(io, ", active=", l.active)
print(io, ")")
end

@@ -495,7 +508,7 @@ trainable(gn::GroupNorm) = hasaffine(gn) ? (β = gn.β, γ = gn.γ) : (;)

function GroupNorm(chs::Int, G::Int, λ=identity;
initβ=zeros32, initγ=ones32,
affine=true, track_stats=false,
affine=true, track_stats=false, active=nothing,
ϵ=1f-5, momentum=0.1f0)

if track_stats
@@ -514,7 +527,7 @@ end
μ, σ²,
ϵ, momentum,
affine, track_stats,
nothing, chs)
_tidy_active(active), chs)
end

function (gn::GroupNorm)(x::AbstractArray)
@@ -529,13 +542,14 @@ function (gn::GroupNorm)(x::AbstractArray)
end

testmode!(m::GroupNorm, mode = true) =
(m.active = (isnothing(mode) || mode == :auto) ? nothing : !mode; m)
(m.active = isnothing(_tidy_active(mode)) ? nothing : !mode; m)

function Base.show(io::IO, l::GroupNorm)
# print(io, "GroupNorm($(join(size(l.β), ", "))", ", ", l.G)
print(io, "GroupNorm($(l.chs), $(l.G)")
l.λ == identity || print(io, ", ", l.λ)
hasaffine(l) || print(io, ", affine=false")
l.active == nothing || print(io, ", active=", l.active)
print(io, ")")
end
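Finally, a hedged end-to-end sketch of how the mode propagates through a whole model via the generic `testmode!` in `src/functor.jl`, checking the `active` fields directly rather than relying on exact REPL printing (layer indices refer to the `Chain` built here):

```julia
julia> m = Chain(Dense(2 => 4), BatchNorm(4), Dropout(0.2));

julia> Flux.testmode!(m);                 # recurses into the child layers

julia> (m[2].active, m[3].active)
(false, false)

julia> Flux.testmode!(m, :auto);          # back to automatic detection

julia> (m[2].active, m[3].active)
(nothing, nothing)
```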
