From 97f874abcff1bbfee98d7598bb28a2314fbbf262 Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Mon, 4 Mar 2019 01:05:46 +0530
Subject: [PATCH 1/9] Added AlphaDropout which is used in SNNs.

---
 src/layers/normalise.jl | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 1783d3ef04..821a9d99f0 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -43,6 +43,37 @@ end
 
 _testmode!(a::Dropout, test) = (a.active = !test)
 
+"""
+    AlphaDropout(p)
+A dropout layer. It is used in Self-Normalizing Neural Networks.
+(https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf)
+The AlphaDropout layer ensures that mean and variance of activations remains the same as before.
+"""
+mutable struct AlphaDropout{F}
+    p::F
+  active::Bool
+end
+
+function AlphaDropout(p)
+  @assert 0 ≤ p ≤ 1
+  AlphaDropout{typeof(p)}(p,true)
+end
+
+function (a::AlphaDropout)(x)
+  a.active || return x
+  α = -1.75813631
+  noise = randn(Float64, size(x.data))
+  y = collect(x)
+  y .= y .* (noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
+  A = (a.p + a.p * (1 - a.p) * α ^ 2)^0.5
+  B = -A * α * (1 - a.p)
+  y .= A .* y .+ B
+  x1 = param(y)
+  return x1
+end
+
+_testmode!(a::AlphaDropout, test) = (a.active = !test)
+
 """
     LayerNorm(h::Integer)

From b5533ee00b617b5cc7c47be054283dcd9e9c5b2e Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Mon, 4 Mar 2019 01:09:05 +0530
Subject: [PATCH 2/9] Exported AlphaDropout

---
 src/Flux.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Flux.jl b/src/Flux.jl
index 32982131ab..f234950f6e 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -7,7 +7,7 @@ using MacroTools, Juno, Requires, Reexport, Statistics, Random
 using MacroTools: @forward
 
 export Chain, Dense, RNN, LSTM, GRU, Conv, ConvTranspose, MaxPool, MeanPool,
-       DepthwiseConv, Dropout, LayerNorm, BatchNorm,
+       DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm,
        params, mapleaves, cpu, gpu, f32, f64
 
 @reexport using NNlib

From 922e9c9bc28cc890ed8500671de14597d09c9b43 Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Mon, 4 Mar 2019 01:10:12 +0530
Subject: [PATCH 3/9] Updated docs with AlphaDropout

---
 docs/src/models/layers.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md
index e8c31e235b..e904ed6514 100644
--- a/docs/src/models/layers.md
+++ b/docs/src/models/layers.md
@@ -50,5 +50,6 @@ These layers don't affect the structure of the network but may improve training
 Flux.testmode!
 BatchNorm
 Dropout
+AlphaDropout
 LayerNorm
 ```

From 29b853e0bb1c96ce72f5b4f96655c0f16da6b851 Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Mon, 4 Mar 2019 22:17:19 +0530
Subject: [PATCH 4/9] Made sure Gradients are not lost.

---
 src/layers/normalise.jl | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 821a9d99f0..cc222bf9c3 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -63,13 +63,11 @@ function (a::AlphaDropout)(x)
   a.active || return x
   α = -1.75813631
   noise = randn(Float64, size(x.data))
-  y = collect(x)
-  y .= y .* (noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
+  x.data .= x.data .* (noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
   A = (a.p + a.p * (1 - a.p) * α ^ 2)^0.5
   B = -A * α * (1 - a.p)
-  y .= A .* y .+ B
-  x1 = param(y)
-  return x1
+  x.data .= A .* x.data .+ B
+  return x
 end
 
 _testmode!(a::AlphaDropout, test) = (a.active = !test)

From d6608682fcbad1689b1466a325cb7f8de5352534 Mon Sep 17 00:00:00 2001
From: thebhatman
Date: Tue, 5 Mar 2019 16:18:50 +0530
Subject: [PATCH 5/9] Suggested changes made

---
 src/layers/normalise.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index cc222bf9c3..f4e0f186a9 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -61,12 +61,12 @@ end
 
 function (a::AlphaDropout)(x)
   a.active || return x
-  α = -1.75813631
-  noise = randn(Float64, size(x.data))
-  x.data .= x.data .* (noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
+  α = eltype(x)(-1.75813631)
+  noise = randn(eltype(x), size(x))
+  x = @. x*(noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
   A = (a.p + a.p * (1 - a.p) * α ^ 2)^0.5
   B = -A * α * (1 - a.p)
-  x.data .= A .* x.data .+ B
+  x = @. A .* x .+ B
   return x
 end

From 8e5965ac41abe87088af7f2cdf06f90a46982af9 Mon Sep 17 00:00:00 2001
From: thebhatman
Date: Tue, 5 Mar 2019 16:28:05 +0530
Subject: [PATCH 6/9] Indentation fixed

---
 src/layers/normalise.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index f4e0f186a9..3b8628b2a5 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -50,7 +50,7 @@ A dropout layer. It is used in Self-Normalizing Neural Networks.
 The AlphaDropout layer ensures that mean and variance of activations remains the same as before.
 """
 mutable struct AlphaDropout{F}
-    p::F
+  p::F
   active::Bool
 end

From f4543b7adf764c0cd43d7446bf97303c3e849750 Mon Sep 17 00:00:00 2001
From: thebhatman
Date: Fri, 8 Mar 2019 03:21:26 +0530
Subject: [PATCH 7/9] Value of alpha updated and dot operations changed

---
 src/layers/normalise.jl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 3b8628b2a5..2323b731e8 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -61,12 +61,14 @@ end
 
 function (a::AlphaDropout)(x)
   a.active || return x
-  α = eltype(x)(-1.75813631)
+  λ = 1.0507009873554804934193349852946
+  α = 1.6732632423543772848170429916717
+  α1 = eltype(x)(-λ*α)
   noise = randn(eltype(x), size(x))
-  x = @. x*(noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
-  A = (a.p + a.p * (1 - a.p) * α ^ 2)^0.5
-  B = -A * α * (1 - a.p)
-  x = @. A .* x .+ B
+  x = @. x*(noise > (1 - a.p)) + α1 * (noise <= (1 - a.p))
+  A = (a.p + a.p * (1 - a.p) * α1 ^ 2)^0.5
+  B = -A * α1 * (1 - a.p)
+  x = @. A * x + B
   return x
 end
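
Aside (an illustration for the reader, not part of the patch series): the two long literals introduced in PATCH 7 are the SELU constants λ and α from the linked self-normalizing-networks paper, and the value dropped activations are set to is α1 = -λ*α ≈ -1.7581, i.e. (up to rounding) the -1.75813631 hard-coded in the earlier patches. A quick check:

# Illustration only: where PATCH 7's constants come from.
λ = 1.0507009873554804934193349852946
α = 1.6732632423543772848170429916717
@show -λ * α   # ≈ -1.7581, cf. the -1.75813631 literal in PATCH 1/4/5
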
From 1d310d4532a8f031936849ad9d7e2ffdc1becbc5 Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Thu, 7 Mar 2019 21:55:26 +0530
Subject: [PATCH 8/9] Removed {typeof(p)}

---
 src/layers/normalise.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 2323b731e8..b81226f6e0 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -56,7 +56,7 @@ end
 
 function AlphaDropout(p)
   @assert 0 ≤ p ≤ 1
-  AlphaDropout{typeof(p)}(p,true)
+  AlphaDropout(p,true)
 end

From c6e51f5cc2c32c844e89b79a4d4cb39d7802bd3f Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Thu, 7 Mar 2019 23:42:38 +0530
Subject: [PATCH 9/9] Made lambda and alpha of eltype(x)

---
 src/layers/normalise.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 7aeaeadebd..5fd93e9db2 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -61,8 +61,8 @@ end
 
 function (a::AlphaDropout)(x)
   a.active || return x
-  λ = 1.0507009873554804934193349852946
-  α = 1.6732632423543772848170429916717
+  λ = eltype(x)(1.0507009873554804934193349852946)
+  α = eltype(x)(1.6732632423543772848170429916717)
   α1 = eltype(x)(-λ*α)
   noise = randn(eltype(x), size(x))
   x = @. x*(noise > (1 - a.p)) + α1 * (noise <= (1 - a.p))
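
The sketch below is an illustration for the reader, not part of the patch series: a self-contained alpha-dropout function over plain arrays, using the same SELU constants the later patches introduce. The names `alphadropout` and `q` are made up for the example; `q` corresponds to `a.p` in the patches (the probability of keeping a unit), the mask is drawn from a uniform distribution, and the scale is taken directly from the paper's formula a = (q + α1²·q·(1−q))^(−1/2), with all Flux layer/struct plumbing, tracking (`param`), and the `active` flag omitted.

using Statistics

# Sketch only: alpha dropout over a plain array, following the formulas in
# Klambauer et al. (2017). `q` is the keep probability (the role of `a.p` above).
function alphadropout(x::AbstractArray{T}, q) where {T}
    0 ≤ q ≤ 1 || throw(ArgumentError("q must be in [0, 1]"))
    λ  = T(1.0507009873554804934193349852946)  # SELU scale
    α  = T(1.6732632423543772848170429916717)  # SELU alpha
    α1 = -λ * α                                # dropped units are set to this value
    keep = rand(T, size(x)) .< q               # Bernoulli(q) keep mask
    y = @. x * keep + α1 * !keep               # drop by writing α1, not zero
    A = inv(sqrt(q + α1^2 * q * (1 - q)))      # affine correction: restore unit variance
    B = -A * α1 * (1 - q)                      # affine correction: restore zero mean
    return @. A * y + B
end

# Quick check on standard-normal input: mean stays near 0, variance near 1.
x = randn(Float32, 1000, 1000)
y = alphadropout(x, 0.8f0)
@show mean(y) var(y)

Running it on standard-normal input should leave the sample mean near 0 and the variance near 1, which is the property the AlphaDropout docstring in PATCH 1 describes.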