From 97f874abcff1bbfee98d7598bb28a2314fbbf262 Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Mon, 4 Mar 2019 01:05:46 +0530
Subject: [PATCH 1/9] Added AlphaDropout which is used in SNNs.

---
 src/layers/normalise.jl | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 1783d3ef04..821a9d99f0 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -43,6 +43,37 @@ end
 
 _testmode!(a::Dropout, test) = (a.active = !test)
 
+"""
+    AlphaDropout(p)
+A dropout layer. It is used in Self-Normalizing Neural Networks.
+(https://papers.nips.cc/paper/6698-self-normalizing-neural-networks.pdf)
+The AlphaDropout layer ensures that mean and variance of activations remains the same as before.
+"""
+mutable struct AlphaDropout{F}
+    p::F
+  active::Bool
+end
+
+function AlphaDropout(p)
+  @assert 0 ≤ p ≤ 1
+  AlphaDropout{typeof(p)}(p,true)
+end
+
+function (a::AlphaDropout)(x)
+  a.active || return x
+  α = -1.75813631
+  noise = randn(Float64, size(x.data))
+  y = collect(x)
+  y .= y .* (noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
+  A = (a.p + a.p * (1 - a.p) * α ^ 2)^0.5
+  B = -A * α * (1 - a.p)
+  y .= A .* y .+ B
+  x1 = param(y)
+  return x1
+end
+
+_testmode!(a::AlphaDropout, test) = (a.active = !test)
+
 """
     LayerNorm(h::Integer)

From b5533ee00b617b5cc7c47be054283dcd9e9c5b2e Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Mon, 4 Mar 2019 01:09:05 +0530
Subject: [PATCH 2/9] Exported AlphaDropout

---
 src/Flux.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Flux.jl b/src/Flux.jl
index 32982131ab..f234950f6e 100644
--- a/src/Flux.jl
+++ b/src/Flux.jl
@@ -7,7 +7,7 @@ using MacroTools, Juno, Requires, Reexport, Statistics, Random
 using MacroTools: @forward
 
 export Chain, Dense, RNN, LSTM, GRU, Conv, ConvTranspose, MaxPool, MeanPool,
-       DepthwiseConv, Dropout, LayerNorm, BatchNorm,
+       DepthwiseConv, Dropout, AlphaDropout, LayerNorm, BatchNorm,
        params, mapleaves, cpu, gpu, f32, f64
 
 @reexport using NNlib

From 922e9c9bc28cc890ed8500671de14597d09c9b43 Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Mon, 4 Mar 2019 01:10:12 +0530
Subject: [PATCH 3/9] Updated docs with AlphaDropout

---
 docs/src/models/layers.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/src/models/layers.md b/docs/src/models/layers.md
index e8c31e235b..e904ed6514 100644
--- a/docs/src/models/layers.md
+++ b/docs/src/models/layers.md
@@ -50,5 +50,6 @@ These layers don't affect the structure of the network but may improve training
 Flux.testmode!
 BatchNorm
 Dropout
+AlphaDropout
 LayerNorm
 ```

From 29b853e0bb1c96ce72f5b4f96655c0f16da6b851 Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Mon, 4 Mar 2019 22:17:19 +0530
Subject: [PATCH 4/9] Made sure Gradients are not lost.

---
 src/layers/normalise.jl | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 821a9d99f0..cc222bf9c3 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -63,13 +63,11 @@ function (a::AlphaDropout)(x)
   a.active || return x
   α = -1.75813631
   noise = randn(Float64, size(x.data))
-  y = collect(x)
-  y .= y .* (noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
+  x.data .= x.data .* (noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
   A = (a.p + a.p * (1 - a.p) * α ^ 2)^0.5
   B = -A * α * (1 - a.p)
-  y .= A .* y .+ B
-  x1 = param(y)
-  return x1
+  x.data .= A .* x.data .+ B
+  return x
 end
 
 _testmode!(a::AlphaDropout, test) = (a.active = !test)

From d6608682fcbad1689b1466a325cb7f8de5352534 Mon Sep 17 00:00:00 2001
From: thebhatman
Date: Tue, 5 Mar 2019 16:18:50 +0530
Subject: [PATCH 5/9] Suggested changes made

---
 src/layers/normalise.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index cc222bf9c3..f4e0f186a9 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -61,12 +61,12 @@ end
 
 function (a::AlphaDropout)(x)
   a.active || return x
-  α = -1.75813631
-  noise = randn(Float64, size(x.data))
-  x.data .= x.data .* (noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
+  α = eltype(x)(-1.75813631)
+  noise = randn(eltype(x), size(x))
+  x = @. x*(noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
   A = (a.p + a.p * (1 - a.p) * α ^ 2)^0.5
   B = -A * α * (1 - a.p)
-  x.data .= A .* x.data .+ B
+  x = @. A .* x .+ B
   return x
 end

From 8e5965ac41abe87088af7f2cdf06f90a46982af9 Mon Sep 17 00:00:00 2001
From: thebhatman
Date: Tue, 5 Mar 2019 16:28:05 +0530
Subject: [PATCH 6/9] Indentation fixed

---
 src/layers/normalise.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index f4e0f186a9..3b8628b2a5 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -50,7 +50,7 @@ A dropout layer. It is used in Self-Normalizing Neural Networks.
 The AlphaDropout layer ensures that mean and variance of activations remains the same as before.
 """
 mutable struct AlphaDropout{F}
-    p::F
+  p::F
   active::Bool
 end

From f4543b7adf764c0cd43d7446bf97303c3e849750 Mon Sep 17 00:00:00 2001
From: thebhatman
Date: Fri, 8 Mar 2019 03:21:26 +0530
Subject: [PATCH 7/9] Value of alpha updated and dot operations changed

---
 src/layers/normalise.jl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 3b8628b2a5..2323b731e8 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -61,12 +61,14 @@ end
 
 function (a::AlphaDropout)(x)
   a.active || return x
-  α = eltype(x)(-1.75813631)
+  λ = 1.0507009873554804934193349852946
+  α = 1.6732632423543772848170429916717
+  α1 = eltype(x)(-λ*α)
   noise = randn(eltype(x), size(x))
-  x = @. x*(noise .> (1 - a.p)) + α .* (noise .<= (1 - a.p))
-  A = (a.p + a.p * (1 - a.p) * α ^ 2)^0.5
-  B = -A * α * (1 - a.p)
-  x = @. A .* x .+ B
+  x = @. x*(noise > (1 - a.p)) + α1 * (noise <= (1 - a.p))
+  A = (a.p + a.p * (1 - a.p) * α1 ^ 2)^0.5
+  B = -A * α1 * (1 - a.p)
+  x = @. A * x + B
   return x
 end
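
Aside (an illustration for the reader, not part of the patch series): the two long literals introduced in PATCH 7 are the SELU constants λ and α from the linked self-normalizing-networks paper, and the value dropped activations are set to is α1 = -λ*α ≈ -1.7581, i.e. (up to rounding) the -1.75813631 hard-coded in the earlier patches. A quick check:

# Illustration only: where PATCH 7's constants come from.
λ = 1.0507009873554804934193349852946
α = 1.6732632423543772848170429916717
@show -λ * α   # ≈ -1.7581, cf. the -1.75813631 literal in PATCH 1/4/5
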
From 1d310d4532a8f031936849ad9d7e2ffdc1becbc5 Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Thu, 7 Mar 2019 21:55:26 +0530
Subject: [PATCH 8/9] Removed {typeof(p)}

---
 src/layers/normalise.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 2323b731e8..b81226f6e0 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -56,7 +56,7 @@ end
 
 function AlphaDropout(p)
   @assert 0 ≤ p ≤ 1
-  AlphaDropout{typeof(p)}(p,true)
+  AlphaDropout(p,true)
 end

From c6e51f5cc2c32c844e89b79a4d4cb39d7802bd3f Mon Sep 17 00:00:00 2001
From: Manjunath Bhat
Date: Thu, 7 Mar 2019 23:42:38 +0530
Subject: [PATCH 9/9] Made lambda and alpha of eltype(x)

---
 src/layers/normalise.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/layers/normalise.jl b/src/layers/normalise.jl
index 7aeaeadebd..5fd93e9db2 100644
--- a/src/layers/normalise.jl
+++ b/src/layers/normalise.jl
@@ -61,8 +61,8 @@ end
 
 function (a::AlphaDropout)(x)
   a.active || return x
-  λ = 1.0507009873554804934193349852946
-  α = 1.6732632423543772848170429916717
+  λ = eltype(x)(1.0507009873554804934193349852946)
+  α = eltype(x)(1.6732632423543772848170429916717)
   α1 = eltype(x)(-λ*α)
   noise = randn(eltype(x), size(x))
   x = @. x*(noise > (1 - a.p)) + α1 * (noise <= (1 - a.p))
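
The sketch below is an illustration for the reader, not part of the patch series: a self-contained alpha-dropout function over plain arrays, using the same SELU constants the later patches introduce. The names `alphadropout` and `q` are made up for the example; `q` corresponds to `a.p` in the patches (the probability of keeping a unit), the mask is drawn from a uniform distribution, and the scale is taken directly from the paper's formula a = (q + α1²·q·(1−q))^(−1/2), with all Flux layer/struct plumbing, tracking (`param`), and the `active` flag omitted.

using Statistics

# Sketch only: alpha dropout over a plain array, following the formulas in
# Klambauer et al. (2017). `q` is the keep probability (the role of `a.p` above).
function alphadropout(x::AbstractArray{T}, q) where {T}
    0 ≤ q ≤ 1 || throw(ArgumentError("q must be in [0, 1]"))
    λ  = T(1.0507009873554804934193349852946)  # SELU scale
    α  = T(1.6732632423543772848170429916717)  # SELU alpha
    α1 = -λ * α                                # dropped units are set to this value
    keep = rand(T, size(x)) .< q               # Bernoulli(q) keep mask
    y = @. x * keep + α1 * !keep               # drop by writing α1, not zero
    A = inv(sqrt(q + α1^2 * q * (1 - q)))      # affine correction: restore unit variance
    B = -A * α1 * (1 - q)                      # affine correction: restore zero mean
    return @. A * y + B
end

# Quick check on standard-normal input: mean stays near 0, variance near 1.
x = randn(Float32, 1000, 1000)
y = alphadropout(x, 0.8f0)
@show mean(y) var(y)

Running it on standard-normal input should leave the sample mean near 0 and the variance near 1, which is the property the AlphaDropout docstring in PATCH 1 describes.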