Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add kaiming initialization #138

Merged
merged 5 commits into from
Nov 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/src/api/utilities.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Lux.gpu
```@docs
Lux.glorot_normal
Lux.glorot_uniform
Lux.kaiming_normal
Lux.kaiming_uniform
Lux.ones32
Lux.zeros32
```
Expand Down
2 changes: 2 additions & 0 deletions src/autodiff.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ ChainRulesCore.@non_differentiable _get_reshape_dims(::Any, ::Any)
# Mark internal utilities and weight initializers as non-differentiable so AD
# backends (via ChainRules) treat their outputs as constants and do not attempt
# to propagate gradients through them.
ChainRulesCore.@non_differentiable compute_adaptive_pooling_dims(::Any, ::Any)
ChainRulesCore.@non_differentiable glorot_normal(::Any...)
ChainRulesCore.@non_differentiable glorot_uniform(::Any...)
ChainRulesCore.@non_differentiable kaiming_normal(::Any...)
ChainRulesCore.@non_differentiable kaiming_uniform(::Any...)
ChainRulesCore.@non_differentiable check_use_cuda()
ChainRulesCore.@non_differentiable istraining(::Any)
ChainRulesCore.@non_differentiable _get_norm_except_dims(::Any, ::Any)
Expand Down
34 changes: 34 additions & 0 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,40 @@ function glorot_normal(rng::AbstractRNG, dims::Integer...; gain::Real=1)
return randn(rng, Float32, dims...) .* std
end

"""
    kaiming_uniform(rng::AbstractRNG, size...; gain = √2f0)

Return an `Array{Float32}` of the given `size`, with entries drawn from a uniform
distribution over `[-limit, limit]` where `limit = gain * sqrt(3 / fan_in)` (He
initialization).

# References

[1] He, Kaiming, et al. "Delving deep into rectifiers: Surpassing human-level performance on
imagenet classification." _Proceedings of the IEEE international conference on computer
vision_. 2015.
"""
function kaiming_uniform(rng::AbstractRNG, dims::Integer...; gain::Real=√2.0f0)
    fan_in = first(_nfan(dims...))
    limit = Float32(√3.0f0 * gain / sqrt(fan_in))
    # Shift U(0, 1) samples to be symmetric about zero, then scale to U(-limit, limit).
    return (rand(rng, Float32, dims...) .- 0.5f0) .* 2limit
end

"""
    kaiming_normal(rng::AbstractRNG, size...; gain = √2f0)

Return an `Array{Float32}` of the given `size`, with entries sampled from a normal
distribution whose standard deviation is `gain / sqrt(fan_in)` (He initialization).

# References

[1] He, Kaiming, et al. "Delving deep into rectifiers: Surpassing human-level performance on
imagenet classification." _Proceedings of the IEEE international conference on computer
vision_. 2015.
"""
function kaiming_normal(rng::AbstractRNG, dims::Integer...; gain::Real=√2.0f0)
    fan_in = first(_nfan(dims...))
    # Scale standard-normal samples to the target standard deviation.
    stddev = Float32(gain / sqrt(fan_in))
    return randn(rng, Float32, dims...) .* stddev
end

# PRNG Handling
"""
replicate(rng::AbstractRNG)
Expand Down
18 changes: 18 additions & 0 deletions test/utils.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Lux, ComponentArrays, CUDA, Functors, ReverseDiff, Random, Optimisers, Zygote, Test
using Statistics: std

include("test_utils.jl")

Expand Down Expand Up @@ -29,6 +30,23 @@ end
end
end

@testset "kaiming" begin
    # For these dims the bound/stddev formulas reduce to sqrt(6 / n_out) and
    # sqrt(2 / n_out): kaiming_uniform should fill [-bound, bound] nearly to the
    # edges, and kaiming_normal should have sample stddev close to the target.
    for (n_in, n_out) in [(100, 100), (100, 400)]
        W = Lux.kaiming_uniform(rng, n_in, n_out)
        bound = sqrt(6 / n_out)
        @test -bound < minimum(W) < -0.9bound
        @test 0.9bound < maximum(W) < bound

        W = Lux.kaiming_normal(rng, n_in, n_out)
        target_std = sqrt(2 / n_out)
        @test 0.9target_std < std(W) < 1.1target_std
    end
    # Custom gains must not change the element type.
    @test eltype(Lux.kaiming_uniform(rng, 3, 4; gain=1.5)) == Float32
    @test eltype(Lux.kaiming_normal(rng, 3, 4; gain=1.5)) == Float32
end

@testset "istraining" begin
@test Lux.istraining(Val(true))
@test !Lux.istraining(Val(false))
Expand Down