Merge pull request #77 from MartinuzziFrancesco/fm/unicornn

[CELL] Undamped independent controlled oscillatory recurrent neural network
MartinuzziFrancesco · Feb 19, 2025 · c17f64b · c17f64b · MartinuzziFrancesco · Feb 19, 2025
2 parents 38b1c85 + aa515fc
commit c17f64b
Show file tree

Hide file tree

Showing 10 changed files with 198 additions and 12 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "RecurrentLayers"
 uuid = "78449bcf-6750-4b78-9e82-63d4a1ccdf8c"
 authors = ["Francesco Martinuzzi"]
-version = "0.2.12"
+version = "0.2.13"
 
 [deps]
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"

diff --git a/README.md b/README.md
@@ -69,8 +69,9 @@ level implementations:
 
  - Discretized ordinary differential equation formulations of RNNs:
    [Long expressive memory networks](https://arxiv.org/pdf/2110.04744), 
-   [Coupled oscillatory recurrent neural unit](https://arxiv.org/abs/2010.00951), and
-   [Antisymmetric recurrent neural network](https://arxiv.org/abs/1902.09689) with its gated version
+   [Coupled oscillatory recurrent neural unit](https://arxiv.org/abs/2010.00951),
+   [Antisymmetric recurrent neural network](https://arxiv.org/abs/1902.09689) with its gated version, and
+   [Undamped independent controlled oscillatory recurrent neural network](https://arxiv.org/abs/2103.05487).
 
  - Additional more complex architectures:
    [Recurrent highway networks](https://arxiv.org/pdf/1607.03474),

diff --git a/docs/src/api/cells.md b/docs/src/api/cells.md
@@ -26,4 +26,5 @@ CFNCell
 TRNNCell
 TGRUCell
 TLSTMCell
+UnICORNNCell
 ```
diff --git a/docs/src/api/layers.md b/docs/src/api/layers.md
@@ -25,4 +25,5 @@ CFN
 TRNN
 TGRU
 TLSTM
+UnICORNN
 ```
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -35,9 +35,10 @@ level implementations:
 
  - Discretized ordinary differential equation formulations of RNNs:
    [Long expressive memory networks](https://arxiv.org/pdf/2110.04744), 
-   [Coupled oscillatory recurrent neural unit](https://arxiv.org/abs/2010.00951), and
+   [Coupled oscillatory recurrent neural unit](https://arxiv.org/abs/2010.00951),
    [Antisymmetric recurrent neural network](https://arxiv.org/abs/1902.09689)
-   with its gated version
+   with its gated version, and
+   [Undamped independent controlled oscillatory recurrent neural network](https://arxiv.org/abs/2103.05487).
 
  - Additional more complex architectures:
    [Recurrent highway networks](https://arxiv.org/pdf/1607.03474),

diff --git a/src/RecurrentLayers.jl b/src/RecurrentLayers.jl
@@ -11,10 +11,11 @@ using NNlib: fast_act
 export MGUCell, LiGRUCell, IndRNNCell, RANCell, LightRUCell, RHNCell,
        RHNCellUnit, NASCell, MUT1Cell, MUT2Cell, MUT3Cell, SCRNCell, PeepholeLSTMCell,
        FastRNNCell, FastGRNNCell, FSRNNCell, LEMCell, coRNNCell, AntisymmetricRNNCell,
-       GatedAntisymmetricRNNCell, JANETCell, CFNCell, TRNNCell, TGRUCell, TLSTMCell
+       GatedAntisymmetricRNNCell, JANETCell, CFNCell, TRNNCell, TGRUCell, TLSTMCell,
+       UnICORNNCell
 export MGU, LiGRU, IndRNN, RAN, LightRU, NAS, RHN, MUT1, MUT2, MUT3,
        SCRN, PeepholeLSTM, FastRNN, FastGRNN, FSRNN, LEM, coRNN, AntisymmetricRNN,
-       GatedAntisymmetricRNN, JANET, CFN, TRNN, TGRU, TLSTM
+       GatedAntisymmetricRNN, JANET, CFN, TRNN, TGRU, TLSTM, UnICORNN
 export StackedRNN
 
 @compat(public, (initialstates))
@@ -39,18 +40,19 @@ include("cells/antisymmetricrnn_cell.jl")
 include("cells/janet_cell.jl")
 include("cells/cfn_cell.jl")
 include("cells/trnn_cell.jl")
+include("cells/unicornn_cell.jl")
 
 include("wrappers/stackedrnn.jl")
 
 ### fallbacks for functors ###
 rlayers = (:FastRNN, :FastGRNN, :IndRNN, :LightRU, :LiGRU, :MGU, :MUT1,
     :MUT2, :MUT3, :NAS, :PeepholeLSTM, :RAN, :SCRN, :FSRNN, :LEM, :coRNN,
-    :AntisymmetricRNN, :JANET, :CFN, :TRNN, :TGRU, :TLSTM)
+    :AntisymmetricRNN, :JANET, :CFN, :TRNN, :TGRU, :TLSTM, :UnICORNN)
 
 rcells = (:FastRNNCell, :FastGRNNCell, :IndRNNCell, :LightRUCell, :LiGRUCell,
     :MGUCell, :MUT1Cell, :MUT2Cell, :MUT3Cell, :NASCell, :PeepholeLSTMCell,
     :RANCell, :SCRNCell, :FSRNNCell, :LEMCell, :coRNNCell, :AntisymmetricRNNCell,
-    :JANETCell, :CFNCell, :TRNNCell, :TGRUCell, :TLSTMCell)
+    :JANETCell, :CFNCell, :TRNNCell, :TGRUCell, :TLSTMCell, :UnICORNNCell)
 
 for (rlayer, rcell) in zip(rlayers, rcells)
     @eval begin

diff --git a/src/cells/cornn_cell.jl b/src/cells/cornn_cell.jl
@@ -1,6 +1,6 @@
 #https://arxiv.org/abs/2010.00951
 @doc raw"""
-    coRNNCell(input_size => hidden_size, [dt], [gamma], [epsilon];
+    coRNNCell(input_size => hidden_size, [dt];
         gamma=0.0, epsilon=0.0,
         init_kernel = glorot_uniform,
         init_recurrent_kernel = glorot_uniform,
@@ -40,7 +40,7 @@ See [`coRNN`](@ref) for a layer that processes entire sequences.
 ## Arguments
 - `inp`: The input to the cornncell. It should be a vector of size `input_size`
   or a matrix of size `input_size x batch_size`.
-- `(state, cstate)`: A tuple containing the hidden and cell states of the RANCell.
+- `(state, cstate)`: A tuple containing the hidden and cell states of the coRNNCell.
   They should be vectors of size `hidden_size` or matrices of size
   `hidden_size x batch_size`. If not provided, they are assumed to be vectors of zeros,
   initialized by [`Flux.initialstates`](@extref).

diff --git a/src/cells/unicornn_cell.jl b/src/cells/unicornn_cell.jl
@@ -0,0 +1,166 @@
+#https://arxiv.org/abs/2103.05487
+@doc raw"""
+    UnICORNNCell(input_size => hidden_size, [dt];
+        alpha=0.0, init_kernel = glorot_uniform,
+        init_recurrent_kernel = glorot_uniform, bias = true)
+
+[Undamped independent controlled oscillatory recurrent neural unit](https://arxiv.org/abs/2103.05487).
+See [`UnICORNN`](@ref) for a layer that processes entire sequences.
+
+# Arguments
+
+- `input_size => hidden_size`: input and inner dimension of the layer
+- `dt`: time step. Default is 1.0.
+
+# Keyword arguments
+
+- `alpha`: Control parameter. Default is 0.0.
+- `init_kernel`: initializer for the input to hidden weights
+- `init_recurrent_kernel`: initializer for the hidden to hidden weights
+- `bias`: include a bias or not. Default is `true`
+
+# Equations
+```math
+\begin{aligned}
+    y_n &= y_{n-1} + \Delta t \, \hat{\sigma}(c) \odot z_n, \\
+    z_n &= z_{n-1} - \Delta t \, \hat{\sigma}(c) \odot \left[ 
+        \sigma \left( w \odot y_{n-1} + V x_n + b \right) + 
+        \alpha y_{n-1} \right].
+\end{aligned}
+```
+
+# Forward
+
+    unicornncell(inp, (state, cstate))
+    unicornncell(inp)
+
+## Arguments
+- `inp`: The input to the unicornncell. It should be a vector of size `input_size`
+  or a matrix of size `input_size x batch_size`.
+- `(state, cstate)`: A tuple containing the hidden and cell states of the UnICORNNCell.
+  They should be vectors of size `hidden_size` or matrices of size
+  `hidden_size x batch_size`. If not provided, they are assumed to be vectors of zeros,
+  initialized by [`Flux.initialstates`](@extref).
+
+## Returns
+- A tuple `(output, state)`, where `output = new_state` is the new hidden state and
+  `state = (new_state, new_cstate)` is the new hidden and cell state. 
+  They are tensors of size `hidden_size` or `hidden_size x batch_size`.
+"""
+struct UnICORNNCell{I, H, Z, V, D, A} <: AbstractDoubleRecurrentCell
+    Wi::I
+    Wh::H
+    c::Z
+    bias::V
+    dt::D
+    alpha::A
+end
+
+@layer UnICORNNCell
+
+function UnICORNNCell((input_size, hidden_size)::Pair{<:Int, <:Int},
+        dt::Number=1.0f0; alpha::Number=0.0f0,
+        init_kernel=glorot_uniform, init_recurrent_kernel=glorot_uniform,
+        bias::Bool=true)
+    Wi = init_kernel(hidden_size, input_size)
+    Wh = init_recurrent_kernel(hidden_size)
+    c = init_kernel(hidden_size)
+    b = create_bias(Wi, bias, size(Wi, 1))
+    T = eltype(Wi)
+    return UnICORNNCell(Wi, Wh, c, b, T(dt), T(alpha))
+end
+
+function (unicornn::UnICORNNCell)(inp::AbstractVecOrMat, (state, c_state))
+    _size_check(unicornn, inp, 1 => size(unicornn.Wi, 2))
+    Wi, Wh, c, b = unicornn.Wi, unicornn.Wh, unicornn.c, unicornn.bias
+    dt, alpha = unicornn.dt, unicornn.alpha
+    new_cstate = c_state .-
+                 dt .* sigmoid_fast.(c) .*
+                 (tanh_fast.(Wh .* state .+ Wi * inp .+ b) .+ alpha .* state)
+    new_state = state .+ dt .* sigmoid_fast.(c) .* new_cstate
+    return new_state, (new_state, new_cstate)
+end
+
+function initialstates(unicornn::UnICORNNCell)
+    state = zeros_like(unicornn.Wi, size(unicornn.Wi, 1))
+    c_state = zeros_like(unicornn.Wi, size(unicornn.Wi, 1))
+    return state, c_state
+end
+
+function Base.show(io::IO, unicornn::UnICORNNCell)
+    print(io, "UnICORNNCell(", size(unicornn.Wi, 2), " => ", size(unicornn.Wi, 1), ")")
+end
+
+@doc raw"""
+    UnICORNN(input_size => hidden_size, [dt];
+        alpha=0.0, return_state=false, init_kernel = glorot_uniform,
+        init_recurrent_kernel = glorot_uniform, bias = true)
+
+[Undamped independent controlled oscillatory recurrent neural network](https://arxiv.org/abs/2103.05487).
+See [`UnICORNNCell`](@ref) for a layer that processes a single time step.
+
+# Arguments
+
+- `input_size => hidden_size`: input and inner dimension of the layer
+- `dt`: time step. Default is 1.0.
+
+# Keyword arguments
+
+- `alpha`: Control parameter. Default is 0.0.
+- `init_kernel`: initializer for the input to hidden weights
+- `init_recurrent_kernel`: initializer for the hidden to hidden weights
+- `bias`: include a bias or not. Default is `true`
+- `return_state`: Option to return the last state together with the output.
+  Default is `false`.
+  
+# Equations
+```math
+\begin{aligned}
+    y_n &= y_{n-1} + \Delta t \, \hat{\sigma}(c) \odot z_n, \\
+    z_n &= z_{n-1} - \Delta t \, \hat{\sigma}(c) \odot \left[ 
+        \sigma \left( w \odot y_{n-1} + V x_n + b \right) + 
+        \alpha y_{n-1} \right].
+\end{aligned}
+```
+
+# Forward
+
+    unicornn(inp, (state, cstate))
+    unicornn(inp)
+
+## Arguments
+- `inp`: The input to the `unicornn`. It should be a matrix of size `input_size x len`
+  or an array of size `input_size x len x batch_size`.
+- `(state, cstate)`: A tuple containing the hidden and cell states of the `UnICORNN`. 
+  They should be vectors of size `hidden_size` or matrices of size
+  `hidden_size x batch_size`. If not provided, they are assumed to be vectors of zeros,
+  initialized by [`Flux.initialstates`](@extref).
+
+## Returns
+- New hidden states `new_states` as an array of size `hidden_size x len x batch_size`.
+  When `return_state = true` it returns a tuple of the hidden states `new_states` and
+  the last state of the iteration.
+"""
+struct UnICORNN{S, M} <: AbstractRecurrentLayer{S}
+    cell::M
+end
+
+@layer :noexpand UnICORNN
+
+function UnICORNN((input_size, hidden_size)::Pair{<:Int, <:Int}, args...;
+        return_state::Bool=false, kwargs...)
+    cell = UnICORNNCell(input_size => hidden_size, args...; kwargs...)
+    return UnICORNN{return_state, typeof(cell)}(cell)
+end
+
+function functor(unicornn::UnICORNN{S}) where {S}
+    params = (cell=unicornn.cell,)
+    reconstruct = p -> UnICORNN{S, typeof(p.cell)}(p.cell)
+    return params, reconstruct
+end
+
+function Base.show(io::IO, unicornn::UnICORNN)
+    print(io, "UnICORNN(", size(unicornn.cell.Wi, 2),
+        " => ", size(unicornn.cell.Wi, 1))
+    print(io, ")")
+end
diff --git a/test/test_cells.jl b/test/test_cells.jl
@@ -140,3 +140,17 @@ end
     @test rnncell(inp) ==
           rnncell(inp, (zeros(Float32, 5), zeros(Float32, 5), zeros(Float32, 3)))
 end
+
+@testset "UnICORNNCell" begin
+    rnncell = UnICORNNCell(3 => 5)
+    @test length(Flux.trainables(rnncell)) == 4
+
+    inp = rand(Float32, 3)
+    @test rnncell(inp) == rnncell(inp, (zeros(Float32, 5), zeros(Float32, 5)))
+
+    rnncell = UnICORNNCell(3 => 5; bias=false)
+    @test length(Flux.trainables(rnncell)) == 3
+
+    inp = rand(Float32, 3)
+    @test rnncell(inp) == rnncell(inp, (zeros(Float32, 5), zeros(Float32, 5)))
+end
diff --git a/test/test_layers.jl b/test/test_layers.jl
@@ -3,7 +3,7 @@ import Flux: initialstates
 
 layers = [MGU, LiGRU, RAN, LightRU, NAS, MUT1, MUT2, MUT3,
     SCRN, PeepholeLSTM, FastRNN, FastGRNN, LEM, coRNN, AntisymmetricRNN,
-    GatedAntisymmetricRNN, JANET, CFN, TRNN, TGRU, TLSTM]
+    GatedAntisymmetricRNN, JANET, CFN, TRNN, TGRU, TLSTM, UnICORNN]
 #IndRNN handles internal states differently
 #RHN should be checked more for consistency for initialstates
-Original file line number
+Diff line change
@@ Expand Up / @@ -25,4 +25,5 @@ CFN @@
     TRNN
     TGRU
     TLSTM
+    UnICORNN
     ```