From c185f04183d760b84d0dcfa2b49511255cd1e7dc Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Tue, 20 Aug 2024 16:45:29 -0700
Subject: [PATCH] fix: broken qa tests

---
 src/api/layernorm.jl      | 6 +++---
 src/deprecations.jl       | 8 ++++----
 src/impl/batchnorm.jl     | 4 ++--
 src/impl/groupnorm.jl     | 6 +++---
 src/impl/normalization.jl | 6 +++---
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/api/layernorm.jl b/src/api/layernorm.jl
index 915ea24e..d15f0b5c 100644
--- a/src/api/layernorm.jl
+++ b/src/api/layernorm.jl
@@ -31,9 +31,9 @@ Normalized Array of same size as `x`.
 [1] Ba, Jimmy Lei, Jamie Ryan Kiros, and Geoffrey E. Hinton. "Layer normalization." arXiv
 preprint arXiv:1607.06450 (2016).
 """
-function layernorm(x::AbstractArray{xT}, scale::Optional{<:AbstractArray{scT}},
-        bias::Optional{<:AbstractArray{bT}}, σ::F=identity, dims=Colon(),
-        epsilon::Real=get_utils(:default_epsilon)(x)) where {F, xT, scT, bT}
+function layernorm(x::AbstractArray{xT}, scale::Optional{<:AbstractArray},
+        bias::Optional{<:AbstractArray}, σ::F=identity, dims=Colon(),
+        epsilon::Real=get_utils(:default_epsilon)(x)) where {F, xT}
     σ′ = get_impl(:select_fastest_activation)(σ, x, scale, bias)
     return get_impl(:layernorm)(x, scale, bias, σ′, dims, epsilon)
 end
diff --git a/src/deprecations.jl b/src/deprecations.jl
index 0aefc151..16e4d34d 100644
--- a/src/deprecations.jl
+++ b/src/deprecations.jl
@@ -35,12 +35,12 @@ import .API: batchnorm, groupnorm, instancenorm, layernorm, dropout,
 ## conv
 @deprecate fused_conv_bias_activation(
-    σ::F, weight::AbstractArray{<:Number, N}, x::AbstractArray{<:Number, N},
-    b::AbstractArray{<:Number, N}, cdims::ConvDims) where {F, N} fused_conv_bias_activation(
-    σ, weight, x, _vec(b), cdims)
+    σ::F, weight::AbstractArray{<:Any, N}, x::AbstractArray{<:Any, N},
+    b::AbstractArray{<:Any, N}, cdims::ConvDims) where {F, N} fused_conv_bias_activation(
+    σ, weight, x, Utils.vec(b), cdims)

 ## Private API that was at a point being illegally used in Lux

 @deprecate __∇conv_data(args...; kwargs...) Impl.∇conv_data(args...; kwargs...)

 @deprecate __apply_bias_activation(σ::F, x, bias::AbstractArray) where {F} bias_activation(
-    σ, x, _vec(bias))
+    σ, x, Utils.vec(bias))
diff --git a/src/impl/batchnorm.jl b/src/impl/batchnorm.jl
index 8b14bb46..9ef017e6 100644
--- a/src/impl/batchnorm.jl
+++ b/src/impl/batchnorm.jl
@@ -402,10 +402,10 @@ end

 function ∇batchnorm_affine_normalize!(
         ∂x::AbstractArray{∂xT, 3}, ∂σ²::AbstractArray{∂σ²T, 3},
-        ∂γ::Optional{<:AbstractArray{∂γT, 3}}, ::GPUBroadcastOp,
+        ∂γ::Optional{<:AbstractArray{<:Any, 3}}, ::GPUBroadcastOp,
         ∂y::AbstractArray{∂yT, 3}, x::AbstractArray{xT, 3},
         μ::AbstractVector, σ²::AbstractVector, γ::Optional{<:AbstractVector}, ϵ::Real,
-        γ′::AbstractVector) where {∂xT, ∂σ²T, ∂γT, ∂yT, xT}
+        γ′::AbstractVector) where {∂xT, ∂σ²T, ∂yT, xT}
     backend = KA.get_backend(∂x)
     Utils.run_ka_kernel(
         ∇batchnorm_affine_normalize_kernel!, backend, nothing, size(∂x),
diff --git a/src/impl/groupnorm.jl b/src/impl/groupnorm.jl
index 2733b4b1..b736aa8b 100644
--- a/src/impl/groupnorm.jl
+++ b/src/impl/groupnorm.jl
@@ -389,10 +389,10 @@ end

 function ∇groupnorm_affine_normalize!(
         ∂x::AbstractArray{∂xT, 4}, ∂σ²::AbstractArray{∂σ²T, 4},
-        ∂γ::Optional{<:AbstractArray{∂γT, 4}}, ::GPUBroadcastOp,
+        ∂γ::Optional{<:AbstractArray{<:Any, 4}}, ::GPUBroadcastOp,
         ∂y::AbstractArray{∂yT, 4}, x::AbstractArray{xT, 4}, μ::AbstractArray{μT, 4},
-        σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{γT, 4}},
-        ϵ::Real) where {∂xT, ∂σ²T, ∂γT, ∂yT, xT, μT, σ²T, γT}
+        σ²::AbstractArray{σ²T, 4}, γ::Optional{<:AbstractArray{<:Any, 4}},
+        ϵ::Real) where {∂xT, ∂σ²T, ∂yT, xT, μT, σ²T}
     backend = KA.get_backend(∂x)
     Utils.run_ka_kernel(
         ∇groupnorm_affine_normalize_kernel!, backend, nothing, size(∂x),
diff --git a/src/impl/normalization.jl b/src/impl/normalization.jl
index f2eefe6a..0e7ef4c6 100644
--- a/src/impl/normalization.jl
+++ b/src/impl/normalization.jl
@@ -134,9 +134,9 @@ CRC.@non_differentiable get_norm_reshape_dims(::Any...)
 # Entry Points

 ## LayerNorm
-function layernorm(x::AbstractArray{xT, N}, γ::Optional{<:AbstractArray{γT, N}},
-        β::Optional{<:AbstractArray{βT, N}}, act::F,
-        dims, epsilon::Real) where {N, F, xT, γT, βT}
+function layernorm(x::AbstractArray{xT, N}, γ::Optional{<:AbstractArray{<:Any, N}},
+        β::Optional{<:AbstractArray{<:Any, N}}, act::F,
+        dims, epsilon::Real) where {N, F, xT}
     μ, σ² = mean_var(x; dims, corrected=false)
     return affine_normalize(act, x, μ, σ², γ, β, epsilon)
 end
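
Note on the fix: every type parameter this patch removes (scT, bT, ∂γT, γT, βT) appeared only
inside an Optional argument, i.e. a Union{Nothing, ...}. A call that passes `nothing` for that
argument never binds the parameter, and static-analysis QA checks flag such methods as having
unbound `where` parameters. Replacing the named parameter with `<:Any` (or dropping the element
type entirely, as in the layernorm API) keeps dispatch identical while removing the unbindable
name. A minimal, self-contained sketch of the problem and the fix; `norm_bad` and `norm_good`
are hypothetical names, not part of LuxLib:

    # `Optional` mirrors the alias used in the patch.
    const Optional{T} = Union{Nothing, T}

    # Flagged by QA: `T` occurs only under the Union, so a call with
    # `γ === nothing` leaves it unbound.
    norm_bad(x::AbstractArray, γ::Optional{<:AbstractArray{T}}) where {T} = x

    # Fixed form: `<:Any` gives the same dispatch without a named parameter.
    norm_good(x::AbstractArray, γ::Optional{<:AbstractArray{<:Any}}) = x

    norm_bad(rand(3), nothing)   # runs, but this method is flagged by QA
    norm_good(rand(3), nothing)  # same behavior, passes the check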
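In the Julia ecosystem this kind of QA check is commonly run with Aqua.jl, whose
Aqua.test_unbound_args detects exactly this pattern. A sketch, in that spirit, of the sort of
test the subject line refers to; LuxLib's actual QA suite may be organized differently:

    using Aqua, Test, LuxLib

    @testset "QA: unbound type parameters" begin
        # Fails if any method has a `where` parameter that some call signature
        # (e.g. passing `nothing` for an Optional argument) leaves unbound.
        Aqua.test_unbound_args(LuxLib)
    end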