From dfe33816536fbce68d5f7a5b9a5fedf9044172ca Mon Sep 17 00:00:00 2001
From: Yueh-Hua Tu
Date: Sun, 12 Dec 2021 19:57:15 +0800
Subject: [PATCH] fix error of gradient over scalar matrix multiplication

---
NOTE(review): line breaks and indentation in this copy were restored from a
whitespace-collapsed extraction; indentation is inferred, so verify with
`git apply --check` (or apply with `--ignore-whitespace`) before relying on it.

 src/cuda/conv.jl    |  22 ----
 src/layers/conv.jl  |   2 +-
 test/layers/conv.jl | 284 ++++++++++++++++++++++----------------------
 3 files changed, 143 insertions(+), 165 deletions(-)

diff --git a/src/cuda/conv.jl b/src/cuda/conv.jl
index d29c2fce3..942ece6df 100644
--- a/src/cuda/conv.jl
+++ b/src/cuda/conv.jl
@@ -1,25 +1,3 @@
-(g::GCNConv)(L̃::AbstractMatrix, X::CuArray) = g(cu(L̃), X)
-
-(g::GCNConv)(L̃::CuArray, X::CuArray) = g.σ.(g.weight * X * L̃ .+ g.bias)
-
-(c::ChebConv)(L̃::AbstractMatrix, X::CuArray) = c(cu(L̃), X)
-
-function (c::ChebConv)(L̃::CuArray, X::CuArray)
-    @assert size(X, 1) == c.in_channel "Input feature size must match input channel size."
-    @assert size(X, 2) == size(L̃, 1) "Input vertex number must match Laplacian matrix size."
-
-    Z_prev = X
-    Z = X * L̃
-    Y = view(c.weight,:,:,1) * Z_prev
-    Y += view(c.weight,:,:,2) * Z
-    for k = 3:c.k
-        Z, Z_prev = 2*Z*L̃ - Z_prev, Z
-        Y += view(c.weight,:,:,k) * Z
-    end
-    return Y .+ c.bias
-end
-
-
 # Avoid ambiguity
 update_batch_edge(g::GATConv, adj, E::Fill{S,2,Axes}, X::CuMatrix, u) where {S,Axes} = update_batch_edge(g, adj, X)

diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 56c3cb1a5..631b92e39 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -104,7 +104,7 @@ function (c::ChebConv)(fg::FeaturedGraph, X::AbstractMatrix{T}) where T
     Y = view(c.weight,:,:,1) * Z_prev
     Y += view(c.weight,:,:,2) * Z
     for k = 3:c.k
-        Z, Z_prev = 2*Z*L̃ - Z_prev, Z
+        Z, Z_prev = 2 .* Z * L̃ - Z_prev, Z
         Y += view(c.weight,:,:,k) * Z
     end
     return Y .+ c.bias
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 4246bc93a..29a295718 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -259,146 +259,146 @@
         end
     end

-    # @testset "GatedGraphConv" begin
-    #     num_layers = 3
-    #     X = rand(T, in_channel, N)
-    #     Xt = transpose(rand(T, N, in_channel))
-    #     @testset "layer with graph" begin
-    #         ggc = GatedGraphConv(fg, out_channel, num_layers)
-    #         @test adjacency_list(ggc.fg) == [[2,4], [1,3], [2,4], [1,3]]
-    #         @test size(ggc.weight) == (out_channel, out_channel, num_layers)
-
-    #         Y = ggc(X)
-    #         @test size(Y) == (out_channel, N)
-
-
-    #         # Test with transposed features
-    #         Y = ggc(Xt)
-    #         @test size(Y) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(ggc(x)), X)[1]
-    #         @test size(g) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(model(X)), ggc)[1]
-    #         @test size(g.weight) == size(ggc.weight)
-    #     end
-
-    #     @testset "layer without graph" begin
-    #         ggc = GatedGraphConv(out_channel, num_layers)
-    #         @test size(ggc.weight) == (out_channel, out_channel, num_layers)
-
-    #         fg = FeaturedGraph(adj, nf=X)
-    #         fg_ = ggc(fg)
-    #         @test size(node_feature(fg_)) == (out_channel, N)
-    #         @test_throws MethodError ggc(X)
-
-    #         # Test with transposed features
-    #         fgt = FeaturedGraph(adj, nf=Xt)
-    #         fgt_ = ggc(fgt)
-    #         @test size(node_feature(fgt_)) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(node_feature(ggc(x))), fg)[1]
-    #         @test size(g.nf) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(node_feature(model(fg))), ggc)[1]
-    #         @test size(g.weight) == size(ggc.weight)
-    #     end
-    # end
-
-    # @testset "EdgeConv" begin
-    #     X = rand(T, in_channel, N)
-    #     Xt = transpose(rand(T, N, in_channel))
-    #     @testset "layer with graph" begin
-    #         ec = EdgeConv(fg, Dense(2*in_channel, out_channel))
-    #         @test adjacency_list(ec.fg) == [[2,4], [1,3], [2,4], [1,3]]
-
-    #         Y = ec(X)
-    #         @test size(Y) == (out_channel, N)
-
-    #         # Test with transposed features
-    #         Y = ec(Xt)
-    #         @test size(Y) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(ec(x)), X)[1]
-    #         @test size(g) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(model(X)), ec)[1]
-    #         @test size(g.nn.weight) == size(ec.nn.weight)
-    #         @test size(g.nn.bias) == size(ec.nn.bias)
-    #     end
-
-    #     @testset "layer without graph" begin
-    #         ec = EdgeConv(Dense(2*in_channel, out_channel))
-
-    #         fg = FeaturedGraph(adj, nf=X)
-    #         fg_ = ec(fg)
-    #         @test size(node_feature(fg_)) == (out_channel, N)
-    #         @test_throws MethodError ec(X)
-
-    #         # Test with transposed features
-    #         fgt = FeaturedGraph(adj, nf=Xt)
-    #         fgt_ = ec(fgt)
-    #         @test size(node_feature(fgt_)) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(node_feature(ec(x))), fg)[1]
-    #         @test size(g.nf) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(node_feature(model(fg))), ec)[1]
-    #         @test size(g.nn.weight) == size(ec.nn.weight)
-    #         @test size(g.nn.bias) == size(ec.nn.bias)
-    #     end
-    # end
-
-    # @testset "GINConv" begin
-    #     X = rand(Float32, in_channel, N)
-    #     Xt = transpose(rand(Float32, N, in_channel))
-    #     nn = Flux.Chain(Dense(in_channel, out_channel))
-    #     eps = 0.001
-
-    #     @testset "layer with graph" begin
-    #         gc = GINConv(FeaturedGraph(adj), nn, eps)
-    #         @test size(gc.nn.layers[1].weight) == (out_channel, in_channel)
-    #         @test size(gc.nn.layers[1].bias) == (out_channel, )
-    #         @test GraphSignals.adjacency_matrix(gc.fg) == adj
-
-    #         Y = gc(FeaturedGraph(adj, nf=X))
-    #         @test size(node_feature(Y)) == (out_channel, N)
-
-    #         # Test with transposed features
-    #         Y = gc(FeaturedGraph(adj, nf=Xt))
-    #         @test size(node_feature(Y)) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(node_feature(gc(x))),
-    #                             FeaturedGraph(adj, nf=X))[1]
-    #         @test size(g.nf) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(node_feature(model(FeaturedGraph(adj, nf=X)))),
-    #                             gc)[1]
-    #         @test size(g.nn.layers[1].weight) == size(gc.nn.layers[1].weight)
-    #         @test size(g.nn.layers[1].bias) == size(gc.nn.layers[1].bias)
-    #         @test !in(:eps, Flux.trainable(gc))
-    #     end
-    # end
-
-    # @testset "CGConv" begin
-    #     fg = FeaturedGraph(adj)
-    #     X = rand(Float32, in_channel, N)
-    #     E = rand(Float32, in_channel_edge, ne(fg))
-    #     Xt = transpose(rand(Float32, N, in_channel))
-    #     @testset "layer with graph" begin
-    #         cgc = CGConv(FeaturedGraph(adj),
-    #                      (in_channel, in_channel_edge))
-    #         @test size(cgc.Wf) == (in_channel, 2 * in_channel + in_channel_edge)
-    #         @test size(cgc.Ws) == (in_channel, 2 * in_channel + in_channel_edge)
-    #         @test size(cgc.bf) == (in_channel,)
-    #         @test size(cgc.bs) == (in_channel,)
-
-    #         Y = cgc(X, E)
-    #         @test size(Y) == (in_channel, N)
-
-    #         Yg = cgc(FeaturedGraph(adj, nf=X, ef=E))
-    #         @test size(node_feature(Yg)) == (in_channel, N)
-    #         @test edge_feature(Yg) == E
-    #     end
-    # end
+    @testset "GatedGraphConv" begin
+        num_layers = 3
+        X = rand(T, in_channel, N)
+        Xt = transpose(rand(T, N, in_channel))
+        @testset "layer with graph" begin
+            ggc = GatedGraphConv(fg, out_channel, num_layers)
+            @test adjacency_list(ggc.fg) == [[2,4], [1,3], [2,4], [1,3]]
+            @test size(ggc.weight) == (out_channel, out_channel, num_layers)
+
+            Y = ggc(X)
+            @test size(Y) == (out_channel, N)
+
+
+            # Test with transposed features
+            Y = ggc(Xt)
+            @test size(Y) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(ggc(x)), X)[1]
+            @test size(g) == size(X)
+
+            g = Zygote.gradient(model -> sum(model(X)), ggc)[1]
+            @test size(g.weight) == size(ggc.weight)
+        end
+
+        @testset "layer without graph" begin
+            ggc = GatedGraphConv(out_channel, num_layers)
+            @test size(ggc.weight) == (out_channel, out_channel, num_layers)
+
+            fg = FeaturedGraph(adj, nf=X)
+            fg_ = ggc(fg)
+            @test size(node_feature(fg_)) == (out_channel, N)
+            @test_throws MethodError ggc(X)
+
+            # Test with transposed features
+            fgt = FeaturedGraph(adj, nf=Xt)
+            fgt_ = ggc(fgt)
+            @test size(node_feature(fgt_)) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(node_feature(ggc(x))), fg)[1]
+            @test size(g.nf) == size(X)
+
+            g = Zygote.gradient(model -> sum(node_feature(model(fg))), ggc)[1]
+            @test size(g.weight) == size(ggc.weight)
+        end
+    end
+
+    @testset "EdgeConv" begin
+        X = rand(T, in_channel, N)
+        Xt = transpose(rand(T, N, in_channel))
+        @testset "layer with graph" begin
+            ec = EdgeConv(fg, Dense(2*in_channel, out_channel))
+            @test adjacency_list(ec.fg) == [[2,4], [1,3], [2,4], [1,3]]
+
+            Y = ec(X)
+            @test size(Y) == (out_channel, N)
+
+            # Test with transposed features
+            Y = ec(Xt)
+            @test size(Y) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(ec(x)), X)[1]
+            @test size(g) == size(X)
+
+            g = Zygote.gradient(model -> sum(model(X)), ec)[1]
+            @test size(g.nn.weight) == size(ec.nn.weight)
+            @test size(g.nn.bias) == size(ec.nn.bias)
+        end
+
+        @testset "layer without graph" begin
+            ec = EdgeConv(Dense(2*in_channel, out_channel))
+
+            fg = FeaturedGraph(adj, nf=X)
+            fg_ = ec(fg)
+            @test size(node_feature(fg_)) == (out_channel, N)
+            @test_throws MethodError ec(X)
+
+            # Test with transposed features
+            fgt = FeaturedGraph(adj, nf=Xt)
+            fgt_ = ec(fgt)
+            @test size(node_feature(fgt_)) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(node_feature(ec(x))), fg)[1]
+            @test size(g.nf) == size(X)
+
+            g = Zygote.gradient(model -> sum(node_feature(model(fg))), ec)[1]
+            @test size(g.nn.weight) == size(ec.nn.weight)
+            @test size(g.nn.bias) == size(ec.nn.bias)
+        end
+    end
+
+    @testset "GINConv" begin
+        X = rand(Float32, in_channel, N)
+        Xt = transpose(rand(Float32, N, in_channel))
+        nn = Flux.Chain(Dense(in_channel, out_channel))
+        eps = 0.001
+
+        @testset "layer with graph" begin
+            gc = GINConv(FeaturedGraph(adj), nn, eps)
+            @test size(gc.nn.layers[1].weight) == (out_channel, in_channel)
+            @test size(gc.nn.layers[1].bias) == (out_channel, )
+            @test GraphSignals.adjacency_matrix(gc.fg) == adj
+
+            Y = gc(FeaturedGraph(adj, nf=X))
+            @test size(node_feature(Y)) == (out_channel, N)
+
+            # Test with transposed features
+            Y = gc(FeaturedGraph(adj, nf=Xt))
+            @test size(node_feature(Y)) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(node_feature(gc(x))),
+                                FeaturedGraph(adj, nf=X))[1]
+            @test size(g.nf) == size(X)
+
+            g = Zygote.gradient(model -> sum(node_feature(model(FeaturedGraph(adj, nf=X)))),
+                                gc)[1]
+            @test size(g.nn.layers[1].weight) == size(gc.nn.layers[1].weight)
+            @test size(g.nn.layers[1].bias) == size(gc.nn.layers[1].bias)
+            @test !in(:eps, Flux.trainable(gc))
+        end
+    end
+
+    @testset "CGConv" begin
+        fg = FeaturedGraph(adj)
+        X = rand(Float32, in_channel, N)
+        E = rand(Float32, in_channel_edge, ne(fg))
+        Xt = transpose(rand(Float32, N, in_channel))
+        @testset "layer with graph" begin
+            cgc = CGConv(FeaturedGraph(adj),
+                         (in_channel, in_channel_edge))
+            @test size(cgc.Wf) == (in_channel, 2 * in_channel + in_channel_edge)
+            @test size(cgc.Ws) == (in_channel, 2 * in_channel + in_channel_edge)
+            @test size(cgc.bf) == (in_channel,)
+            @test size(cgc.bs) == (in_channel,)
+
+            Y = cgc(X, E)
+            @test size(Y) == (in_channel, N)
+
+            Yg = cgc(FeaturedGraph(adj, nf=X, ef=E))
+            @test size(node_feature(Yg)) == (in_channel, N)
+            @test edge_feature(Yg) == E
+        end
+    end
 end