Skip to content

Commit

Permalink
fix error of gradient over scalar matrix multiplication
Browse files Browse the repository at this point in the history
  • Loading branch information
yuehhua committed Dec 12, 2021
1 parent 8febd8f commit dfe3381
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 165 deletions.
22 changes: 0 additions & 22 deletions src/cuda/conv.jl
Original file line number Diff line number Diff line change
@@ -1,25 +1,3 @@
# GCNConv on GPU features with a non-CuArray matrix `L̃`: pass `L̃` through `cu`
# and call the layer again so the `CuArray` method below handles the computation.
(g::GCNConv)(L̃::AbstractMatrix, X::CuArray) = g(cu(L̃), X)

# GCNConv forward pass when both operands are `CuArray`s:
# multiply the weights, the features `X` and the matrix `L̃`, add the bias by
# broadcasting, then apply the activation `g.σ` element-wise.
# The `L̃` operand was missing from the product (`X *.+` does not parse); it is
# restored here as the right-hand factor of the matrix product.
(g::GCNConv)(L̃::CuArray, X::CuArray) = g.σ.(g.weight * X * L̃ .+ g.bias)

# ChebConv on GPU features with a non-CuArray matrix `L̃`: pass `L̃` through `cu`
# and call the layer again so the `CuArray` method below handles the computation.
(c::ChebConv)(L̃::AbstractMatrix, X::CuArray) = c(cu(L̃), X)

# ChebConv forward pass when both operands are `CuArray`s.
#
# Builds the terms Z₁ = X, Z₂ = X * L̃, Zₖ = 2 * Zₖ₋₁ * L̃ - Zₖ₋₂ and accumulates
# `c.weight[:, :, k] * Zₖ` for k = 1..c.k, then adds `c.bias` by broadcasting.
#
# Throws an `AssertionError` when the first dimension of `X` differs from
# `c.in_channel` or the second dimension of `X` differs from `size(L̃, 1)`.
function (c::ChebConv)(L̃::CuArray, X::CuArray)
    @assert size(X, 1) == c.in_channel "Input feature size must match input channel size."
    @assert size(X, 2) == size(L̃, 1) "Input vertex number must match Laplacian matrix size."

    Z_prev = X
    # The `L̃` factor was missing here, leaving a dangling `*`.
    Z = X * L̃
    Y = view(c.weight,:,:,1) * Z_prev
    Y += view(c.weight,:,:,2) * Z
    for k = 3:c.k
        # Recurrence step. The scalar factor is applied with a broadcast (`2 .* Z`),
        # matching the form used by the FeaturedGraph method in src/layers/conv.jl.
        Z, Z_prev = 2 .* Z * L̃ - Z_prev, Z
        Y += view(c.weight,:,:,k) * Z
    end
    return Y .+ c.bias
end


# Disambiguating method: when the edge features `E` are a 2-D `Fill` and the node
# features `X` are a `CuMatrix`, drop `E` and `u` and forward to the
# three-argument `update_batch_edge(g, adj, X)`.
function update_batch_edge(g::GATConv, adj, E::Fill{S,2,Axes}, X::CuMatrix, u) where {S,Axes}
    return update_batch_edge(g, adj, X)
end

Expand Down
2 changes: 1 addition & 1 deletion src/layers/conv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ function (c::ChebConv)(fg::FeaturedGraph, X::AbstractMatrix{T}) where T
Y = view(c.weight,:,:,1) * Z_prev
Y += view(c.weight,:,:,2) * Z
for k = 3:c.k
Z, Z_prev = 2*Z*L̃ - Z_prev, Z
Z, Z_prev = 2 .* Z * L̃ - Z_prev, Z
Y += view(c.weight,:,:,k) * Z
end
return Y .+ c.bias
Expand Down
284 changes: 142 additions & 142 deletions test/layers/conv.jl
Original file line number Diff line number Diff line change
Expand Up @@ -259,146 +259,146 @@
end
end

# @testset "GatedGraphConv" begin
# num_layers = 3
# X = rand(T, in_channel, N)
# Xt = transpose(rand(T, N, in_channel))
# @testset "layer with graph" begin
# ggc = GatedGraphConv(fg, out_channel, num_layers)
# @test adjacency_list(ggc.fg) == [[2,4], [1,3], [2,4], [1,3]]
# @test size(ggc.weight) == (out_channel, out_channel, num_layers)

# Y = ggc(X)
# @test size(Y) == (out_channel, N)


# # Test with transposed features
# Y = ggc(Xt)
# @test size(Y) == (out_channel, N)

# g = Zygote.gradient(x -> sum(ggc(x)), X)[1]
# @test size(g) == size(X)

# g = Zygote.gradient(model -> sum(model(X)), ggc)[1]
# @test size(g.weight) == size(ggc.weight)
# end

# @testset "layer without graph" begin
# ggc = GatedGraphConv(out_channel, num_layers)
# @test size(ggc.weight) == (out_channel, out_channel, num_layers)

# fg = FeaturedGraph(adj, nf=X)
# fg_ = ggc(fg)
# @test size(node_feature(fg_)) == (out_channel, N)
# @test_throws MethodError ggc(X)

# # Test with transposed features
# fgt = FeaturedGraph(adj, nf=Xt)
# fgt_ = ggc(fgt)
# @test size(node_feature(fgt_)) == (out_channel, N)

# g = Zygote.gradient(x -> sum(node_feature(ggc(x))), fg)[1]
# @test size(g.nf) == size(X)

# g = Zygote.gradient(model -> sum(node_feature(model(fg))), ggc)[1]
# @test size(g.weight) == size(ggc.weight)
# end
# end

# @testset "EdgeConv" begin
# X = rand(T, in_channel, N)
# Xt = transpose(rand(T, N, in_channel))
# @testset "layer with graph" begin
# ec = EdgeConv(fg, Dense(2*in_channel, out_channel))
# @test adjacency_list(ec.fg) == [[2,4], [1,3], [2,4], [1,3]]

# Y = ec(X)
# @test size(Y) == (out_channel, N)

# # Test with transposed features
# Y = ec(Xt)
# @test size(Y) == (out_channel, N)

# g = Zygote.gradient(x -> sum(ec(x)), X)[1]
# @test size(g) == size(X)

# g = Zygote.gradient(model -> sum(model(X)), ec)[1]
# @test size(g.nn.weight) == size(ec.nn.weight)
# @test size(g.nn.bias) == size(ec.nn.bias)
# end

# @testset "layer without graph" begin
# ec = EdgeConv(Dense(2*in_channel, out_channel))

# fg = FeaturedGraph(adj, nf=X)
# fg_ = ec(fg)
# @test size(node_feature(fg_)) == (out_channel, N)
# @test_throws MethodError ec(X)

# # Test with transposed features
# fgt = FeaturedGraph(adj, nf=Xt)
# fgt_ = ec(fgt)
# @test size(node_feature(fgt_)) == (out_channel, N)

# g = Zygote.gradient(x -> sum(node_feature(ec(x))), fg)[1]
# @test size(g.nf) == size(X)

# g = Zygote.gradient(model -> sum(node_feature(model(fg))), ec)[1]
# @test size(g.nn.weight) == size(ec.nn.weight)
# @test size(g.nn.bias) == size(ec.nn.bias)
# end
# end

# @testset "GINConv" begin
# X = rand(Float32, in_channel, N)
# Xt = transpose(rand(Float32, N, in_channel))
# nn = Flux.Chain(Dense(in_channel, out_channel))
# eps = 0.001

# @testset "layer with graph" begin
# gc = GINConv(FeaturedGraph(adj), nn, eps)
# @test size(gc.nn.layers[1].weight) == (out_channel, in_channel)
# @test size(gc.nn.layers[1].bias) == (out_channel, )
# @test GraphSignals.adjacency_matrix(gc.fg) == adj

# Y = gc(FeaturedGraph(adj, nf=X))
# @test size(node_feature(Y)) == (out_channel, N)

# # Test with transposed features
# Y = gc(FeaturedGraph(adj, nf=Xt))
# @test size(node_feature(Y)) == (out_channel, N)

# g = Zygote.gradient(x -> sum(node_feature(gc(x))),
# FeaturedGraph(adj, nf=X))[1]
# @test size(g.nf) == size(X)

# g = Zygote.gradient(model -> sum(node_feature(model(FeaturedGraph(adj, nf=X)))),
# gc)[1]
# @test size(g.nn.layers[1].weight) == size(gc.nn.layers[1].weight)
# @test size(g.nn.layers[1].bias) == size(gc.nn.layers[1].bias)
# @test !in(:eps, Flux.trainable(gc))
# end
# end

# @testset "CGConv" begin
# fg = FeaturedGraph(adj)
# X = rand(Float32, in_channel, N)
# E = rand(Float32, in_channel_edge, ne(fg))
# Xt = transpose(rand(Float32, N, in_channel))
# @testset "layer with graph" begin
# cgc = CGConv(FeaturedGraph(adj),
# (in_channel, in_channel_edge))
# @test size(cgc.Wf) == (in_channel, 2 * in_channel + in_channel_edge)
# @test size(cgc.Ws) == (in_channel, 2 * in_channel + in_channel_edge)
# @test size(cgc.bf) == (in_channel,)
# @test size(cgc.bs) == (in_channel,)

# Y = cgc(X, E)
# @test size(Y) == (in_channel, N)

# Yg = cgc(FeaturedGraph(adj, nf=X, ef=E))
# @test size(node_feature(Yg)) == (in_channel, N)
# @test edge_feature(Yg) == E
# end
# end
# Tests for the GatedGraphConv layer: construction, forward-pass output shape and
# Zygote gradients, with the graph stored in the layer and passed at call time.
@testset "GatedGraphConv" begin
num_layers = 3
# Random features of size (in_channel, N), plus a lazily transposed
# (N, in_channel) matrix that has the same size but a wrapper array type.
X = rand(T, in_channel, N)
Xt = transpose(rand(T, N, in_channel))
@testset "layer with graph" begin
# Layer constructed with the graph `fg`, so it is called on plain feature matrices.
ggc = GatedGraphConv(fg, out_channel, num_layers)
@test adjacency_list(ggc.fg) == [[2,4], [1,3], [2,4], [1,3]]
@test size(ggc.weight) == (out_channel, out_channel, num_layers)

Y = ggc(X)
@test size(Y) == (out_channel, N)


# Test with transposed features
Y = ggc(Xt)
@test size(Y) == (out_channel, N)

# Gradient with respect to the input features has the same size as the input.
g = Zygote.gradient(x -> sum(ggc(x)), X)[1]
@test size(g) == size(X)

# Gradient with respect to the layer exposes a `weight` entry of matching size.
g = Zygote.gradient(model -> sum(model(X)), ggc)[1]
@test size(g.weight) == size(ggc.weight)
end

@testset "layer without graph" begin
# Layer constructed without a graph, so it is called on a FeaturedGraph.
ggc = GatedGraphConv(out_channel, num_layers)
@test size(ggc.weight) == (out_channel, out_channel, num_layers)

# NOTE(review): `fg` is also read by the "layer with graph" testsets; if it is a
# local of an enclosing testset, this assignment rebinds it for every testset that
# runs afterwards - confirm that is intended.
fg = FeaturedGraph(adj, nf=X)
fg_ = ggc(fg)
@test size(node_feature(fg_)) == (out_channel, N)
# Calling the graph-less layer on a bare matrix is expected to raise MethodError.
@test_throws MethodError ggc(X)

# Test with transposed features
fgt = FeaturedGraph(adj, nf=Xt)
fgt_ = ggc(fgt)
@test size(node_feature(fgt_)) == (out_channel, N)

# Gradient with respect to the FeaturedGraph: its `nf` entry matches the size of X.
g = Zygote.gradient(x -> sum(node_feature(ggc(x))), fg)[1]
@test size(g.nf) == size(X)

# Gradient with respect to the layer exposes a `weight` entry of matching size.
g = Zygote.gradient(model -> sum(node_feature(model(fg))), ggc)[1]
@test size(g.weight) == size(ggc.weight)
end
end

# Tests for the EdgeConv layer wrapping a Dense(2*in_channel, out_channel) network:
# forward-pass output shape and Zygote gradients, with and without a stored graph.
@testset "EdgeConv" begin
# Random features of size (in_channel, N), plus a lazily transposed
# (N, in_channel) matrix that has the same size but a wrapper array type.
X = rand(T, in_channel, N)
Xt = transpose(rand(T, N, in_channel))
@testset "layer with graph" begin
# Layer constructed with the graph `fg`, so it is called on plain feature matrices.
ec = EdgeConv(fg, Dense(2*in_channel, out_channel))
@test adjacency_list(ec.fg) == [[2,4], [1,3], [2,4], [1,3]]

Y = ec(X)
@test size(Y) == (out_channel, N)

# Test with transposed features
Y = ec(Xt)
@test size(Y) == (out_channel, N)

# Gradient with respect to the input features has the same size as the input.
g = Zygote.gradient(x -> sum(ec(x)), X)[1]
@test size(g) == size(X)

# Gradient with respect to the layer: the wrapped network's weight and bias
# gradients match the parameter sizes.
g = Zygote.gradient(model -> sum(model(X)), ec)[1]
@test size(g.nn.weight) == size(ec.nn.weight)
@test size(g.nn.bias) == size(ec.nn.bias)
end

@testset "layer without graph" begin
# Layer constructed without a graph, so it is called on a FeaturedGraph.
ec = EdgeConv(Dense(2*in_channel, out_channel))

# NOTE(review): `fg` is also read by the "layer with graph" testsets; if it is a
# local of an enclosing testset, this assignment rebinds it for every testset that
# runs afterwards - confirm that is intended.
fg = FeaturedGraph(adj, nf=X)
fg_ = ec(fg)
@test size(node_feature(fg_)) == (out_channel, N)
# Calling the graph-less layer on a bare matrix is expected to raise MethodError.
@test_throws MethodError ec(X)

# Test with transposed features
fgt = FeaturedGraph(adj, nf=Xt)
fgt_ = ec(fgt)
@test size(node_feature(fgt_)) == (out_channel, N)

# Gradient with respect to the FeaturedGraph: its `nf` entry matches the size of X.
g = Zygote.gradient(x -> sum(node_feature(ec(x))), fg)[1]
@test size(g.nf) == size(X)

# Gradient with respect to the layer: the wrapped network's weight and bias
# gradients match the parameter sizes.
g = Zygote.gradient(model -> sum(node_feature(model(fg))), ec)[1]
@test size(g.nn.weight) == size(ec.nn.weight)
@test size(g.nn.bias) == size(ec.nn.bias)
end
end

# Tests for the GINConv layer wrapping a one-layer Flux.Chain: parameter sizes,
# forward-pass output shape, Zygote gradients, and that `eps` is not trainable.
@testset "GINConv" begin
# Float32 features of size (in_channel, N), plus a lazily transposed
# (N, in_channel) matrix that has the same size but a wrapper array type.
X = rand(Float32, in_channel, N)
Xt = transpose(rand(Float32, N, in_channel))
nn = Flux.Chain(Dense(in_channel, out_channel))
eps = 0.001

@testset "layer with graph" begin
# The layer stores a feature-less FeaturedGraph; it is still called on
# FeaturedGraphs that carry the node features.
gc = GINConv(FeaturedGraph(adj), nn, eps)
@test size(gc.nn.layers[1].weight) == (out_channel, in_channel)
@test size(gc.nn.layers[1].bias) == (out_channel, )
@test GraphSignals.adjacency_matrix(gc.fg) == adj

Y = gc(FeaturedGraph(adj, nf=X))
@test size(node_feature(Y)) == (out_channel, N)

# Test with transposed features
Y = gc(FeaturedGraph(adj, nf=Xt))
@test size(node_feature(Y)) == (out_channel, N)

# Gradient with respect to the FeaturedGraph: its `nf` entry matches the size of X.
g = Zygote.gradient(x -> sum(node_feature(gc(x))),
FeaturedGraph(adj, nf=X))[1]
@test size(g.nf) == size(X)

# Gradient with respect to the layer: the first chain layer's weight and bias
# gradients match the parameter sizes.
g = Zygote.gradient(model -> sum(node_feature(model(FeaturedGraph(adj, nf=X)))),
gc)[1]
@test size(g.nn.layers[1].weight) == size(gc.nn.layers[1].weight)
@test size(g.nn.layers[1].bias) == size(gc.nn.layers[1].bias)
# `:eps` must not be among the entries returned by Flux.trainable(gc).
@test !in(:eps, Flux.trainable(gc))
end
end

# Tests for the CGConv layer: parameter sizes and forward-pass output shape when
# called on raw (node, edge) feature matrices and on a FeaturedGraph.
@testset "CGConv" begin
# NOTE(review): if `fg` is a local of an enclosing testset, this assignment
# rebinds it for any testset that runs afterwards - confirm that is intended.
fg = FeaturedGraph(adj)
# Node features are (in_channel, N); edge features have one column per edge of `fg`.
X = rand(Float32, in_channel, N)
E = rand(Float32, in_channel_edge, ne(fg))
# `Xt` is not used in the visible part of this testset.
Xt = transpose(rand(Float32, N, in_channel))
@testset "layer with graph" begin
cgc = CGConv(FeaturedGraph(adj),
(in_channel, in_channel_edge))
# Both weight matrices have 2 * in_channel + in_channel_edge columns and
# in_channel rows; both biases have length in_channel.
@test size(cgc.Wf) == (in_channel, 2 * in_channel + in_channel_edge)
@test size(cgc.Ws) == (in_channel, 2 * in_channel + in_channel_edge)
@test size(cgc.bf) == (in_channel,)
@test size(cgc.bs) == (in_channel,)

# Called on raw matrices: the output keeps the node-feature size (in_channel, N).
Y = cgc(X, E)
@test size(Y) == (in_channel, N)

# Called on a FeaturedGraph: node features keep their size and the edge features
# compare equal to the input `E`.
Yg = cgc(FeaturedGraph(adj, nf=X, ef=E))
@test size(node_feature(Yg)) == (in_channel, N)
@test edge_feature(Yg) == E
end
end
end

0 comments on commit dfe3381

Please sign in to comment.