From f5d364aea4cf668a91465c98f618b734e2243758 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Sat, 31 Jul 2021 09:13:14 +0200
Subject: [PATCH 01/15] initial implementation

---
 Project.toml                |   5 +-
 src/GeometricFlux.jl        |  24 +++-
 src/featuredgraph.jl        | 249 ++++++++++++++++++++++++++++++++++++
 src/graph_conversions.jl    |  60 +++++++++
 src/utils.jl                |  14 --
 test/cuda/featured_graph.jl |  36 ++++++
 test/featured_graph.jl      |  81 ++++++++++++
 test/runtests.jl            |  18 +--
 8 files changed, 453 insertions(+), 34 deletions(-)
 create mode 100644 src/featuredgraph.jl
 create mode 100644 src/graph_conversions.jl
 create mode 100644 test/cuda/featured_graph.jl
 create mode 100644 test/featured_graph.jl

diff --git a/Project.toml b/Project.toml
index f4f02e75a..aa0f9ff27 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,12 +5,11 @@ version = "0.7.6"
 
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-GraphLaplacians = "a1251efa-393a-423f-9d7b-faaecba535dc"
 GraphMLDatasets = "21828b05-d3b3-40ad-870e-a4bc2f52d5e8"
-GraphSignals = "3ebe565e-a4b5-49c6-aed2-300248c3a9c1"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
@@ -25,9 +24,7 @@ CUDA = "3.3"
 DataStructures = "0.18"
 FillArrays = "0.11, 0.12"
 Flux = "0.12"
-GraphLaplacians = "0.1"
 GraphMLDatasets = "0.1"
-GraphSignals = "0.2"
 LightGraphs = "1.3"
 NNlib = "0.7"
 NNlibCUDA = "0.1"
diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
index 0c10a3c36..79566e67c 100644
--- a/src/GeometricFlux.jl
+++ b/src/GeometricFlux.jl
@@ -1,20 +1,28 @@
 module GeometricFlux
 
 using Statistics: mean
-using LinearAlgebra: Adjoint, norm, Transpose
-using Reexport
-
+using LinearAlgebra
 using CUDA
 using FillArrays: Fill
 using Flux
 using Flux: glorot_uniform, leakyrelu, GRUCell, @functor
 using NNlib, NNlibCUDA
-using GraphLaplacians
-@reexport using GraphSignals
-using LightGraphs
 using Zygote
+using ChainRulesCore
+import LightGraphs
+using LightGraphs: AbstractGraph, outneighbors, inneighbors, is_directed, ne, nv, 
+                  adjacency_matrix, degree
 
 export
+    # featured_graph
+    FeaturedGraph,
+    edge_index,
+    node_feature, edge_feature, global_feature,
+    adjacency_list, normalized_laplacian, scaled_laplacian,
+
+    # from LightGraphs
+    adjacency_matrix, 
+
     # layers/gn
     GraphNet,
 
@@ -50,8 +58,10 @@ export
     # utils
     generate_cluster
 
+    
+include("featuredgraph.jl")
+include("graph_conversions.jl")
 include("datasets.jl")
-
 include("utils.jl")
 
 include("layers/gn.jl")
diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
new file mode 100644
index 000000000..3a10487c2
--- /dev/null
+++ b/src/featuredgraph.jl
@@ -0,0 +1,249 @@
+#===================================
+Define FeaturedGraph type as a subtype of LightGraphs' AbstractGraph.
+For the core methods to be implemented by any AbstractGraph, see
+https://juliagraphs.org/LightGraphs.jl/latest/types/#AbstractGraph-Type
+https://juliagraphs.org/LightGraphs.jl/latest/developing/#Developing-Alternate-Graph-Types
+=============================================#
+
+abstract type AbstractFeaturedGraph <: AbstractGraph{Int} end
+
+"""
+    NullGraph()
+
+Null object for `FeaturedGraph`.
+"""
+struct NullGraph <: AbstractFeaturedGraph end
+
+const COO_T = Tuple{T, T} where T <: AbstractVector
+const ADJMAT_T = AbstractMatrix
+const ADJLIST_T = AbstractVector{T} where T <: AbstractVector
+
+struct FeaturedGraph{T<:Union{COO_T,ADJMAT_T}} <: AbstractFeaturedGraph
+    graph::T
+    num_nodes::Int
+    num_edges::Int
+    nf
+    ef
+    gf
+    ## possible future property stores
+    # ndata::Dict{String, Any} # https://github.com/FluxML/Zygote.jl/issues/717        
+    # edata::Dict{String, Any}
+    # gdata::Dict{String, Any}
+end
+
+
+function FeaturedGraph(graph; 
+                        num_nodes = nothing, 
+                        graph_type = :coo,
+                        nf = nothing, 
+                        ef = nothing, 
+                        gf = nothing,
+                        # ndata = Dict{String, Any}(), 
+                        # edata = Dict{String, Any}(),
+                        # gdata = Dict{String, Any}()
+                        )
+
+    @assert graph_type ∈ [:coo, :adjmat] "Invalid graph_type $graph_type requested"
+    
+    if graph_type == :coo
+        graph, num_nodes, num_edges = convert_to_coo(graph; num_nodes)
+    else graph_type == :adjmat
+        graph, num_nodes, num_edges = convert_to_adjmat(graph)
+    end
+
+    ## I would like to have dict data store, but currently this 
+    ## doesn't play well with zygote due to 
+    ## https://github.com/FluxML/Zygote.jl/issues/717    
+    # ndata["x"] = nf
+    # edata["e"] = ef
+    # gdata["g"] = gf
+    
+
+    FeaturedGraph(graph, num_nodes, num_edges, nf, ef, gf)
+end
+
+FeaturedGraph(s::AbstractVector, t::AbstractVector; kws...) = FeaturedGraph((s,t); kws...)
+FeaturedGraph(g::AbstractGraph; kws...) = FeaturedGraph(adjacency_matrix(g, dir=:out); kws...)
+
+function FeaturedGraph(fg::FeaturedGraph; 
+                # ndata=copy(fg.ndata), edata=copy(fg.edata), gdata=copy(fg.gdata), # copy keeps the refs to old data 
+                nf=node_feature(fg), ef=edge_feature(fg), gf=global_feature(fg))
+    
+    FeaturedGraph(fg.graph; 
+                #   ndata, edata, gdata, 
+                  nf, ef, gf)
+end
+
+@functor FeaturedGraph
+
+"""
+    edge_index(fg::FeaturedGraph)
+
+Return a tuple containing two vectors, respectively containing the source and target 
+nodes of the edges in the graph `fg`.
+
+```julia
+s, t = edge_index(fg)
+```
+"""
+edge_index(fg::FeaturedGraph{<:COO_T}) = fg.graph
+
+LightGraphs.edges(fg::FeaturedGraph{<:COO_T}) = zip(edge_index(fg)...)
+
+LightGraphs.edgetype(fg::FeaturedGraph{<:COO_T}) = Tuple{Int, Int}
+
+function LightGraphs.has_edge(fg::FeaturedGraph{<:COO_T}, i::Integer, j::Integer)
+    s, t = edge_index(fg)
+    return any((s .== i) .& (t .== j))
+end
+
+LightGraphs.nv(fg::FeaturedGraph) = fg.num_nodes
+LightGraphs.ne(fg::FeaturedGraph) = fg.num_edges
+LightGraphs.has_vertex(fg::FeaturedGraph, i::Int) = i in 1:fg.num_nodes
+LightGraphs.vertices(fg::FeaturedGraph) = 1:fg.num_nodes
+
+function LightGraphs.outneighbors(fg::FeaturedGraph{<:COO_T}, i::Integer)
+    s, t = edge_index(fg)
+    return t[s .== i]
+end
+
+function LightGraphs.inneighbors(fg::FeaturedGraph{<:COO_T}, i::Integer)
+    s, t = edge_index(fg)
+    return s[t .== i]
+end
+
+LightGraphs.is_directed(::FeaturedGraph) = true
+LightGraphs.is_directed(::Type{FeaturedGraph}) = true
+
+function adjacency_list(fg::FeaturedGraph; dir=:out)
+    @assert dir ∈ [:out, :in]
+    fneighs = dir == :out ? outneighbors : inneighbors
+    return [fneighs(fg, i) for i in 1:fg.num_nodes]
+end
+
+# TODO return sparse matrix
+function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:COO_T}, T::DataType=Int; dir=:out)
+    # TODO dir=:both
+    s, t = edge_index(fg)
+    n = fg.num_nodes
+    adj_mat = fill!(similar(s, T, (n, n)), 0)
+    adj_mat[s .+ n .* (t .- 1)] .= 1 # exploiting linear indexing
+    return dir == :out ? adj_mat : adj_mat'
+end
+
+function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:ADJMAT_T}, T::DataType=eltype(fg.graph); dir=:out)
+    @assert dir == :out
+    A = fg.graph 
+    if T != eltype(A)
+        return T.(A)
+    else
+        return A
+    end
+end
+
+function LightGraphs.degree(fg::FeaturedGraph{<:COO_T}; dir=:out)
+    s, t = edge_index(fg)
+    degs = fill!(similar(s, eltype(s), fg.num_nodes), 0)
+    o = fill!(similar(s, eltype(s), fg.num_edges), 1)
+    if dir ∈ [:out, :both]
+        NNlib.scatter!(+, degs, o, s)
+    end
+    if dir ∈ [:in, :both]
+        NNlib.scatter!(+, degs, o, t)
+    end
+    return degs
+end
+
+# node_feature(fg::FeaturedGraph) = fg.ndata["x"]
+# edge_feature(fg::FeaturedGraph) = fg.edata["e"]
+# global_feature(fg::FeaturedGraph) = fg.gdata["g"]
+
+node_feature(fg::FeaturedGraph) = fg.nf
+edge_feature(fg::FeaturedGraph) = fg.ef
+global_feature(fg::FeaturedGraph) = fg.gf
+
+# function Base.getproperty(fg::FeaturedGraph, sym::Symbol)
+#     if sym === :nf
+#         return fg.ndata["x"]
+#     elseif sym === :ef
+#         return fg.edata["e"]
+#     elseif sym === :gf
+#         return fg.gdata["g"]
+#     else # fallback to getfield
+#         return getfield(fg, sym)
+#     end
+# end
+
+function LightGraphs.laplacian_matrix(fg::FeaturedGraph, T::DataType=Int; dir::Symbol=:out)
+    A = adjacency_matrix(fg, T; dir=dir)
+    D = Diagonal(vec(sum(A; dims=2)))
+    return D - A
+end
+
+"""
+    normalized_laplacian(fg, T=Float32; selfloop=false, dir=:out)
+
+Normalized Laplacian matrix of graph `fg`.
+
+# Arguments
+
+- `fg`: A `FeaturedGraph`.
+- `T`: result element type of degree vector; default `Float32`.
+- `selfloop`: adding self loop while calculating the matrix.
+- `dir`: the edge directionality considered (:out, :in, :both).
+"""
+function normalized_laplacian(fg::FeaturedGraph, T::DataType=Float32; selfloop::Bool=false, dir::Symbol=:out)
+    A = adjacency_matrix(fg, T; dir=dir)
+    selfloop && (A += I)
+    degs = vec(sum(A; dims=2))
+    inv_sqrtD = Diagonal(inv.(sqrt.(degs)))
+    return I - inv_sqrtD * A * inv_sqrtD
+end
+
+@doc raw"""
+    scaled_laplacian(g[, T]; dir=:out)
+
+Scaled Laplacian matrix of graph `g`,
+defined as ``\hat{L} = \frac{2}{\lambda_{max}} L - I`` where ``L`` is the normalized Laplacian matrix.
+
+# Arguments
+
+- `g`: a `FeaturedGraph`.
+- `T`: element type requested for the adjacency matrix; default is `Float32` (optional).
+- `dir`: the edge directionality considered (:out, :in, :both).
+"""
+function scaled_laplacian(fg::FeaturedGraph, T::DataType=Float32; dir=:out)
+    A = adjacency_matrix(fg, T; dir=dir)
+    @assert issymmetric(A) "scaled_laplacian only works with symmetric matrices"
+    E = eigen(Symmetric(A)).values
+    degs = vec(sum(A; dims=2))
+    inv_sqrtD = Diagonal(inv.(sqrt.(degs)))
+    Lnorm = I - inv_sqrtD * A * inv_sqrtD
+    return  2 / maximum(E) * Lnorm - I
+end
+
+function add_self_loops(fg::FeaturedGraph{<:COO_T})
+    s, t = edge_index(fg)
+    @assert edge_feature(fg) === nothing
+    mask_old_loops = s .!= t
+    s = s[mask_old_loops]
+    t = t[mask_old_loops]
+    n = fg.num_nodes
+    nodes = convert(typeof(s), [1:n;])
+    s = [s; nodes]
+    t = [t; nodes]
+    FeaturedGraph(s, t, nf=node_feature(fg), ef=edge_feature(fg), gf=global_feature(fg))
+end
+
+@non_differentiable normalized_laplacian(x...)
+@non_differentiable scaled_laplacian(x...)
+@non_differentiable adjacency_matrix(x...)
+@non_differentiable adjacency_list(x...)
+@non_differentiable degree(x...)
+@non_differentiable add_self_loops(x...)
+
+# # delete when https://github.com/JuliaDiff/ChainRules.jl/pull/472 is merged
+# function ChainRulesCore.rrule(::typeof(copy), x)
+#     copy_pullback(ȳ) = (NoTangent(), ȳ)
+#     return copy(x), copy_pullback
+# end
\ No newline at end of file
diff --git a/src/graph_conversions.jl b/src/graph_conversions.jl
new file mode 100644
index 000000000..75e672e21
--- /dev/null
+++ b/src/graph_conversions.jl
@@ -0,0 +1,60 @@
+### CONVERT_TO_COO REPRESENTATION ########
+
+function convert_to_coo(graph::COO_T; num_nodes=nothing)
+    s, t = graph   
+    num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
+    @assert length(s) == length(t)
+    @assert min(minimum(s), minimum(t)) >= 1 
+    @assert max(maximum(s), maximum(t)) <= num_nodes 
+
+    num_edges = length(s)
+    return graph, num_nodes, num_edges
+end
+
+function convert_to_coo(adj_mat::ADJMAT_T; dir=:out, num_nodes=nothing)
+    @assert dir ∈ [:out, :in]
+    num_nodes = size(adj_mat, 1)
+    @assert num_nodes == size(adj_mat, 2)
+    @assert all(x -> (x == 1) || (x == 0), adj_mat)
+    num_edges = round(Int, sum(adj_mat))
+    s = zeros(Int, num_edges)
+    t = zeros(Int, num_edges)
+    e = 0
+    for j in 1:num_nodes
+        for i in 1:num_nodes
+            if adj_mat[i, j] == 1
+                e += 1
+                s[e] = i
+                t[e] = j
+            end
+        end
+    end
+    @assert e == num_edges
+    if dir == :in
+        s, t = t, s
+    end
+    return (s, t), num_nodes, num_edges
+end
+
+function convert_to_coo(adj_list::ADJLIST_T; dir=:out, num_nodes=nothing)
+    @assert dir ∈ [:out, :in]
+    num_nodes = length(adj_list)
+    num_edges = sum(length.(adj_list))
+    s = zeros(Int, num_edges)
+    t = zeros(Int, num_edges)
+    e = 0
+    for i in 1:num_nodes
+        for j in adj_list[i]
+            e += 1
+            s[e] = i
+            t[e] = j 
+        end
+    end
+    @assert e == num_edges
+    if dir == :in
+        s, t = t, s
+    end
+    (s, t), num_nodes, num_edges
+end
+
+########################################################################
diff --git a/src/utils.jl b/src/utils.jl
index 9a59b563f..ea222a31c 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -63,17 +63,3 @@ edge_index_table(fg::FeaturedGraph) = edge_index_table(fg.graph, fg.directed)
 
 Zygote.@nograd edge_index_table
 
-### TODO move these to GraphSignals ######
-import GraphSignals: FeaturedGraph
-
-function FeaturedGraph(fg::FeaturedGraph; 
-                        nf=node_feature(fg), 
-                        ef=edge_feature(fg), 
-                        gf=global_feature(fg))
-
-    return FeaturedGraph(graph(fg); nf, ef, gf)
-end
-
-function check_num_nodes(fg::FeaturedGraph, x::AbstractArray)
-    @assert nv(fg) == size(x, ndims(x))    
-end
diff --git a/test/cuda/featured_graph.jl b/test/cuda/featured_graph.jl
new file mode 100644
index 000000000..c8daea441
--- /dev/null
+++ b/test/cuda/featured_graph.jl
@@ -0,0 +1,36 @@
+@testset "featured graph" begin
+    s = [1,1,2,3,4,5,5,5]
+    t = [2,5,3,2,1,4,3,1]
+    s, t = [s; t], [t; s]  #symmetrize
+    fg = FeaturedGraph(s, t) 
+    fg_gpu = fg |> gpu
+        
+    @testset "functor" begin
+        s_gpu, t_gpu = edge_index(fg_gpu)
+        @test s_gpu isa CuVector{Int}
+        @test Array(s_gpu) == s
+        @test t_gpu isa CuVector{Int}
+        @test Array(t_gpu) == t
+    end
+
+    @testset "adjacency_matrix" begin
+        mat = adjacency_matrix(fg)
+        mat_gpu = adjacency_matrix(fg_gpu)
+        @test mat_gpu isa CuMatrix{Int}
+    end
+
+    @testset "normalized_laplacian" begin
+        mat = normalized_laplacian(fg)
+        mat_gpu = normalized_laplacian(fg_gpu)
+        @test mat_gpu isa CuMatrix{Float32}
+    end
+
+    @testset "scaled_laplacian" begin
+        @test_broken begin
+            mat = scaled_laplacian(fg)
+            mat_gpu = scaled_laplacian(fg_gpu)
+            @test mat_gpu isa CuMatrix{Float32}
+            true
+        end
+    end
+end
\ No newline at end of file
diff --git a/test/featured_graph.jl b/test/featured_graph.jl
new file mode 100644
index 000000000..66e193a39
--- /dev/null
+++ b/test/featured_graph.jl
@@ -0,0 +1,81 @@
+@testset "FeaturedGraph" begin
+    @testset "symmetric graph" begin
+        u = [1, 2, 3, 4, 2, 3, 4, 1]
+        v = [2, 3, 4, 1, 1, 2, 3, 4]
+        adj_mat =  [0  1  0  1
+                    1  0  1  0
+                    0  1  0  1
+                    1  0  1  0]
+        adj_list_out =  [[2,4], [3,1], [4,2], [1,3]]
+        adj_list_in =  [[4,2], [1,3], [2,4], [3,1]]
+
+        # core functionality
+        fg = FeaturedGraph(u, v)
+        @test fg.num_edges == 8
+        @test fg.num_nodes == 4
+        @test collect(edges(fg)) == collect(zip(u, v))
+        @test sort(outneighbors(fg, 1)) == [2, 4] 
+        @test sort(inneighbors(fg, 1)) == [2, 4] 
+        @test is_directed(fg) == true
+
+        # adjacency
+        @test adjacency_matrix(fg) == adj_mat
+        @test adjacency_matrix(fg; dir=:in) == adj_mat
+        @test adjacency_matrix(fg; dir=:out) == adj_mat
+        @test adjacency_list(fg; dir=:in) == adj_list_in
+        @test adjacency_list(fg; dir=:out) == adj_list_out
+
+        @testset "constructors" begin  # graph built from a dense adjacency matrix
+            fg = FeaturedGraph(adj_mat)
+            @test adjacency_matrix(fg; dir=:out) == adj_mat  # assert it; a bare `==` is discarded
+            @test adjacency_matrix(fg; dir=:in) == adj_mat   # symmetric graph: :in equals :out
+        end 
+
+        @testset "degree" begin
+            fg = FeaturedGraph(adj_mat)
+            @test degree(fg, dir=:out) == vec(sum(adj_mat, dims=2))
+            @test degree(fg, dir=:in) == vec(sum(adj_mat, dims=1))
+        end
+    end
+
+    @testset "asymmetric graph" begin
+        u = [1, 2, 3, 4]
+        v = [2, 3, 4, 1]
+        adj_mat_out =  [0  1  0  0
+                        0  0  1  0
+                        0  0  0  1
+                        1  0  0  0]
+        adj_list_out =  [[2], [3], [4], [1]]
+
+
+        adj_mat_in =   [0  0  0  1
+                        1  0  0  0
+                        0  1  0  0
+                        0  0  1  0]
+        adj_list_in =  [[4], [1], [2], [3]]
+
+        # core functionality
+        fg = FeaturedGraph(u, v)
+        @test fg.num_edges == 4
+        @test fg.num_nodes == 4
+        @test collect(edges(fg)) == collect(zip(u, v))
+        @test sort(outneighbors(fg, 1)) == [2] 
+        @test sort(inneighbors(fg, 1)) == [4] 
+        @test is_directed(fg) == true
+
+        # adjacency
+        @test adjacency_matrix(fg) ==  adj_mat_out
+        @test adjacency_list(fg) ==  adj_list_out
+        @test adjacency_matrix(fg, dir=:out) ==  adj_mat_out
+        @test adjacency_list(fg, dir=:out) ==  adj_list_out
+        @test adjacency_matrix(fg, dir=:in) ==  adj_mat_in
+        @test adjacency_list(fg, dir=:in) ==  adj_list_in
+
+        @testset "degree" begin
+            fg = FeaturedGraph(adj_mat_out)
+            @test degree(fg, dir=:out) == vec(sum(adj_mat_out, dims=2))
+            @test degree(fg, dir=:in) == vec(sum(adj_mat_out, dims=1))
+        end
+    end
+
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 46f335f15..f7dcbd11c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,27 +3,27 @@ using GeometricFlux.Datasets
 using Flux
 using Flux: @functor
 using FillArrays
-using GraphSignals
-using LightGraphs: SimpleGraph, SimpleDiGraph, add_edge!, nv, ne
 using LinearAlgebra
 using NNlib
-using SparseArrays: SparseMatrixCSC
+using LightGraphs
 using Statistics: mean
 using Zygote
 using Test
 
 cuda_tests = [
+    # "cuda/featured_graph",
     # "cuda/conv",
     # "cuda/msgpass",
 ]
 
 tests = [
-    "layers/gn",
-    "layers/msgpass",
-    "layers/conv",
-    "layers/pool",
-    "layers/misc",
-    "models",
+    "featured_graph",
+    # "layers/gn",
+    # "layers/msgpass",
+    # "layers/conv",
+    # "layers/pool",
+    # "layers/misc",
+    # "models",
 ]
 
 if Flux.use_cuda[]

From b253a852169a2f39098b0e4daaf685409e7465ec Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Sat, 31 Jul 2021 09:46:08 +0200
Subject: [PATCH 02/15] some fixes

---
 src/GeometricFlux.jl   |  2 +-
 src/featuredgraph.jl   |  2 ++
 src/utils.jl           | 31 +++++++++++--------------------
 test/layers/conv.jl    | 20 +++++++++-----------
 test/layers/gn.jl      |  8 ++++----
 test/layers/misc.jl    |  2 +-
 test/layers/msgpass.jl |  6 +++---
 test/runtests.jl       | 12 ++++++------
 8 files changed, 37 insertions(+), 46 deletions(-)

diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
index 79566e67c..5ca1493ec 100644
--- a/src/GeometricFlux.jl
+++ b/src/GeometricFlux.jl
@@ -16,7 +16,7 @@ using LightGraphs: AbstractGraph, outneighbors, inneighbors, is_directed, ne, nv
 export
     # featured_graph
     FeaturedGraph,
-    edge_index,
+    graph, edge_index,
     node_feature, edge_feature, global_feature,
     adjacency_list, normalized_laplacian, scaled_laplacian,
 
diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
index 3a10487c2..88ff32305 100644
--- a/src/featuredgraph.jl
+++ b/src/featuredgraph.jl
@@ -88,6 +88,8 @@ s, t = edge_index(fg)
 """
 edge_index(fg::FeaturedGraph{<:COO_T}) = fg.graph
 
+graph(fg::FeaturedGraph) = fg.graph
+
 LightGraphs.edges(fg::FeaturedGraph{<:COO_T}) = zip(edge_index(fg)...)
 
 LightGraphs.edgetype(fg::FeaturedGraph{<:COO_T}) = Tuple{Int, Int}
diff --git a/src/utils.jl b/src/utils.jl
index ea222a31c..36ae9d2f8 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -21,31 +21,19 @@ Zygote.@nograd function generate_cluster(M::AbstractArray{T,N}, accu_edge) where
 end
 
 """
-    edge_index_table(adj[, directed])
+    edge_index_table(adj)
 
 Generate a mapping from vertex pair (i, j) to edge index. The edge indecies are determined by
 the sorted vertex indecies.
 """
-function edge_index_table(adj::AbstractVector{<:AbstractVector{<:Integer}}, directed::Bool=is_directed(adj))
+function edge_index_table(adj::AbstractVector{<:AbstractVector{<:Integer}})
     table = Dict{Tuple{UInt32,UInt32},UInt64}()
     e = one(UInt64)
-    if directed
-        for (i, js) = enumerate(adj)
-            js = sort(js)
-            for j = js
-                table[(i, j)] = e
-                e += one(UInt64)
-            end
-        end
-    else
-        for (i, js) = enumerate(adj)
-            js = sort(js)
-            js = js[i .≤ js]
-            for j = js
-                table[(i, j)] = e
-                table[(j, i)] = e
-                e += one(UInt64)
-            end
+    for (i, js) = enumerate(adj)
+        js = sort(js)
+        for j = js
+            table[(i, j)] = e
+            e += one(UInt64)
         end
     end
     table
@@ -59,7 +47,10 @@ function edge_index_table(vpair::AbstractVector{<:Tuple})
     table
 end
 
-edge_index_table(fg::FeaturedGraph) = edge_index_table(fg.graph, fg.directed)
+edge_index_table(fg::FeaturedGraph) = edge_index_table(fg.graph)
 
 Zygote.@nograd edge_index_table
 
+function check_num_nodes(fg::FeaturedGraph, x::AbstractArray)
+    @assert nv(fg) == size(x, ndims(x))    
+end
\ No newline at end of file
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 3d7131b0c..68ecb1ad4 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -25,7 +25,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             gc = GCNConv(fg, in_channel=>out_channel)
             @test size(gc.weight) == (out_channel, in_channel)
             @test size(gc.bias) == (out_channel,)
-            @test graph(gc.fg) === adj
+            @test adjacency_matrix(gc.fg) == adj
 
             Y = gc(X)
             @test size(Y) == (out_channel, N)
@@ -46,8 +46,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             gc = GCNConv(in_channel=>out_channel)
             @test size(gc.weight) == (out_channel, in_channel)
             @test size(gc.bias) == (out_channel,)
-            @test !has_graph(gc.fg)
-
+            
             fg = FeaturedGraph(adj, nf=X)
             fg_ = gc(fg)
             @test size(node_feature(fg_)) == (out_channel, N)
@@ -59,7 +58,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             @test size(node_feature(fgt_)) == (out_channel, N)
 
             g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
-            @test size(g[].nf) == size(X)
+            @test size(g.nf) == size(X)
 
             g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
             @test size(g.weight) == size(gc.weight)
@@ -81,7 +80,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             cc = ChebConv(fg, in_channel=>out_channel, k)
             @test size(cc.weight) == (out_channel, in_channel, k)
             @test size(cc.bias) == (out_channel,)
-            @test graph(cc.fg) === adj
+            @test adjacency_matrix(cc.fg) == adj
             @test cc.k == k
             
             Y = cc(X)
@@ -103,7 +102,6 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             cc = ChebConv(in_channel=>out_channel, k)
             @test size(cc.weight) == (out_channel, in_channel, k)
             @test size(cc.bias) == (out_channel,)
-            @test !has_graph(cc.fg)
             @test cc.k == k
             
             fg = FeaturedGraph(adj, nf=X)
@@ -117,7 +115,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             @test size(node_feature(fgt_)) == (out_channel, N)
 
             g = Zygote.gradient(x -> sum(node_feature(cc(x))), fg)[1]
-            @test size(g[].nf) == size(X)
+            @test size(g.nf) == size(X)
 
             g = Zygote.gradient(model -> sum(node_feature(model(fg))), cc)[1]
             @test size(g.weight) == size(cc.weight)
@@ -173,7 +171,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             @test size(node_feature(fgt_)) == (out_channel, N)
 
             g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
-            @test size(g[].nf) == size(X)
+            @test size(g.nf) == size(X)
 
             g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
             @test size(g.weight1) == size(gc.weight1)
@@ -244,7 +242,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
                 @test size(node_feature(fgt_)) == (concat ? (out_channel*heads, N) : (out_channel, N))
 
                 g = Zygote.gradient(x -> sum(node_feature(gat(x))), fg_gat)[1]
-                @test size(g[].nf) == size(X)
+                @test size(g.nf) == size(X)
 
                 g = Zygote.gradient(model -> sum(node_feature(model(fg_gat))), gat)[1]
                 @test size(g.weight) == size(gat.weight)
@@ -298,7 +296,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             @test size(node_feature(fgt_)) == (out_channel, N)
 
             g = Zygote.gradient(x -> sum(node_feature(ggc(x))), fg)[1]
-            @test size(g[].nf) == size(X)
+            @test size(g.nf) == size(X)
 
             g = Zygote.gradient(model -> sum(node_feature(model(fg))), ggc)[1]
             @test size(g.weight) == size(ggc.weight)
@@ -341,7 +339,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             @test size(node_feature(fgt_)) == (out_channel, N)
 
             g = Zygote.gradient(x -> sum(node_feature(ec(x))), fg)[1]
-            @test size(g[].nf) == size(X)
+            @test size(g.nf) == size(X)
 
             g = Zygote.gradient(model -> sum(node_feature(model(fg))), ec)[1]
             @test size(g.nn.weight) == size(ec.nn.weight)
diff --git a/test/layers/gn.jl b/test/layers/gn.jl
index 0c32ec1f9..a8b055e0c 100644
--- a/test/layers/gn.jl
+++ b/test/layers/gn.jl
@@ -27,7 +27,7 @@ u = rand(T, in_channel)
         fg = FeaturedGraph(adj, nf=V)
         fg_ = l(fg)
 
-        @test graph(fg_) === adj
+        @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (in_channel, num_V)
         @test size(edge_feature(fg_)) == (0, 2*num_E)
         @test size(global_feature(fg_)) == (0,)
@@ -40,7 +40,7 @@ u = rand(T, in_channel)
         l = NewGNLayer()
         fg_ = l(fg)
 
-        @test graph(fg_) === adj
+        @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (in_channel, num_V)
         @test size(edge_feature(fg_)) == (in_channel, 2*num_E)
         @test size(global_feature(fg_)) == (0,)
@@ -54,7 +54,7 @@ u = rand(T, in_channel)
         l = NewGNLayer()
         fg_ = l(fg)
 
-        @test graph(fg_) === adj
+        @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (in_channel, num_V)
         @test size(edge_feature(fg_)) == (out_channel, 2*num_E)
         @test size(global_feature(fg_)) == (0,)
@@ -68,7 +68,7 @@ u = rand(T, in_channel)
         l = NewGNLayer()
         fg_ = l(fg)
 
-        @test graph(fg_) === adj
+        @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (out_channel, num_V)
         @test size(edge_feature(fg_)) == (out_channel, 2*num_E)
         @test size(global_feature(fg_)) == (in_channel,)
diff --git a/test/layers/misc.jl b/test/layers/misc.jl
index 58ba83029..9f89500cd 100644
--- a/test/layers/misc.jl
+++ b/test/layers/misc.jl
@@ -16,7 +16,7 @@
                                 x -> x .+ 2.,
                                 x -> x .+ 3.)
         fg_ = layer(fg)
-        @test graph(fg_) == adj
+        @test adjacency_matrix(fg_) == adj
         @test node_feature(fg_) == nf .+ 1.
         @test edge_feature(fg_) == ef .+ 2.
         @test global_feature(fg_) == gf .+ 3.
diff --git a/test/layers/msgpass.jl b/test/layers/msgpass.jl
index 9e9c86caa..04e1096de 100644
--- a/test/layers/msgpass.jl
+++ b/test/layers/msgpass.jl
@@ -27,7 +27,7 @@ l = NewLayer(out_channel, in_channel)
     @testset "no message or update" begin
         fg_ = l(fg)
 
-        @test graph(fg_) == adj
+        @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (in_channel, num_V)
         @test size(edge_feature(fg_)) == (in_channel, 2*num_E)
         @test size(global_feature(fg_)) == (0,)
@@ -37,7 +37,7 @@ l = NewLayer(out_channel, in_channel)
     @testset "message function" begin
         fg_ = l(fg)
 
-        @test graph(fg_) == adj
+        @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (out_channel, num_V)
         @test size(edge_feature(fg_)) == (out_channel, 2*num_E)
         @test size(global_feature(fg_)) == (0,)
@@ -47,7 +47,7 @@ l = NewLayer(out_channel, in_channel)
     @testset "message and update" begin
         fg_ = l(fg)
 
-        @test graph(fg_) == adj
+        @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (out_channel, num_V)
         @test size(edge_feature(fg_)) == (out_channel, 2*num_E)
         @test size(global_feature(fg_)) == (0,)
diff --git a/test/runtests.jl b/test/runtests.jl
index f7dcbd11c..8c530c49a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -18,12 +18,12 @@ cuda_tests = [
 
 tests = [
     "featured_graph",
-    # "layers/gn",
-    # "layers/msgpass",
-    # "layers/conv",
-    # "layers/pool",
-    # "layers/misc",
-    # "models",
+    "layers/gn",
+    "layers/msgpass",
+    "layers/conv",
+    "layers/pool",
+    "layers/misc",
+    "models",
 ]
 
 if Flux.use_cuda[]

From f1fc8f209f707693b2cbb3a41279050aae25f626 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Sun, 1 Aug 2021 12:18:31 +0200
Subject: [PATCH 03/15] tests passing for both types

---
 src/GeometricFlux.jl     |   1 +
 src/featuredgraph.jl     |  65 +++++---
 src/graph_conversions.jl |  55 ++++++-
 src/layers/conv.jl       |   8 +-
 src/utils.jl             |  12 +-
 test/layers/conv.jl      | 327 +++++++++++++++++++--------------------
 test/layers/gn.jl        |  12 +-
 test/runtests.jl         |   6 +-
 8 files changed, 280 insertions(+), 206 deletions(-)

diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
index 5ca1493ec..1db7c6b0c 100644
--- a/src/GeometricFlux.jl
+++ b/src/GeometricFlux.jl
@@ -1,5 +1,6 @@
 module GeometricFlux
 
+using LinearAlgebra: similar
 using Statistics: mean
 using LinearAlgebra
 using CUDA
diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
index 88ff32305..2ea7dfef5 100644
--- a/src/featuredgraph.jl
+++ b/src/featuredgraph.jl
@@ -34,7 +34,8 @@ end
 
 function FeaturedGraph(graph; 
                         num_nodes = nothing, 
-                        graph_type = :coo,
+                        graph_type = :adjmat,
+                        dir = :out,
                         nf = nothing, 
                         ef = nothing, 
                         gf = nothing,
@@ -44,15 +45,15 @@ function FeaturedGraph(graph;
                         )
 
     @assert graph_type ∈ [:coo, :adjmat] "Invalid graph_type $graph_type requested"
-    
+    @assert dir ∈ [:in, :out]
     if graph_type == :coo
-        graph, num_nodes, num_edges = convert_to_coo(graph; num_nodes)
+        graph, num_nodes, num_edges = to_coo(graph; num_nodes, dir)
     else graph_type == :adjmat
-        graph, num_nodes, num_edges = convert_to_adjmat(graph)
+        graph, num_nodes, num_edges = to_adjmat(graph; dir)
     end
 
-    ## I would like to have dict data store, but currently this 
-    ## doesn't play well with zygote due to 
+    ## Possible future implementation of feature maps. 
+    ## Currently this doesn't play well with zygote due to 
     ## https://github.com/FluxML/Zygote.jl/issues/717    
     # ndata["x"] = nf
     # edata["e"] = ef
@@ -66,12 +67,11 @@ FeaturedGraph(s::AbstractVector, t::AbstractVector; kws...) = FeaturedGraph((s,t
 FeaturedGraph(g::AbstractGraph; kws...) = FeaturedGraph(adjacency_matrix(g, dir=:out); kws...)
 
 function FeaturedGraph(fg::FeaturedGraph; 
-                # ndata=copy(fg.ndata), edata=copy(fg.edata), gdata=copy(fg.gdata), # copy keeps the refs to old data 
+                num_nodes=fg.num_nodes,
                 nf=node_feature(fg), ef=edge_feature(fg), gf=global_feature(fg))
+                # ndata=copy(fg.ndata), edata=copy(fg.edata), gdata=copy(fg.gdata), # copy keeps the refs to old data 
     
-    FeaturedGraph(fg.graph; 
-                #   ndata, edata, gdata, 
-                  nf, ef, gf)
+    FeaturedGraph(fg.graph; num_nodes, nf, ef, gf) #   ndata, edata, gdata, 
 end
 
 @functor FeaturedGraph
@@ -114,6 +114,16 @@ function LightGraphs.inneighbors(fg::FeaturedGraph{<:COO_T}, i::Integer)
     return s[t .== i]
 end
 
+function LightGraphs.outneighbors(fg::FeaturedGraph{<:ADJMAT_T}, i::Integer)
+    A = graph(fg)
+    return findall(!=(0), A[i,:])
+end
+
+function LightGraphs.inneighbors(fg::FeaturedGraph{<:ADJMAT_T}, i::Integer)
+    A = graph(fg)
+    return findall(!=(0), A[:,i])
+end
+
 LightGraphs.is_directed(::FeaturedGraph) = true
 LightGraphs.is_directed(::Type{FeaturedGraph}) = true
 
@@ -125,22 +135,15 @@ end
 
 # TODO return sparse matrix
 function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:COO_T}, T::DataType=Int; dir=:out)
-    # TODO dir=:both
-    s, t = edge_index(fg)
-    n = fg.num_nodes
-    adj_mat = fill!(similar(s, T, (n, n)), 0)
-    adj_mat[s .+ n .* (t .- 1)] .= 1 # exploiting linear indexing
-    return dir == :out ? adj_mat : adj_mat'
+    A, n, m = to_adjmat(fg.graph, T, num_nodes=fg.num_nodes)
+    return dir == :out ? A : A'
 end
 
 function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:ADJMAT_T}, T::DataType=eltype(fg.graph); dir=:out)
-    @assert dir == :out
+    @assert dir ∈ [:in, :out]
     A = fg.graph 
-    if T != eltype(A)
-        return T.(A)
-    else
-        return A
-    end
+    A = T != eltype(A) ? T.(A) : A
+    return dir == :out ? A : A'
 end
 
 function LightGraphs.degree(fg::FeaturedGraph{<:COO_T}; dir=:out)
@@ -234,7 +237,21 @@ function add_self_loops(fg::FeaturedGraph{<:COO_T})
     nodes = convert(typeof(s), [1:n;])
     s = [s; nodes]
     t = [t; nodes]
-    FeaturedGraph(s, t, nf=node_feature(fg), ef=edge_feature(fg), gf=global_feature(fg))
+
+    FeaturedGraph((s, t), fg.num_nodes, fg.num_edges,
+        node_feature(fg), edge_feature(fg), global_feature(fg))
+end
+
+
+function remove_self_loops(fg::FeaturedGraph{<:COO_T})
+    # Return a copy of `fg` whose edge list has every self-loop (s == t) dropped.
+    s, t = edge_index(fg)
+    @assert edge_feature(fg) === nothing  # edge features are not filtered here
+    mask_old_loops = s .!= t  # true for every edge that is NOT a self-loop
+    s = s[mask_old_loops]
+    t = t[mask_old_loops]
+    # Edges were removed, so recompute the edge count instead of reusing fg.num_edges.
+    FeaturedGraph((s, t), fg.num_nodes, length(s), node_feature(fg), edge_feature(fg), global_feature(fg))
+end
 
 @non_differentiable normalized_laplacian(x...)
@@ -243,6 +260,8 @@ end
 @non_differentiable adjacency_list(x...)
 @non_differentiable degree(x...)
 @non_differentiable add_self_loops(x...)
+@non_differentiable remove_self_loops(x...)
+
 
 # # delete when https://github.com/JuliaDiff/ChainRules.jl/pull/472 is merged
 # function ChainRulesCore.rrule(::typeof(copy), x)
diff --git a/src/graph_conversions.jl b/src/graph_conversions.jl
index 75e672e21..552655ac7 100644
--- a/src/graph_conversions.jl
+++ b/src/graph_conversions.jl
@@ -1,6 +1,6 @@
 ### CONVERT_TO_COO REPRESENTATION ########
 
-function convert_to_coo(graph::COO_T; num_nodes=nothing)
+function to_coo(graph::COO_T; num_nodes=nothing)
     s, t = graph   
     num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
     @assert length(s) == length(t)
@@ -11,7 +11,7 @@ function convert_to_coo(graph::COO_T; num_nodes=nothing)
     return graph, num_nodes, num_edges
 end
 
-function convert_to_coo(adj_mat::ADJMAT_T; dir=:out, num_nodes=nothing)
+function to_coo(adj_mat::ADJMAT_T; dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
     num_nodes = size(adj_mat, 1)
     @assert num_nodes == size(adj_mat, 2)
@@ -36,7 +36,7 @@ function convert_to_coo(adj_mat::ADJMAT_T; dir=:out, num_nodes=nothing)
     return (s, t), num_nodes, num_edges
 end
 
-function convert_to_coo(adj_list::ADJLIST_T; dir=:out, num_nodes=nothing)
+function to_coo(adj_list::ADJLIST_T; dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
     num_nodes = length(adj_list)
     num_edges = sum(length.(adj_list))
@@ -57,4 +57,51 @@ function convert_to_coo(adj_list::ADJLIST_T; dir=:out, num_nodes=nothing)
     (s, t), num_nodes, num_edges
 end
 
-########################################################################
+### CONVERT TO ADJACENCY MATRIX ################
+
+function to_adjmat(adj_mat::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num_nodes=nothing)
+    @assert dir ∈ [:out, :in]
+    num_nodes = size(adj_mat, 1)  # the `num_nodes` keyword is overwritten by the matrix size
+    @assert num_nodes == size(adj_mat, 2)  # the adjacency matrix must be square
+    # @assert all(x -> (x == 1) || (x == 0), adj_mat)
+    num_edges = round(Int, sum(adj_mat))  # sum of entries; an edge count only if entries are 0/1
+    if dir == :in
+        adj_mat = adj_mat'  # presumably flips an :in-oriented input to :out orientation -- confirm
+    end
+    if T != eltype(adj_mat)
+        adj_mat = T.(adj_mat)  # element-wise conversion to the requested element type
+    end
+    return adj_mat, num_nodes, num_edges
+end
+
+function to_adjmat(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=nothing)
+    @assert dir ∈ [:out, :in]
+    num_nodes = length(adj_list)  # the `num_nodes` keyword is overwritten by the list length
+    num_edges = sum(length.(adj_list))
+    @assert num_nodes > 0  # adj_list[1] is needed below as the array prototype
+    A = fill!(similar(adj_list[1], T, (num_nodes, num_nodes)), 0)  # `similar` alone is uninitialized
+    if dir == :out
+        for (i, neigs) in enumerate(adj_list)
+            A[i, neigs] .= 1  # adj_list[i] holds the targets of the edges leaving i
+        end
+    else 
+        for (i, neigs) in enumerate(adj_list)
+            A[neigs, i] .= 1  # adj_list[i] holds the sources of the edges entering i
+        end
+    end
+    A, num_nodes, num_edges
+end
+
+function to_adjmat(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
+    # Dir will be ignored since the input eindes is alwasys in source target format.
+    # The output will always be a adjmat in :out format (e.g. A[i,j] denotes from i to j)
+    s, t = eindex
+    n = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes
+    adj_mat = fill!(similar(s, T, (n, n)), 0)
+    adj_mat[s .+ n .* (t .- 1)] .= 1 # exploiting linear indexing
+    return adj_mat, n, length(s)
+end
+
+## TODO
+# to_sparse
+# to_dense
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index a8831b4e1..01958b386 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -246,10 +246,6 @@ update_batch_edge(gat::GATConv, adj, E::AbstractMatrix, X::AbstractMatrix, u) =
 
 function update_batch_edge(gat::GATConv, adj, X::AbstractMatrix)
     n = size(adj, 1)
-    # a vertex must always receive a message from itself
-    Zygote.ignore() do
-        GraphLaplacians.add_self_loop!(adj, n)
-    end
     mapreduce(i -> apply_batch_message(gat, i, adj[i], X), hcat, 1:n)
 end
 
@@ -266,8 +262,10 @@ function update_batch_vertex(gat::GATConv, M::AbstractMatrix)
 end
 
 function (gat::GATConv)(fg::FeaturedGraph, X::AbstractMatrix)
+    # a vertex must always receive a message from itself
+    adjlist = add_self_loops(adjacency_list(fg))
     check_num_nodes(fg, X)
-    _, X = propagate(gat, adjacency_list(fg), Fill(0.f0, 0, ne(fg)), X, +)
+    _, X = propagate(gat, adjlist, Fill(0.f0, 0, ne(fg)), X, +)
     X
 end
 
diff --git a/src/utils.jl b/src/utils.jl
index 36ae9d2f8..67ab2d7f2 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -53,4 +53,14 @@ Zygote.@nograd edge_index_table
 
 function check_num_nodes(fg::FeaturedGraph, x::AbstractArray)
     @assert nv(fg) == size(x, ndims(x))    
-end
\ No newline at end of file
+end
+
+function add_self_loops(adjlist::AbstractVector{<:AbstractVector})
+    anew = deepcopy(adjlist)  # work on a copy so the caller's adjacency list is not mutated
+    for (i, neigs) in enumerate(anew)
+        if i ∉ neigs  # skip nodes that already list themselves as a neighbor
+            push!(neigs, i)
+        end
+    end
+    return anew
+end
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 68ecb1ad4..a0562ac96 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -18,173 +18,173 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             
 
 @testset "layer" begin
-    @testset "GCNConv" begin
-        X = rand(T, in_channel, N)
-        Xt = transpose(rand(T, N, in_channel))
-        @testset "layer with graph" begin
-            gc = GCNConv(fg, in_channel=>out_channel)
-            @test size(gc.weight) == (out_channel, in_channel)
-            @test size(gc.bias) == (out_channel,)
-            @test adjacency_matrix(gc.fg) == adj
-
-            Y = gc(X)
-            @test size(Y) == (out_channel, N)
-
-            # Test with transposed features
-            Y = gc(Xt)
-            @test size(Y) == (out_channel, N)
-
-            g = Zygote.gradient(x -> sum(gc(x)), X)[1]
-            @test size(g) == size(X)
-
-            g = Zygote.gradient(model -> sum(model(X)), gc)[1]
-            @test size(g.weight) == size(gc.weight)
-            @test size(g.bias) == size(gc.bias)
-        end
-
-        @testset "layer without graph" begin
-            gc = GCNConv(in_channel=>out_channel)
-            @test size(gc.weight) == (out_channel, in_channel)
-            @test size(gc.bias) == (out_channel,)
+    # @testset "GCNConv" begin
+    #     X = rand(T, in_channel, N)
+    #     Xt = transpose(rand(T, N, in_channel))
+    #     @testset "layer with graph" begin
+    #         gc = GCNConv(fg, in_channel=>out_channel)
+    #         @test size(gc.weight) == (out_channel, in_channel)
+    #         @test size(gc.bias) == (out_channel,)
+    #         @test adjacency_matrix(gc.fg) == adj
+
+    #         Y = gc(X)
+    #         @test size(Y) == (out_channel, N)
+
+    #         # Test with transposed features
+    #         Y = gc(Xt)
+    #         @test size(Y) == (out_channel, N)
+
+    #         g = Zygote.gradient(x -> sum(gc(x)), X)[1]
+    #         @test size(g) == size(X)
+
+    #         g = Zygote.gradient(model -> sum(model(X)), gc)[1]
+    #         @test size(g.weight) == size(gc.weight)
+    #         @test size(g.bias) == size(gc.bias)
+    #     end
+
+    #     @testset "layer without graph" begin
+    #         gc = GCNConv(in_channel=>out_channel)
+    #         @test size(gc.weight) == (out_channel, in_channel)
+    #         @test size(gc.bias) == (out_channel,)
             
-            fg = FeaturedGraph(adj, nf=X)
-            fg_ = gc(fg)
-            @test size(node_feature(fg_)) == (out_channel, N)
-            @test_throws MethodError gc(X)
+    #         fg = FeaturedGraph(adj, nf=X)
+    #         fg_ = gc(fg)
+    #         @test size(node_feature(fg_)) == (out_channel, N)
+    #         @test_throws MethodError gc(X)
             
-            # Test with transposed features
-            fgt = FeaturedGraph(adj, nf=Xt)
-            fgt_ = gc(fgt)
-            @test size(node_feature(fgt_)) == (out_channel, N)
-
-            g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
-            @test size(g.nf) == size(X)
-
-            g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
-            @test size(g.weight) == size(gc.weight)
-            @test size(g.bias) == size(gc.bias)
-        end
-
-        @testset "bias=false" begin
-            @test length(Flux.params(GCNConv(2=>3))) == 2
-            @test length(Flux.params(GCNConv(2=>3, bias=false))) == 1
-        end
-    end
-
-
-    @testset "ChebConv" begin
-        k = 6
-        X = rand(T, in_channel, N)
-        Xt = transpose(rand(T, N, in_channel))
-        @testset "layer with graph" begin
-            cc = ChebConv(fg, in_channel=>out_channel, k)
-            @test size(cc.weight) == (out_channel, in_channel, k)
-            @test size(cc.bias) == (out_channel,)
-            @test adjacency_matrix(cc.fg) == adj
-            @test cc.k == k
+    #         # Test with transposed features
+    #         fgt = FeaturedGraph(adj, nf=Xt)
+    #         fgt_ = gc(fgt)
+    #         @test size(node_feature(fgt_)) == (out_channel, N)
+
+    #         g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
+    #         @test size(g.nf) == size(X)
+
+    #         g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
+    #         @test size(g.weight) == size(gc.weight)
+    #         @test size(g.bias) == size(gc.bias)
+    #     end
+
+    #     @testset "bias=false" begin
+    #         @test length(Flux.params(GCNConv(2=>3))) == 2
+    #         @test length(Flux.params(GCNConv(2=>3, bias=false))) == 1
+    #     end
+    # end
+
+
+    # @testset "ChebConv" begin
+    #     k = 6
+    #     X = rand(T, in_channel, N)
+    #     Xt = transpose(rand(T, N, in_channel))
+    #     @testset "layer with graph" begin
+    #         cc = ChebConv(fg, in_channel=>out_channel, k)
+    #         @test size(cc.weight) == (out_channel, in_channel, k)
+    #         @test size(cc.bias) == (out_channel,)
+    #         @test adjacency_matrix(cc.fg) == adj
+    #         @test cc.k == k
             
-            Y = cc(X)
-            @test size(Y) == (out_channel, N)
-
-            # Test with transposed features
-            Y = cc(Xt)
-            @test size(Y) == (out_channel, N)
-
-            g = Zygote.gradient(x -> sum(cc(x)), X)[1]
-            @test size(g) == size(X)
-
-            g = Zygote.gradient(model -> sum(model(X)), cc)[1]
-            @test size(g.weight) == size(cc.weight)
-            @test size(g.bias) == size(cc.bias)
-        end
-
-        @testset "layer without graph" begin
-            cc = ChebConv(in_channel=>out_channel, k)
-            @test size(cc.weight) == (out_channel, in_channel, k)
-            @test size(cc.bias) == (out_channel,)
-            @test cc.k == k
+    #         Y = cc(X)
+    #         @test size(Y) == (out_channel, N)
+
+    #         # Test with transposed features
+    #         Y = cc(Xt)
+    #         @test size(Y) == (out_channel, N)
+
+    #         g = Zygote.gradient(x -> sum(cc(x)), X)[1]
+    #         @test size(g) == size(X)
+
+    #         g = Zygote.gradient(model -> sum(model(X)), cc)[1]
+    #         @test size(g.weight) == size(cc.weight)
+    #         @test size(g.bias) == size(cc.bias)
+    #     end
+
+    #     @testset "layer without graph" begin
+    #         cc = ChebConv(in_channel=>out_channel, k)
+    #         @test size(cc.weight) == (out_channel, in_channel, k)
+    #         @test size(cc.bias) == (out_channel,)
+    #         @test cc.k == k
             
-            fg = FeaturedGraph(adj, nf=X)
-            fg_ = cc(fg)
-            @test size(node_feature(fg_)) == (out_channel, N)
-            @test_throws MethodError cc(X)
-
-            # Test with transposed features
-            fgt = FeaturedGraph(adj, nf=Xt)
-            fgt_ = cc(fgt)
-            @test size(node_feature(fgt_)) == (out_channel, N)
-
-            g = Zygote.gradient(x -> sum(node_feature(cc(x))), fg)[1]
-            @test size(g.nf) == size(X)
-
-            g = Zygote.gradient(model -> sum(node_feature(model(fg))), cc)[1]
-            @test size(g.weight) == size(cc.weight)
-            @test size(g.bias) == size(cc.bias)
-        end
-
-        @testset "bias=false" begin
-            @test length(Flux.params(ChebConv(2=>3, 3))) == 2
-            @test length(Flux.params(ChebConv(2=>3, 3, bias=false))) == 1
-        end
-    end
-
-    @testset "GraphConv" begin
-        X = rand(T, in_channel, N)
-        Xt = transpose(rand(T, N, in_channel))
-        @testset "layer with graph" begin
-            gc = GraphConv(fg, in_channel=>out_channel)
-            @test adjacency_list(gc.fg) == [[2,4], [1,3], [2,4], [1,3]]
-            @test size(gc.weight1) == (out_channel, in_channel)
-            @test size(gc.weight2) == (out_channel, in_channel)
-            @test size(gc.bias) == (out_channel,)
-
-            Y = gc(X)
-            @test size(Y) == (out_channel, N)
-
-            # Test with transposed features
-            Y = gc(Xt)
-            @test size(Y) == (out_channel, N)
-
-            g = Zygote.gradient(x -> sum(gc(x)), X)[1]
-            @test size(g) == size(X)
-
-            g = Zygote.gradient(model -> sum(model(X)), gc)[1]
-            @test size(g.weight1) == size(gc.weight1)
-            @test size(g.weight2) == size(gc.weight2)
-            @test size(g.bias) == size(gc.bias)
-        end
-
-        @testset "layer without graph" begin
-            gc = GraphConv(in_channel=>out_channel)
-            @test size(gc.weight1) == (out_channel, in_channel)
-            @test size(gc.weight2) == (out_channel, in_channel)
-            @test size(gc.bias) == (out_channel,)
-
-            fg = FeaturedGraph(adj, nf=X)
-            fg_ = gc(fg)
-            @test size(node_feature(fg_)) == (out_channel, N)
-            @test_throws MethodError gc(X)
-
-            # Test with transposed features
-            fgt = FeaturedGraph(adj, nf=Xt)
-            fgt_ = gc(fgt)
-            @test size(node_feature(fgt_)) == (out_channel, N)
-
-            g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
-            @test size(g.nf) == size(X)
-
-            g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
-            @test size(g.weight1) == size(gc.weight1)
-            @test size(g.weight2) == size(gc.weight2)
-            @test size(g.bias) == size(gc.bias)
-        end
-
-
-        @testset "bias=false" begin
-            @test length(Flux.params(GraphConv(2=>3))) == 3
-            @test length(Flux.params(GraphConv(2=>3, bias=false))) == 2
-        end
-    end
+    #         fg = FeaturedGraph(adj, nf=X)
+    #         fg_ = cc(fg)
+    #         @test size(node_feature(fg_)) == (out_channel, N)
+    #         @test_throws MethodError cc(X)
+
+    #         # Test with transposed features
+    #         fgt = FeaturedGraph(adj, nf=Xt)
+    #         fgt_ = cc(fgt)
+    #         @test size(node_feature(fgt_)) == (out_channel, N)
+
+    #         g = Zygote.gradient(x -> sum(node_feature(cc(x))), fg)[1]
+    #         @test size(g.nf) == size(X)
+
+    #         g = Zygote.gradient(model -> sum(node_feature(model(fg))), cc)[1]
+    #         @test size(g.weight) == size(cc.weight)
+    #         @test size(g.bias) == size(cc.bias)
+    #     end
+
+    #     @testset "bias=false" begin
+    #         @test length(Flux.params(ChebConv(2=>3, 3))) == 2
+    #         @test length(Flux.params(ChebConv(2=>3, 3, bias=false))) == 1
+    #     end
+    # end
+
+    # @testset "GraphConv" begin
+    #     X = rand(T, in_channel, N)
+    #     Xt = transpose(rand(T, N, in_channel))
+    #     @testset "layer with graph" begin
+    #         gc = GraphConv(fg, in_channel=>out_channel)
+    #         @test adjacency_list(gc.fg) == [[2,4], [1,3], [2,4], [1,3]]
+    #         @test size(gc.weight1) == (out_channel, in_channel)
+    #         @test size(gc.weight2) == (out_channel, in_channel)
+    #         @test size(gc.bias) == (out_channel,)
+
+    #         Y = gc(X)
+    #         @test size(Y) == (out_channel, N)
+
+    #         # Test with transposed features
+    #         Y = gc(Xt)
+    #         @test size(Y) == (out_channel, N)
+
+    #         g = Zygote.gradient(x -> sum(gc(x)), X)[1]
+    #         @test size(g) == size(X)
+
+    #         g = Zygote.gradient(model -> sum(model(X)), gc)[1]
+    #         @test size(g.weight1) == size(gc.weight1)
+    #         @test size(g.weight2) == size(gc.weight2)
+    #         @test size(g.bias) == size(gc.bias)
+    #     end
+
+    #     @testset "layer without graph" begin
+    #         gc = GraphConv(in_channel=>out_channel)
+    #         @test size(gc.weight1) == (out_channel, in_channel)
+    #         @test size(gc.weight2) == (out_channel, in_channel)
+    #         @test size(gc.bias) == (out_channel,)
+
+    #         fg = FeaturedGraph(adj, nf=X)
+    #         fg_ = gc(fg)
+    #         @test size(node_feature(fg_)) == (out_channel, N)
+    #         @test_throws MethodError gc(X)
+
+    #         # Test with transposed features
+    #         fgt = FeaturedGraph(adj, nf=Xt)
+    #         fgt_ = gc(fgt)
+    #         @test size(node_feature(fgt_)) == (out_channel, N)
+
+    #         g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
+    #         @test size(g.nf) == size(X)
+
+    #         g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
+    #         @test size(g.weight1) == size(gc.weight1)
+    #         @test size(g.weight2) == size(gc.weight2)
+    #         @test size(g.bias) == size(gc.bias)
+    #     end
+
+
+    #     @testset "bias=false" begin
+    #         @test length(Flux.params(GraphConv(2=>3))) == 3
+    #         @test length(Flux.params(GraphConv(2=>3, bias=false))) == 2
+    #     end
+    # end
 
     @testset "GATConv" begin
 
@@ -230,7 +230,6 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
                 @test size(gat.weight) == (out_channel * heads, in_channel)
                 @test size(gat.bias) == (out_channel * heads,)
                 @test size(gat.a) == (2*out_channel, heads)
-
                 fg_ = gat(fg_gat)
                 Y = node_feature(fg_)
                 @test size(Y) == (concat ? (out_channel*heads, N) : (out_channel, N))
diff --git a/test/layers/gn.jl b/test/layers/gn.jl
index a8b055e0c..89c80583d 100644
--- a/test/layers/gn.jl
+++ b/test/layers/gn.jl
@@ -29,35 +29,35 @@ u = rand(T, in_channel)
 
         @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (in_channel, num_V)
-        @test size(edge_feature(fg_)) == (0, 2*num_E)
-        @test size(global_feature(fg_)) == (0,)
+        @test all(edge_feature(fg_) .== fill(nothing, 2*num_E))
+        @test global_feature(fg_) === nothing
     end
 
     @testset "with neighbor aggregation" begin
         (l::NewGNLayer)(fg) = GeometricFlux.propagate(l, fg, +)
 
-        fg = FeaturedGraph(adj, nf=V, ef=E, gf=zeros(0))
+        fg = FeaturedGraph(adj, nf=V, ef=E, gf=nothing)
         l = NewGNLayer()
         fg_ = l(fg)
 
         @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (in_channel, num_V)
         @test size(edge_feature(fg_)) == (in_channel, 2*num_E)
-        @test size(global_feature(fg_)) == (0,)
+        @test global_feature(fg_) === nothing
     end
 
     GeometricFlux.update_edge(l::NewGNLayer, e, vi, vj, u) = rand(T, out_channel)
     @testset "update edge with neighbor aggregation" begin
         (l::NewGNLayer)(fg) = GeometricFlux.propagate(l, fg, +)
 
-        fg = FeaturedGraph(adj, nf=V, ef=E, gf=zeros(0))
+        fg = FeaturedGraph(adj, nf=V, ef=E, gf=nothing)
         l = NewGNLayer()
         fg_ = l(fg)
 
         @test adjacency_matrix(fg_) == adj
         @test size(node_feature(fg_)) == (in_channel, num_V)
         @test size(edge_feature(fg_)) == (out_channel, 2*num_E)
-        @test size(global_feature(fg_)) == (0,)
+        @test global_feature(fg_) === nothing
     end
 
     GeometricFlux.update_vertex(l::NewGNLayer, ē, vi, u) = rand(T, out_channel)
diff --git a/test/runtests.jl b/test/runtests.jl
index 8c530c49a..d2a5c412c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -17,9 +17,9 @@ cuda_tests = [
 ]
 
 tests = [
-    "featured_graph",
-    "layers/gn",
-    "layers/msgpass",
+    # "featured_graph",
+    # "layers/gn",
+    # "layers/msgpass",
     "layers/conv",
     "layers/pool",
     "layers/misc",

From 269dcb4333bc9139893556da91641e81eece6b80 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Sun, 1 Aug 2021 18:52:26 +0200
Subject: [PATCH 04/15] fix cuda test

---
 src/featuredgraph.jl        |  45 +++--
 src/graph_conversions.jl    |  12 +-
 test/cuda/conv.jl           |   2 +-
 test/cuda/featured_graph.jl |   7 +-
 test/cuda/msgpass.jl        |   2 +-
 test/featured_graph.jl      |  22 +--
 test/layers/conv.jl         | 342 ++++++++++++++++++------------------
 test/layers/gn.jl           |  62 +++----
 test/layers/misc.jl         |   2 +-
 test/layers/msgpass.jl      |  62 ++++---
 test/models.jl              |  10 +-
 test/runtests.jl            |  11 +-
 12 files changed, 303 insertions(+), 276 deletions(-)

diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
index 2ea7dfef5..e21235fd7 100644
--- a/src/featuredgraph.jl
+++ b/src/featuredgraph.jl
@@ -17,6 +17,8 @@ struct NullGraph <: AbstractFeaturedGraph end
 const COO_T = Tuple{T, T} where T <: AbstractVector
 const ADJMAT_T = AbstractMatrix
 const ADJLIST_T = AbstractVector{T} where T <: AbstractVector
+# const SPARSE_T = ...  Support sparse adjacency matrices in the future
+
 
 struct FeaturedGraph{T<:Union{COO_T,ADJMAT_T}} <: AbstractFeaturedGraph
     graph::T
@@ -31,8 +33,9 @@ struct FeaturedGraph{T<:Union{COO_T,ADJMAT_T}} <: AbstractFeaturedGraph
     # gdata::Dict{String, Any}
 end
 
+@functor FeaturedGraph
 
-function FeaturedGraph(graph; 
+function FeaturedGraph(g; 
                         num_nodes = nothing, 
                         graph_type = :adjmat,
                         dir = :out,
@@ -47,9 +50,9 @@ function FeaturedGraph(graph;
     @assert graph_type ∈ [:coo, :adjmat] "Invalid graph_type $graph_type requested"
     @assert dir ∈ [:in, :out]
     if graph_type == :coo
-        graph, num_nodes, num_edges = to_coo(graph; num_nodes, dir)
+        g, num_nodes, num_edges = to_coo(g; num_nodes, dir)
     else graph_type == :adjmat
-        graph, num_nodes, num_edges = to_adjmat(graph; dir)
+        g, num_nodes, num_edges = to_adjmat(g; dir)
     end
 
     ## Possible future implementation of feature maps. 
@@ -60,21 +63,19 @@ function FeaturedGraph(graph;
     # gdata["g"] = gf
     
 
-    FeaturedGraph(graph, num_nodes, num_edges, nf, ef, gf)
+    FeaturedGraph(g, num_nodes, num_edges, nf, ef, gf)
 end
 
 FeaturedGraph(s::AbstractVector, t::AbstractVector; kws...) = FeaturedGraph((s,t); kws...)
 FeaturedGraph(g::AbstractGraph; kws...) = FeaturedGraph(adjacency_matrix(g, dir=:out); kws...)
 
 function FeaturedGraph(fg::FeaturedGraph; 
-                num_nodes=fg.num_nodes,
                 nf=node_feature(fg), ef=edge_feature(fg), gf=global_feature(fg))
                 # ndata=copy(fg.ndata), edata=copy(fg.edata), gdata=copy(fg.gdata), # copy keeps the refs to old data 
     
-    FeaturedGraph(fg.graph; num_nodes, nf, ef, gf) #   ndata, edata, gdata, 
+    FeaturedGraph(fg.graph, fg.num_nodes, fg.num_edges, nf, ef, gf) #   ndata, edata, gdata, 
 end
 
-@functor FeaturedGraph
 
 """
     edge_index(fg::FeaturedGraph)
@@ -88,17 +89,24 @@ s, t = edge_index(fg)
 """
 edge_index(fg::FeaturedGraph{<:COO_T}) = fg.graph
 
+function edge_index(fg::FeaturedGraph{<:ADJMAT_T})
+    nz = findall(!=(0), graph(fg)) # CartesianIndex of every nonzero entry, in findall's traversal order
+    ntuple(i -> map(t->t[i], nz), 2)  # tuple of (row indices, column indices) of the nonzeros
+end
+
 graph(fg::FeaturedGraph) = fg.graph
 
-LightGraphs.edges(fg::FeaturedGraph{<:COO_T}) = zip(edge_index(fg)...)
+LightGraphs.edges(fg::FeaturedGraph) = zip(edge_index(fg)...)
 
-LightGraphs.edgetype(fg::FeaturedGraph{<:COO_T}) = Tuple{Int, Int}
+LightGraphs.edgetype(fg::FeaturedGraph) = Tuple{Int, Int}
 
 function LightGraphs.has_edge(fg::FeaturedGraph{<:COO_T}, i::Integer, j::Integer)
     s, t = edge_index(fg)
     return any((s .== i) .& (t .== j))
 end
 
+LightGraphs.has_edge(fg::FeaturedGraph{<:ADJMAT_T}, i::Integer, j::Integer) = graph(fg)[i,j] != 0
+
 LightGraphs.nv(fg::FeaturedGraph) = fg.num_nodes
 LightGraphs.ne(fg::FeaturedGraph) = fg.num_edges
 LightGraphs.has_vertex(fg::FeaturedGraph, i::Int) = i in 1:fg.num_nodes
@@ -109,16 +117,16 @@ function LightGraphs.outneighbors(fg::FeaturedGraph{<:COO_T}, i::Integer)
     return t[s .== i]
 end
 
-function LightGraphs.inneighbors(fg::FeaturedGraph{<:COO_T}, i::Integer)
-    s, t = edge_index(fg)
-    return s[t .== i]
-end
-
 function LightGraphs.outneighbors(fg::FeaturedGraph{<:ADJMAT_T}, i::Integer)
     A = graph(fg)
     return findall(!=(0), A[i,:])
 end
 
+function LightGraphs.inneighbors(fg::FeaturedGraph{<:COO_T}, i::Integer)
+    s, t = edge_index(fg)
+    return s[t .== i]
+end
+
 function LightGraphs.inneighbors(fg::FeaturedGraph{<:ADJMAT_T}, i::Integer)
     A = graph(fg)
     return findall(!=(0), A[:,i])
@@ -159,6 +167,12 @@ function LightGraphs.degree(fg::FeaturedGraph{<:COO_T}; dir=:out)
     return degs
 end
 
+function LightGraphs.degree(fg::FeaturedGraph{<:ADJMAT_T}; dir=:out)
+    @assert dir ∈ (:in, :out)
+    A = graph(fg)
+    return dir == :out ? vec(sum(A, dims=2)) : vec(sum(A, dims=1))  # row sums for :out, column sums for :in
+end
+
 # node_feature(fg::FeaturedGraph) = fg.ndata["x"]
 # edge_feature(fg::FeaturedGraph) = fg.edata["e"]
 # global_feature(fg::FeaturedGraph) = fg.gdata["g"]
@@ -262,9 +276,8 @@ end
 @non_differentiable add_self_loops(x...)
 @non_differentiable remove_self_loops(x...)
 
-
 # # delete when https://github.com/JuliaDiff/ChainRules.jl/pull/472 is merged
 # function ChainRulesCore.rrule(::typeof(copy), x)
 #     copy_pullback(ȳ) = (NoTangent(), ȳ)
 #     return copy(x), copy_pullback
-# end
\ No newline at end of file
+# end
diff --git a/src/graph_conversions.jl b/src/graph_conversions.jl
index 552655ac7..2d3d14183 100644
--- a/src/graph_conversions.jl
+++ b/src/graph_conversions.jl
@@ -1,6 +1,6 @@
 ### CONVERT_TO_COO REPRESENTATION ########
 
-function to_coo(graph::COO_T; num_nodes=nothing)
+function to_coo(graph::COO_T; num_nodes=nothing, dir=:out)
     s, t = graph   
     num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
     @assert length(s) == length(t)
@@ -40,8 +40,9 @@ function to_coo(adj_list::ADJLIST_T; dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
     num_nodes = length(adj_list)
     num_edges = sum(length.(adj_list))
-    s = zeros(Int, num_edges)
-    t = zeros(Int, num_edges)
+    @assert num_nodes > 0
+    s = similar(adj_list[1], eltype(adj_list[1]), num_edges)
+    t = similar(adj_list[1], eltype(adj_list[1]), num_edges)
     e = 0
     for i in 1:num_nodes
         for j in adj_list[i]
@@ -93,7 +94,7 @@ function to_adjmat(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=not
 end
 
 function to_adjmat(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
-    # Dir will be ignored since the input eindes is alwasys in source target format.
+    # `dir` will be ignored since the input `eindex` is always in source target format.
     # The output will always be a adjmat in :out format (e.g. A[i,j] denotes from i to j)
     s, t = eindex
     n = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes
@@ -102,6 +103,9 @@ function to_adjmat(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
     return adj_mat, n, length(s)
 end
 
+@non_differentiable to_coo(x...)
+@non_differentiable to_adjmat(x...)
+
 ## TODO
 # to_sparse
 # to_dense
diff --git a/test/cuda/conv.jl b/test/cuda/conv.jl
index 386edc94e..e6d79d098 100644
--- a/test/cuda/conv.jl
+++ b/test/cuda/conv.jl
@@ -8,7 +8,7 @@ adj = [0 1 0 1;
        0 1 0 1;
        1 0 1 0]
 
-fg = FeaturedGraph(adj)
+fg = FeaturedGraph(adj, graph_type=GRAPH_T)
 
 @testset "cuda/conv" begin
     @testset "GCNConv" begin
diff --git a/test/cuda/featured_graph.jl b/test/cuda/featured_graph.jl
index c8daea441..8e2a3fb7e 100644
--- a/test/cuda/featured_graph.jl
+++ b/test/cuda/featured_graph.jl
@@ -2,15 +2,16 @@
     s = [1,1,2,3,4,5,5,5]
     t = [2,5,3,2,1,4,3,1]
     s, t = [s; t], [t; s]  #symmetrize
-    fg = FeaturedGraph(s, t) 
+    fg = FeaturedGraph(s, t, graph_type=GRAPH_T) 
     fg_gpu = fg |> gpu
         
     @testset "functor" begin
+        s_cpu, t_cpu = edge_index(fg)
         s_gpu, t_gpu = edge_index(fg_gpu)
         @test s_gpu isa CuVector{Int}
-        @test Array(s_gpu) == s
+        @test Array(s_gpu) == s_cpu
         @test t_gpu isa CuVector{Int}
-        @test Array(t_gpu) == t
+        @test Array(t_gpu) == t_cpu
     end
 
     @testset "adjacency_matrix" begin
diff --git a/test/cuda/msgpass.jl b/test/cuda/msgpass.jl
index e372a41b5..18650dda9 100644
--- a/test/cuda/msgpass.jl
+++ b/test/cuda/msgpass.jl
@@ -20,7 +20,7 @@ GeometricFlux.message(n::NewCudaLayer, x_i, x_j, e_ij) = n.weight * x_j
 GeometricFlux.update(::NewCudaLayer, m, x) = m
 
 X = rand(T, in_channel, N) |> gpu
-fg = FeaturedGraph(adj, nf=X)
+fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
 l = NewCudaLayer(out_channel, in_channel) |> gpu
 
 @testset "cuda/msgpass" begin
diff --git a/test/featured_graph.jl b/test/featured_graph.jl
index 66e193a39..29079e3a3 100644
--- a/test/featured_graph.jl
+++ b/test/featured_graph.jl
@@ -6,14 +6,14 @@
                     1  0  1  0
                     0  1  0  1
                     1  0  1  0]
-        adj_list_out =  [[2,4], [3,1], [4,2], [1,3]]
-        adj_list_in =  [[4,2], [1,3], [2,4], [3,1]]
+        adj_list_out =  [[2,4], [1,3], [2,4], [1,3]]
+        adj_list_in =  [[2,4], [1,3], [2,4], [1,3]]
 
         # core functionality
-        fg = FeaturedGraph(u, v)
+        fg = FeaturedGraph(u, v; graph_type=GRAPH_T)
         @test fg.num_edges == 8
         @test fg.num_nodes == 4
-        @test collect(edges(fg)) == collect(zip(u, v))
+        @test collect(edges(fg)) |> sort == collect(zip(u, v)) |> sort
         @test sort(outneighbors(fg, 1)) == [2, 4] 
         @test sort(inneighbors(fg, 1)) == [2, 4] 
         @test is_directed(fg) == true
@@ -22,17 +22,17 @@
         @test adjacency_matrix(fg) == adj_mat
         @test adjacency_matrix(fg; dir=:in) == adj_mat
         @test adjacency_matrix(fg; dir=:out) == adj_mat
-        @test adjacency_list(fg; dir=:in) == adj_list_in
-        @test adjacency_list(fg; dir=:out) == adj_list_out
+        @test sort.(adjacency_list(fg; dir=:in)) == adj_list_in
+        @test sort.(adjacency_list(fg; dir=:out)) == adj_list_out
 
         @testset "constructors" begin
-            fg = FeaturedGraph(adj_mat)
+            fg = FeaturedGraph(adj_mat; graph_type=GRAPH_T)
             adjacency_matrix(fg; dir=:out) == adj_mat
             adjacency_matrix(fg; dir=:in) == adj_mat
         end 
 
         @testset "degree" begin
-            fg = FeaturedGraph(adj_mat)
+            fg = FeaturedGraph(adj_mat; graph_type=GRAPH_T)
             @test degree(fg, dir=:out) == vec(sum(adj_mat, dims=2))
             @test degree(fg, dir=:in) == vec(sum(adj_mat, dims=1))
         end
@@ -55,10 +55,10 @@
         adj_list_in =  [[4], [1], [2], [3]]
 
         # core functionality
-        fg = FeaturedGraph(u, v)
+        fg = FeaturedGraph(u, v; graph_type=GRAPH_T)
         @test fg.num_edges == 4
         @test fg.num_nodes == 4
-        @test collect(edges(fg)) == collect(zip(u, v))
+        @test collect(edges(fg)) |> sort == collect(zip(u, v)) |> sort
         @test sort(outneighbors(fg, 1)) == [2] 
         @test sort(inneighbors(fg, 1)) == [4] 
         @test is_directed(fg) == true
@@ -72,7 +72,7 @@
         @test adjacency_list(fg, dir=:in) ==  adj_list_in
 
         @testset "degree" begin
-            fg = FeaturedGraph(adj_mat_out)
+            fg = FeaturedGraph(adj_mat_out; graph_type=GRAPH_T)
             @test degree(fg, dir=:out) == vec(sum(adj_mat_out, dims=2))
             @test degree(fg, dir=:in) == vec(sum(adj_mat_out, dims=1))
         end
diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index a0562ac96..23c173bf9 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -14,177 +14,177 @@ adj_single_vertex = T[0. 0. 0. 1.;
                       0. 0. 0. 1.;
                       1. 0. 1. 0.]
 
-fg_single_vertex = FeaturedGraph(adj_single_vertex)
+fg_single_vertex = FeaturedGraph(adj_single_vertex, graph_type=GRAPH_T)
             
 
 @testset "layer" begin
-    # @testset "GCNConv" begin
-    #     X = rand(T, in_channel, N)
-    #     Xt = transpose(rand(T, N, in_channel))
-    #     @testset "layer with graph" begin
-    #         gc = GCNConv(fg, in_channel=>out_channel)
-    #         @test size(gc.weight) == (out_channel, in_channel)
-    #         @test size(gc.bias) == (out_channel,)
-    #         @test adjacency_matrix(gc.fg) == adj
-
-    #         Y = gc(X)
-    #         @test size(Y) == (out_channel, N)
-
-    #         # Test with transposed features
-    #         Y = gc(Xt)
-    #         @test size(Y) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(gc(x)), X)[1]
-    #         @test size(g) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(model(X)), gc)[1]
-    #         @test size(g.weight) == size(gc.weight)
-    #         @test size(g.bias) == size(gc.bias)
-    #     end
-
-    #     @testset "layer without graph" begin
-    #         gc = GCNConv(in_channel=>out_channel)
-    #         @test size(gc.weight) == (out_channel, in_channel)
-    #         @test size(gc.bias) == (out_channel,)
+    @testset "GCNConv" begin
+        X = rand(T, in_channel, N)
+        Xt = transpose(rand(T, N, in_channel))
+        @testset "layer with graph" begin
+            gc = GCNConv(fg, in_channel=>out_channel)
+            @test size(gc.weight) == (out_channel, in_channel)
+            @test size(gc.bias) == (out_channel,)
+            @test adjacency_matrix(gc.fg) == adj
+
+            Y = gc(X)
+            @test size(Y) == (out_channel, N)
+
+            # Test with transposed features
+            Y = gc(Xt)
+            @test size(Y) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(gc(x)), X)[1]
+            @test size(g) == size(X)
+
+            g = Zygote.gradient(model -> sum(model(X)), gc)[1]
+            @test size(g.weight) == size(gc.weight)
+            @test size(g.bias) == size(gc.bias)
+        end
+
+        @testset "layer without graph" begin
+            gc = GCNConv(in_channel=>out_channel)
+            @test size(gc.weight) == (out_channel, in_channel)
+            @test size(gc.bias) == (out_channel,)
             
-    #         fg = FeaturedGraph(adj, nf=X)
-    #         fg_ = gc(fg)
-    #         @test size(node_feature(fg_)) == (out_channel, N)
-    #         @test_throws MethodError gc(X)
+            fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
+            fg_ = gc(fg)
+            @test size(node_feature(fg_)) == (out_channel, N)
+            @test_throws MethodError gc(X)
             
-    #         # Test with transposed features
-    #         fgt = FeaturedGraph(adj, nf=Xt)
-    #         fgt_ = gc(fgt)
-    #         @test size(node_feature(fgt_)) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
-    #         @test size(g.nf) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
-    #         @test size(g.weight) == size(gc.weight)
-    #         @test size(g.bias) == size(gc.bias)
-    #     end
-
-    #     @testset "bias=false" begin
-    #         @test length(Flux.params(GCNConv(2=>3))) == 2
-    #         @test length(Flux.params(GCNConv(2=>3, bias=false))) == 1
-    #     end
-    # end
-
-
-    # @testset "ChebConv" begin
-    #     k = 6
-    #     X = rand(T, in_channel, N)
-    #     Xt = transpose(rand(T, N, in_channel))
-    #     @testset "layer with graph" begin
-    #         cc = ChebConv(fg, in_channel=>out_channel, k)
-    #         @test size(cc.weight) == (out_channel, in_channel, k)
-    #         @test size(cc.bias) == (out_channel,)
-    #         @test adjacency_matrix(cc.fg) == adj
-    #         @test cc.k == k
+            # Test with transposed features
+            fgt = FeaturedGraph(adj, nf=Xt, graph_type=GRAPH_T)
+            fgt_ = gc(fgt)
+            @test size(node_feature(fgt_)) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
+            @test size(g.nf) == size(X)
+
+            g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
+            @test size(g.weight) == size(gc.weight)
+            @test size(g.bias) == size(gc.bias)
+        end
+
+        @testset "bias=false" begin
+            @test length(Flux.params(GCNConv(2=>3))) == 2
+            @test length(Flux.params(GCNConv(2=>3, bias=false))) == 1
+        end
+    end
+
+
+    @testset "ChebConv" begin
+        k = 6
+        X = rand(T, in_channel, N)
+        Xt = transpose(rand(T, N, in_channel))
+        @testset "layer with graph" begin
+            cc = ChebConv(fg, in_channel=>out_channel, k)
+            @test size(cc.weight) == (out_channel, in_channel, k)
+            @test size(cc.bias) == (out_channel,)
+            @test adjacency_matrix(cc.fg) == adj
+            @test cc.k == k
             
-    #         Y = cc(X)
-    #         @test size(Y) == (out_channel, N)
-
-    #         # Test with transposed features
-    #         Y = cc(Xt)
-    #         @test size(Y) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(cc(x)), X)[1]
-    #         @test size(g) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(model(X)), cc)[1]
-    #         @test size(g.weight) == size(cc.weight)
-    #         @test size(g.bias) == size(cc.bias)
-    #     end
-
-    #     @testset "layer without graph" begin
-    #         cc = ChebConv(in_channel=>out_channel, k)
-    #         @test size(cc.weight) == (out_channel, in_channel, k)
-    #         @test size(cc.bias) == (out_channel,)
-    #         @test cc.k == k
+            Y = cc(X)
+            @test size(Y) == (out_channel, N)
+
+            # Test with transposed features
+            Y = cc(Xt)
+            @test size(Y) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(cc(x)), X)[1]
+            @test size(g) == size(X)
+
+            g = Zygote.gradient(model -> sum(model(X)), cc)[1]
+            @test size(g.weight) == size(cc.weight)
+            @test size(g.bias) == size(cc.bias)
+        end
+
+        @testset "layer without graph" begin
+            cc = ChebConv(in_channel=>out_channel, k)
+            @test size(cc.weight) == (out_channel, in_channel, k)
+            @test size(cc.bias) == (out_channel,)
+            @test cc.k == k
             
-    #         fg = FeaturedGraph(adj, nf=X)
-    #         fg_ = cc(fg)
-    #         @test size(node_feature(fg_)) == (out_channel, N)
-    #         @test_throws MethodError cc(X)
-
-    #         # Test with transposed features
-    #         fgt = FeaturedGraph(adj, nf=Xt)
-    #         fgt_ = cc(fgt)
-    #         @test size(node_feature(fgt_)) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(node_feature(cc(x))), fg)[1]
-    #         @test size(g.nf) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(node_feature(model(fg))), cc)[1]
-    #         @test size(g.weight) == size(cc.weight)
-    #         @test size(g.bias) == size(cc.bias)
-    #     end
-
-    #     @testset "bias=false" begin
-    #         @test length(Flux.params(ChebConv(2=>3, 3))) == 2
-    #         @test length(Flux.params(ChebConv(2=>3, 3, bias=false))) == 1
-    #     end
-    # end
-
-    # @testset "GraphConv" begin
-    #     X = rand(T, in_channel, N)
-    #     Xt = transpose(rand(T, N, in_channel))
-    #     @testset "layer with graph" begin
-    #         gc = GraphConv(fg, in_channel=>out_channel)
-    #         @test adjacency_list(gc.fg) == [[2,4], [1,3], [2,4], [1,3]]
-    #         @test size(gc.weight1) == (out_channel, in_channel)
-    #         @test size(gc.weight2) == (out_channel, in_channel)
-    #         @test size(gc.bias) == (out_channel,)
-
-    #         Y = gc(X)
-    #         @test size(Y) == (out_channel, N)
-
-    #         # Test with transposed features
-    #         Y = gc(Xt)
-    #         @test size(Y) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(gc(x)), X)[1]
-    #         @test size(g) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(model(X)), gc)[1]
-    #         @test size(g.weight1) == size(gc.weight1)
-    #         @test size(g.weight2) == size(gc.weight2)
-    #         @test size(g.bias) == size(gc.bias)
-    #     end
-
-    #     @testset "layer without graph" begin
-    #         gc = GraphConv(in_channel=>out_channel)
-    #         @test size(gc.weight1) == (out_channel, in_channel)
-    #         @test size(gc.weight2) == (out_channel, in_channel)
-    #         @test size(gc.bias) == (out_channel,)
-
-    #         fg = FeaturedGraph(adj, nf=X)
-    #         fg_ = gc(fg)
-    #         @test size(node_feature(fg_)) == (out_channel, N)
-    #         @test_throws MethodError gc(X)
-
-    #         # Test with transposed features
-    #         fgt = FeaturedGraph(adj, nf=Xt)
-    #         fgt_ = gc(fgt)
-    #         @test size(node_feature(fgt_)) == (out_channel, N)
-
-    #         g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
-    #         @test size(g.nf) == size(X)
-
-    #         g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
-    #         @test size(g.weight1) == size(gc.weight1)
-    #         @test size(g.weight2) == size(gc.weight2)
-    #         @test size(g.bias) == size(gc.bias)
-    #     end
-
-
-    #     @testset "bias=false" begin
-    #         @test length(Flux.params(GraphConv(2=>3))) == 3
-    #         @test length(Flux.params(GraphConv(2=>3, bias=false))) == 2
-    #     end
-    # end
+            fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
+            fg_ = cc(fg)
+            @test size(node_feature(fg_)) == (out_channel, N)
+            @test_throws MethodError cc(X)
+
+            # Test with transposed features
+            fgt = FeaturedGraph(adj, nf=Xt, graph_type=GRAPH_T)
+            fgt_ = cc(fgt)
+            @test size(node_feature(fgt_)) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(node_feature(cc(x))), fg)[1]
+            @test size(g.nf) == size(X)
+
+            g = Zygote.gradient(model -> sum(node_feature(model(fg))), cc)[1]
+            @test size(g.weight) == size(cc.weight)
+            @test size(g.bias) == size(cc.bias)
+        end
+
+        @testset "bias=false" begin
+            @test length(Flux.params(ChebConv(2=>3, 3))) == 2
+            @test length(Flux.params(ChebConv(2=>3, 3, bias=false))) == 1
+        end
+    end
+
+    @testset "GraphConv" begin
+        X = rand(T, in_channel, N)
+        Xt = transpose(rand(T, N, in_channel))
+        @testset "layer with graph" begin
+            gc = GraphConv(fg, in_channel=>out_channel)
+            @test adjacency_list(gc.fg) == [[2,4], [1,3], [2,4], [1,3]]
+            @test size(gc.weight1) == (out_channel, in_channel)
+            @test size(gc.weight2) == (out_channel, in_channel)
+            @test size(gc.bias) == (out_channel,)
+
+            Y = gc(X)
+            @test size(Y) == (out_channel, N)
+
+            # Test with transposed features
+            Y = gc(Xt)
+            @test size(Y) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(gc(x)), X)[1]
+            @test size(g) == size(X)
+
+            g = Zygote.gradient(model -> sum(model(X)), gc)[1]
+            @test size(g.weight1) == size(gc.weight1)
+            @test size(g.weight2) == size(gc.weight2)
+            @test size(g.bias) == size(gc.bias)
+        end
+
+        @testset "layer without graph" begin
+            gc = GraphConv(in_channel=>out_channel)
+            @test size(gc.weight1) == (out_channel, in_channel)
+            @test size(gc.weight2) == (out_channel, in_channel)
+            @test size(gc.bias) == (out_channel,)
+
+            fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
+            fg_ = gc(fg)
+            @test size(node_feature(fg_)) == (out_channel, N)
+            @test_throws MethodError gc(X)
+
+            # Test with transposed features
+            fgt = FeaturedGraph(adj, nf=Xt, graph_type=GRAPH_T)
+            fgt_ = gc(fgt)
+            @test size(node_feature(fgt_)) == (out_channel, N)
+
+            g = Zygote.gradient(x -> sum(node_feature(gc(x))), fg)[1]
+            @test size(g.nf) == size(X)
+
+            g = Zygote.gradient(model -> sum(node_feature(model(fg))), gc)[1]
+            @test size(g.weight1) == size(gc.weight1)
+            @test size(g.weight2) == size(gc.weight2)
+            @test size(g.bias) == size(gc.bias)
+        end
+
+
+        @testset "bias=false" begin
+            @test length(Flux.params(GraphConv(2=>3))) == 3
+            @test length(Flux.params(GraphConv(2=>3, bias=false))) == 2
+        end
+    end
 
     @testset "GATConv" begin
 
@@ -193,7 +193,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
 
         @testset "layer with graph" begin
             for heads = [1, 2], concat = [true, false], adj_gat in [adj, adj_single_vertex]
-                fg_gat = FeaturedGraph(adj_gat)
+                fg_gat = FeaturedGraph(adj_gat, graph_type=GRAPH_T)
                 gat = GATConv(fg_gat, in_channel=>out_channel, heads=heads, concat=concat)
 
                 if adj_gat == adj
@@ -225,7 +225,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
 
         @testset "layer without graph" begin
             for heads = [1, 2], concat = [true, false], adj_gat in [adj, adj_single_vertex]
-                fg_gat = FeaturedGraph(adj_gat, nf=X)
+                fg_gat = FeaturedGraph(adj_gat, nf=X, graph_type=GRAPH_T)
                 gat = GATConv(in_channel=>out_channel, heads=heads, concat=concat)
                 @test size(gat.weight) == (out_channel * heads, in_channel)
                 @test size(gat.bias) == (out_channel * heads,)
@@ -236,7 +236,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
                 @test_throws MethodError gat(X)
 
                 # Test with transposed features
-                fgt = FeaturedGraph(adj_gat, nf=Xt)
+                fgt = FeaturedGraph(adj_gat, nf=Xt, graph_type=GRAPH_T)
                 fgt_ = gat(fgt)
                 @test size(node_feature(fgt_)) == (concat ? (out_channel*heads, N) : (out_channel, N))
 
@@ -284,13 +284,13 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
             ggc = GatedGraphConv(out_channel, num_layers)
             @test size(ggc.weight) == (out_channel, out_channel, num_layers)
 
-            fg = FeaturedGraph(adj, nf=X)
+            fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
             fg_ = ggc(fg)
             @test size(node_feature(fg_)) == (out_channel, N)
             @test_throws MethodError ggc(X)
 
             # Test with transposed features
-            fgt = FeaturedGraph(adj, nf=Xt)
+            fgt = FeaturedGraph(adj, nf=Xt, graph_type=GRAPH_T)
             fgt_ = ggc(fgt)
             @test size(node_feature(fgt_)) == (out_channel, N)
 
@@ -327,13 +327,13 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex)
         @testset "layer without graph" begin
             ec = EdgeConv(Dense(2*in_channel, out_channel))
 
-            fg = FeaturedGraph(adj, nf=X)
+            fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
             fg_ = ec(fg)
             @test size(node_feature(fg_)) == (out_channel, N)
             @test_throws MethodError ec(X)
 
             # Test with transposed features
-            fgt = FeaturedGraph(adj, nf=Xt)
+            fgt = FeaturedGraph(adj, nf=Xt, graph_type=GRAPH_T)
             fgt_ = ec(fgt)
             @test size(node_feature(fgt_)) == (out_channel, N)
 
diff --git a/test/layers/gn.jl b/test/layers/gn.jl
index 89c80583d..fdd3e4288 100644
--- a/test/layers/gn.jl
+++ b/test/layers/gn.jl
@@ -1,30 +1,29 @@
-in_channel = 10
-out_channel = 5
-num_V = 6
-num_E = 7
-T = Float32
-
-adj = T[0. 1. 0. 0. 0. 0.;
-       1. 0. 0. 1. 1. 1.;
-       0. 0. 0. 0. 0. 1.;
-       0. 1. 0. 0. 1. 0.;
-       0. 1. 0. 1. 0. 1.;
-       0. 1. 1. 0. 1. 0.]
-
-struct NewGNLayer <: GraphNet
-end
-
-V = rand(T, in_channel, num_V)
-E = rand(T, in_channel, 2num_E)
-u = rand(T, in_channel)
-
 @testset "gn" begin
-    l = NewGNLayer()
+    in_channel = 10
+    out_channel = 5
+    num_V = 6
+    num_E = 7
+    T = Float32
+
+    adj =  [0 1 0 0 0 0
+            1 0 0 1 1 1
+            0 0 0 0 0 1
+            0 1 0 0 1 0
+            0 1 0 1 0 1
+            0 1 1 0 1 0]
+
+    struct NewGNLayer{G} <: GraphNet end
+    NewGNLayer() = NewGNLayer{GRAPH_T}()
+
+    V = rand(T, in_channel, num_V)
+    E = rand(T, in_channel, 2num_E)
+    u = rand(T, in_channel)
 
     @testset "without aggregation" begin
-        (l::NewGNLayer)(fg) = GeometricFlux.propagate(l, fg)
+        (l::NewGNLayer{GRAPH_T})(fg) = GeometricFlux.propagate(l, fg)
 
-        fg = FeaturedGraph(adj, nf=V)
+        fg = FeaturedGraph(adj, nf=V, graph_type=GRAPH_T)
+        l = NewGNLayer()
         fg_ = l(fg)
 
         @test adjacency_matrix(fg_) == adj
@@ -34,9 +33,9 @@ u = rand(T, in_channel)
     end
 
     @testset "with neighbor aggregation" begin
-        (l::NewGNLayer)(fg) = GeometricFlux.propagate(l, fg, +)
+        (l::NewGNLayer{GRAPH_T})(fg) = GeometricFlux.propagate(l, fg, +)
 
-        fg = FeaturedGraph(adj, nf=V, ef=E, gf=nothing)
+        fg = FeaturedGraph(adj, nf=V, ef=E, gf=nothing, graph_type=GRAPH_T)
         l = NewGNLayer()
         fg_ = l(fg)
 
@@ -46,11 +45,12 @@ u = rand(T, in_channel)
         @test global_feature(fg_) === nothing
     end
 
-    GeometricFlux.update_edge(l::NewGNLayer, e, vi, vj, u) = rand(T, out_channel)
     @testset "update edge with neighbor aggregation" begin
-        (l::NewGNLayer)(fg) = GeometricFlux.propagate(l, fg, +)
+        (l::NewGNLayer{GRAPH_T})(fg) = GeometricFlux.propagate(l, fg, +)
+        GeometricFlux.update_edge(l::NewGNLayer{GRAPH_T}, e, vi, vj, u) = rand(T, out_channel)
+    
 
-        fg = FeaturedGraph(adj, nf=V, ef=E, gf=nothing)
+        fg = FeaturedGraph(adj, nf=V, ef=E, gf=nothing, graph_type=GRAPH_T)
         l = NewGNLayer()
         fg_ = l(fg)
 
@@ -60,11 +60,11 @@ u = rand(T, in_channel)
         @test global_feature(fg_) === nothing
     end
 
-    GeometricFlux.update_vertex(l::NewGNLayer, ē, vi, u) = rand(T, out_channel)
     @testset "update edge/vertex with all aggregation" begin
-        (l::NewGNLayer)(fg) = GeometricFlux.propagate(l, fg, +, +, +)
+        (l::NewGNLayer{GRAPH_T})(fg) = GeometricFlux.propagate(l, fg, +, +, +)
+        GeometricFlux.update_vertex(l::NewGNLayer{GRAPH_T}, ē, vi, u) = rand(T, out_channel)
 
-        fg = FeaturedGraph(adj, nf=V, ef=E, gf=u)
+        fg = FeaturedGraph(adj, nf=V, ef=E, gf=u, graph_type=GRAPH_T)
         l = NewGNLayer()
         fg_ = l(fg)
 
diff --git a/test/layers/misc.jl b/test/layers/misc.jl
index 9f89500cd..9ddcd0e01 100644
--- a/test/layers/misc.jl
+++ b/test/layers/misc.jl
@@ -11,7 +11,7 @@
         ef = rand(5, E)
         gf = rand(7)
 
-        fg = FeaturedGraph(adj, nf=nf, ef=ef, gf=gf)
+        fg = FeaturedGraph(adj, nf=nf, ef=ef, gf=gf, graph_type=GRAPH_T)
         layer = bypass_graph(x -> x .+ 1.,
                                 x -> x .+ 2.,
                                 x -> x .+ 3.)
diff --git a/test/layers/msgpass.jl b/test/layers/msgpass.jl
index 04e1096de..1c4feb642 100644
--- a/test/layers/msgpass.jl
+++ b/test/layers/msgpass.jl
@@ -1,30 +1,29 @@
-in_channel = 10
-out_channel = 5
-num_V = 6
-num_E = 7
-T = Float32
-
-adj = T[0. 1. 0. 0. 0. 0.;
-       1. 0. 0. 1. 1. 1.;
-       0. 0. 0. 0. 0. 1.;
-       0. 1. 0. 0. 1. 0.;
-       0. 1. 0. 1. 0. 1.;
-       0. 1. 1. 0. 1. 0.]
-
-struct NewLayer <: MessagePassing
-    weight
-end
-NewLayer(m, n) = NewLayer(randn(T, m,n))
-
-(l::NewLayer)(fg) = GeometricFlux.propagate(l, fg, +)
-
-X = Array{T}(reshape(1:num_V*in_channel, in_channel, num_V))
-fg = FeaturedGraph(adj, nf=X, ef=Fill(zero(T), 0, 2num_E))
-
-l = NewLayer(out_channel, in_channel)
-
 @testset "msgpass" begin
+    in_channel = 10
+    out_channel = 5
+    num_V = 6
+    num_E = 7
+    T = Float32
+
+    adj =  [0 1 0 0 0 0
+            1 0 0 1 1 1
+            0 0 0 0 0 1
+            0 1 0 0 1 0
+            0 1 0 1 0 1
+            0 1 1 0 1 0]
+
+    struct NewLayer{G} <: MessagePassing
+        weight
+    end
+    NewLayer(m, n) = NewLayer{GRAPH_T}(randn(T, m,n))
+
+    X = Array{T}(reshape(1:num_V*in_channel, in_channel, num_V))
+    
     @testset "no message or update" begin
+        (l::NewLayer{GRAPH_T})(fg) = GeometricFlux.propagate(l, fg, +)
+
+        fg = FeaturedGraph(adj, nf=X, ef=Fill(zero(T), 0, 2num_E), graph_type=GRAPH_T)
+        l = NewLayer(out_channel, in_channel)
         fg_ = l(fg)
 
         @test adjacency_matrix(fg_) == adj
@@ -33,8 +32,13 @@ l = NewLayer(out_channel, in_channel)
         @test size(global_feature(fg_)) == (0,)
     end
 
-    GeometricFlux.message(l::NewLayer, x_i, x_j, e_ij) = l.weight * x_j
+    
     @testset "message function" begin
+        (l::NewLayer{GRAPH_T})(fg) = GeometricFlux.propagate(l, fg, +)
+        GeometricFlux.message(l::NewLayer{GRAPH_T}, x_i, x_j, e_ij) = l.weight * x_j
+    
+        fg = FeaturedGraph(adj, nf=X, ef=Fill(zero(T), 0, 2num_E), graph_type=GRAPH_T)
+        l = NewLayer(out_channel, in_channel)
         fg_ = l(fg)
 
         @test adjacency_matrix(fg_) == adj
@@ -43,8 +47,12 @@ l = NewLayer(out_channel, in_channel)
         @test size(global_feature(fg_)) == (0,)
     end
 
-    GeometricFlux.update(l::NewLayer, m, x) = l.weight * x + m
     @testset "message and update" begin
+        (l::NewLayer{GRAPH_T})(fg) = GeometricFlux.propagate(l, fg, +)
+        GeometricFlux.update(l::NewLayer{GRAPH_T}, m, x) = l.weight * x + m
+
+        fg = FeaturedGraph(adj, nf=X, ef=Fill(zero(T), 0, 2num_E), graph_type=GRAPH_T)
+        l = NewLayer(out_channel, in_channel)
         fg_ = l(fg)
 
         @test adjacency_matrix(fg_) == adj
diff --git a/test/models.jl b/test/models.jl
index 0e4e62c48..d04b3cb70 100644
--- a/test/models.jl
+++ b/test/models.jl
@@ -9,7 +9,7 @@ adj = [0. 1. 0. 1.;
        0. 1. 0. 1.;
        1. 0. 1. 0.]
 
-fg = FeaturedGraph(adj)
+fg = FeaturedGraph(adj; graph_type=GRAPH_T)
 
 @testset "models" begin
     @testset "GAE" begin
@@ -28,13 +28,13 @@ fg = FeaturedGraph(adj)
            @test size(Y) == (N, N)
 
            X = rand(T, 1, N)
-           fg = FeaturedGraph(adj, nf=X)
+           fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
            fg_ = ipd(fg)
            Y = node_feature(fg_)
            @test size(Y) == (N, N)
 
            X = rand(T, in_channel, N)
-           fg = FeaturedGraph(adj, nf=X)
+           fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
            fg_ = ipd(fg)
            Y = node_feature(fg_)
            @test size(Y) == (N, N)
@@ -45,7 +45,7 @@ fg = FeaturedGraph(adj)
             gc = GCNConv(in_channel=>out_channel)
             ve = VariationalEncoder(gc, out_channel, z_dim)
             X = rand(T, in_channel, N)
-            fg = FeaturedGraph(adj, nf=X)
+            fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
             fg_ = ve(fg)
             Z = node_feature(fg_)
             @test size(Z) == (z_dim, N)
@@ -56,7 +56,7 @@ fg = FeaturedGraph(adj)
             gc = GCNConv(in_channel=>out_channel)
             vgae = VGAE(gc, out_channel, z_dim)
             X = rand(T, in_channel, N)
-            fg = FeaturedGraph(adj, nf=X)
+            fg = FeaturedGraph(adj, nf=X, graph_type=GRAPH_T)
             fg_ = vgae(fg)
             Y = node_feature(fg_)
             @test size(Y) == (N, N)
diff --git a/test/runtests.jl b/test/runtests.jl
index d2a5c412c..ab134024d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -11,15 +11,15 @@ using Zygote
 using Test
 
 cuda_tests = [
-    # "cuda/featured_graph",
+    "cuda/featured_graph",
     # "cuda/conv",
     # "cuda/msgpass",
 ]
 
 tests = [
-    # "featured_graph",
-    # "layers/gn",
-    # "layers/msgpass",
+    "featured_graph",
+    "layers/gn",
+    "layers/msgpass",
     "layers/conv",
     "layers/pool",
     "layers/misc",
@@ -35,7 +35,8 @@ else
     @warn "CUDA unavailable, not testing GPU support"
 end
 
-@testset "GeometricFlux" begin
+@testset "GeometricFlux: graph format $graph_type" for graph_type in (:coo, :adjmat)
+    global GRAPH_T = graph_type
     for t in tests
         include("$(t).jl")
     end

From 07794529272ee08a2e012d4a46b0b9c037dea746 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Mon, 2 Aug 2021 08:29:13 +0200
Subject: [PATCH 05/15] add docstrings

---
 src/featuredgraph.jl | 105 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 99 insertions(+), 6 deletions(-)

diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
index e21235fd7..0ee5e8458 100644
--- a/src/featuredgraph.jl
+++ b/src/featuredgraph.jl
@@ -19,7 +19,71 @@ const ADJMAT_T = AbstractMatrix
 const ADJLIST_T = AbstractVector{T} where T <: AbstractVector
 # const SPARSE_T = ...  Support sparse adjacency matrices in the future
 
+"""
+    FeaturedGraph(g; [graph_type, dir, num_nodes, nf, ef, gf])
+    FeaturedGraph(fg::FeaturedGraph; [nf, ef, gf])
+
+A type representing a graph structure and storing also arrays 
+that contain features associated to nodes, edges, and the whole graph. 
+    
+A `FeaturedGraph` can be constructed out of different objects `g` representing
+the connections inside the graph, while the internal representation 
+is governed by `graph_type`. 
+When constructed from another featured graph `fg`, the internal representation
+is preserved and shared. 
+
+A FeaturedGraph is a LightGraphs' `AbstractGraph`, therefore it can be passed to generic LightGraphs functions such as `edges`, `outneighbors` and `inneighbors`.
+
+# Arguments 
+
+- `g`: Some data representing the graph topology. Possible types are 
+    - An adjacency matrix
+    - An adjacency list.
+    - A tuple containing the source and target vectors (COO representation)
+    - A LightGraphs' graph.
+- `graph_type`: A keyword argument that specifies 
+                the underlying representation used by the FeaturedGraph. 
+                Currently supported values are 
+    - `:coo`
+    - `:adjmat`  
+    Default `:adjmat`.
+- `dir`: The assumed edge direction when given adjacency matrix or adjacency list input data `g`. 
+        Possible values are `:out` and `:in`. Default `:out`.
+- `num_nodes`: The number of nodes. If not specified, inferred from `g`. Default `nothing`.
+- `nf`: Node features. Either nothing, or an array whose last dimension has size num_nodes. Default nothing.
+- `ef`: Edge features. Either nothing, or an array whose last dimension has size num_edges. Default nothing.
+- `gf`: Global features. Default nothing. 
+
+# Usage. 
+
+```
+using Flux, GeometricFlux
+
+# Construct from adjacency list representation
+g = [[2,3], [1,4,5], [1], [2,5], [2,4]]
+fg = FeaturedGraph(g)
+
+# Same graph in COO representation
+s = [1,1,2,2,2,3,4,4,5,5]
+t = [2,3,1,4,5,3,2,5,2,4]
+fg = FeaturedGraph((s, t))
+fg = FeaturedGraph(s, t) # other convenience constructor
+
+# From a LightGraphs' graph
+fg = FeaturedGraph(erdos_renyi(100, 20))
 
+# Copy featured graph while also adding node features
+fg = FeaturedGraph(fg, nf=rand(5, 100))
+
+# Send to gpu
+fg = fg |> gpu
+
+# Collect edges' source and target nodes.
+source, target = edge_index(fg)
+```
+
+See also [`graph`](@ref), [`edge_index`](@ref), [`node_feature`](@ref), [`edge_feature`](@ref), and [`global_feature`](@ref) 
+"""
 struct FeaturedGraph{T<:Union{COO_T,ADJMAT_T}} <: AbstractFeaturedGraph
     graph::T
     num_nodes::Int
@@ -35,7 +99,7 @@ end
 
 @functor FeaturedGraph
 
-function FeaturedGraph(g; 
+function FeaturedGraph(data; 
                         num_nodes = nothing, 
                         graph_type = :adjmat,
                         dir = :out,
@@ -50,9 +114,9 @@ function FeaturedGraph(g;
     @assert graph_type ∈ [:coo, :adjmat] "Invalid graph_type $graph_type requested"
     @assert dir ∈ [:in, :out]
     if graph_type == :coo
-        g, num_nodes, num_edges = to_coo(g; num_nodes, dir)
+        g, num_nodes, num_edges = to_coo(data; num_nodes, dir)
     else graph_type == :adjmat
-        g, num_nodes, num_edges = to_adjmat(g; dir)
+        g, num_nodes, num_edges = to_adjmat(data; dir)
     end
 
     ## Possible future implementation of feature maps. 
@@ -80,8 +144,8 @@ end
 """
     edge_index(fg::FeaturedGraph)
 
-Return a tuple containing two vectors, respectively containing the source and target 
-nodes of the edges in the graph `fg`.
+Return a tuple containing two vectors, respectively storing 
+the source and target nodes for each edge in `fg`.
 
 ```julia
 s, t = edge_index(fg)
@@ -94,6 +158,11 @@ function edge_index(fg::FeaturedGraph{<:ADJMAT_T})
     ntuple(i -> map(t->t[i], nz), 2)
 end
 
+"""
+    graph(fg::FeaturedGraph)
+
+Re
+"""
 graph(fg::FeaturedGraph) = fg.graph
 
 LightGraphs.edges(fg::FeaturedGraph) = zip(edge_index(fg)...)
@@ -177,9 +246,27 @@ end
 # edge_feature(fg::FeaturedGraph) = fg.edata["e"]
 # global_feature(fg::FeaturedGraph) = fg.gdata["g"]
 
+
+"""
+    node_feature(fg::FeaturedGraph)
+
+Return the node features of `fg`.
+"""
 node_feature(fg::FeaturedGraph) = fg.nf
+
+"""
+    edge_feature(fg::FeaturedGraph)
+
+Return the edge features of `fg`.
+"""
 edge_feature(fg::FeaturedGraph) = fg.ef
-global_feature(fg::FeaturedGraph) = fg.gf
+
+"""
+    global_feature(fg::FeaturedGraph)
+
+Return the global features of `fg`.
+"""
+global_feature(fg::NullGraph) = fg.gf
 
 # function Base.getproperty(fg::FeaturedGraph, sym::Symbol)
 #     if sym === :nf
@@ -241,6 +328,12 @@ function scaled_laplacian(fg::FeaturedGraph, T::DataType=Float32; dir=:out)
     return  2 / maximum(E) * Lnorm - I
 end
 
+"""
+    add_self_loops(fg::FeaturedGraph)
+
+Return a featured graph with the same features as `fg`,
+but with additional edges connecting each node to itself.
+"""
 function add_self_loops(fg::FeaturedGraph{<:COO_T})
     s, t = edge_index(fg)
     @assert edge_feature(fg) === nothing

From 092c7671aeacf2e46addb97da255228d0d9b65a4 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Mon, 2 Aug 2021 08:46:14 +0200
Subject: [PATCH 06/15] add docs

---
 docs/make.jl                     |  6 ++++--
 docs/src/manual/featuredgraph.md | 20 ++++++++++++++++++++
 docs/src/manual/linalg.md        | 22 ----------------------
 3 files changed, 24 insertions(+), 24 deletions(-)
 create mode 100644 docs/src/manual/featuredgraph.md

diff --git a/docs/make.jl b/docs/make.jl
index c94577157..ad84cd068 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -19,10 +19,12 @@ makedocs(
                ["Message passing scheme" => "abstractions/msgpass.md",
                 "Graph network block" => "abstractions/gn.md"],
              "Manual" =>
-               ["Convolutional Layers" => "manual/conv.md",
+               [
+                "Graphs" => "manual/featuredgraph.md",
+                "Convolutional Layers" => "manual/conv.md",
                 "Pooling Layers" => "manual/pool.md",
                 "Models" => "manual/models.md",
-                "Linear Algebra" => "manual/linalg.md"]
+               ]
     ]
 )
 
diff --git a/docs/src/manual/featuredgraph.md b/docs/src/manual/featuredgraph.md
new file mode 100644
index 000000000..15e328a24
--- /dev/null
+++ b/docs/src/manual/featuredgraph.md
@@ -0,0 +1,20 @@
+# Graphs
+
+GeometricFlux relies on the [`FeaturedGraph`](@ref)
+type to represent graph structures and feature arrays associated to
+nodes and edges.
+
+
+```@docs
+FeaturedGraph
+edge_index
+graph
+adjacency_list
+adjacency_matrix
+add_self_loops
+remove_self_loops
+degree
+laplacian_matrix
+normalized_laplacian
+scaled_laplacian
+```
diff --git a/docs/src/manual/linalg.md b/docs/src/manual/linalg.md
index 00a8604ee..e69de29bb 100644
--- a/docs/src/manual/linalg.md
+++ b/docs/src/manual/linalg.md
@@ -1,22 +0,0 @@
-# Linear Algebra
-
-
-```@docs
-GraphSignals.degrees
-```
-
-```@docs
-GraphSignals.degree_matrix
-```
-
-```@docs
-GraphSignals.inv_sqrt_degree_matrix
-```
-
-```@docs
-GraphSignals.laplacian_matrix
-```
-
-```@docs
-GraphSignals.normalized_laplacian
-```

From aec3a0bdc24e2197824b5294847f19245c110315 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Mon, 2 Aug 2021 08:47:21 +0200
Subject: [PATCH 07/15] add docs

---
 docs/src/manual/featuredgraph.md | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/docs/src/manual/featuredgraph.md b/docs/src/manual/featuredgraph.md
index 15e328a24..7d39be937 100644
--- a/docs/src/manual/featuredgraph.md
+++ b/docs/src/manual/featuredgraph.md
@@ -6,15 +6,15 @@ nodes and edges.
 
 
 ```@docs
-FeaturedGraph
-edge_index
-graph
-adjacency_list
-adjacency_matrix
-add_self_loops
-remove_self_loops
-degree
-laplacian_matrix
-normalized_laplacian
-scaled_laplacian
+GeometricFlux.FeaturedGraph
+GeometricFlux.edge_index
+GeometricFlux.graph
+GeometricFlux.adjacency_list
+GeometricFlux.adjacency_matrix
+GeometricFlux.add_self_loops
+GeometricFlux.remove_self_loops
+GeometricFlux.degree
+GeometricFlux.laplacian_matrix
+GeometricFlux.normalized_laplacian
+GeometricFlux.scaled_laplacian
 ```

From 2ce6f45be2cec15ba3f458881e16af2ef48d0547 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Tue, 3 Aug 2021 09:52:51 +0200
Subject: [PATCH 08/15] add dense/sparse support

---
 Project.toml                |  4 ++
 src/GeometricFlux.jl        |  1 +
 src/featuredgraph.jl        | 34 ++++++++---------
 src/graph_conversions.jl    | 73 ++++++++++++++++++++++++-------------
 test/cuda/featured_graph.jl |  4 +-
 test/runtests.jl            |  3 +-
 6 files changed, 74 insertions(+), 45 deletions(-)

diff --git a/Project.toml b/Project.toml
index aa0f9ff27..8fed0df26 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,18 +4,22 @@ authors = ["Yueh-Hua Tu <a504082002@gmail.com>"]
 version = "0.7.6"
 
 [deps]
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphMLDatasets = "21828b05-d3b3-40ad-870e-a4bc2f52d5e8"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
index 1db7c6b0c..12b37586e 100644
--- a/src/GeometricFlux.jl
+++ b/src/GeometricFlux.jl
@@ -3,6 +3,7 @@ module GeometricFlux
 using LinearAlgebra: similar
 using Statistics: mean
 using LinearAlgebra
+using SparseArrays
 using CUDA
 using FillArrays: Fill
 using Flux
diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
index 0ee5e8458..6aef6fdb0 100644
--- a/src/featuredgraph.jl
+++ b/src/featuredgraph.jl
@@ -15,9 +15,9 @@ Null object for `FeaturedGraph`.
 struct NullGraph <: AbstractFeaturedGraph end
 
 const COO_T = Tuple{T, T} where T <: AbstractVector
-const ADJMAT_T = AbstractMatrix
 const ADJLIST_T = AbstractVector{T} where T <: AbstractVector
-# const SPARSE_T = ...  Support sparse adjacency matrices in the future
+const ADJMAT_T = AbstractMatrix
+const SPARSE_T = AbstractSparseMatrix # subset of ADJMAT_T
 
 """
     FeaturedGraph(g; [graph_type, dir, num_nodes, nf, ef, gf])
@@ -45,7 +45,8 @@ A FeaturedGraph is a LightGraphs' `AbstractGraph`, therefore any
                 the underlying representation used by the FeaturedGraph. 
                 Currently supported values are 
     - `:coo`
-    - `:adjmat`  
+    - `:sparse`
+    - `:dense`  
     Default `:coo`.
 - `dir`. The assumed edge direction when given adjacency matrix or adjacency list input data `g`. 
         Possible values are `:out` and `:in`. Defaul `:out`.
@@ -101,7 +102,7 @@ end
 
 function FeaturedGraph(data; 
                         num_nodes = nothing, 
-                        graph_type = :adjmat,
+                        graph_type = :coo,
                         dir = :out,
                         nf = nothing, 
                         ef = nothing, 
@@ -111,12 +112,14 @@ function FeaturedGraph(data;
                         # gdata = Dict{String, Any}()
                         )
 
-    @assert graph_type ∈ [:coo, :adjmat] "Invalid graph_type $graph_type requested"
+    @assert graph_type ∈ [:coo, :dense, :sparse] "Invalid graph_type $graph_type requested"
     @assert dir ∈ [:in, :out]
     if graph_type == :coo
         g, num_nodes, num_edges = to_coo(data; num_nodes, dir)
-    else graph_type == :adjmat
-        g, num_nodes, num_edges = to_adjmat(data; dir)
+    elseif graph_type == :dense
+        g, num_nodes, num_edges = to_dense(data; dir)
+    elseif graph_type == :sparse
+        g, num_nodes, num_edges = to_sparse(data; dir)
     end
 
     ## Possible future implementation of feature maps. 
@@ -151,12 +154,9 @@ the source and target nodes for each edges in `fg`.
 s, t = edge_index(fg)
 ```
 """
-edge_index(fg::FeaturedGraph{<:COO_T}) = fg.graph
+edge_index(fg::FeaturedGraph{<:COO_T}) = graph(fg)
 
-function edge_index(fg::FeaturedGraph{<:ADJMAT_T})
-    nz = findall(!=(0), graph(fg)) # vec of cartesian indexes
-    ntuple(i -> map(t->t[i], nz), 2)
-end
+edge_index(fg::FeaturedGraph{<:ADJMAT_T}) = to_coo(graph(fg))[1]
 
 """
     graph(fg::FeaturedGraph)
@@ -210,15 +210,15 @@ function adjacency_list(fg::FeaturedGraph; dir=:out)
     return [fneighs(fg, i) for i in 1:fg.num_nodes]
 end
 
-# TODO return sparse matrix
+# TODO return sparse matrix (when support is good enough)
 function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:COO_T}, T::DataType=Int; dir=:out)
-    A, n, m = to_adjmat(fg.graph, T, num_nodes=fg.num_nodes)
+    A, n, m = to_dense(graph(fg), T, num_nodes=fg.num_nodes)
     return dir == :out ? A : A'
 end
 
-function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:ADJMAT_T}, T::DataType=eltype(fg.graph); dir=:out)
+function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:ADJMAT_T}, T::DataType=eltype(graph(fg)); dir=:out)
     @assert dir ∈ [:in, :out]
-    A = fg.graph 
+    A = graph(fg) 
     A = T != eltype(A) ? T.(A) : A
     return dir == :out ? A : A'
 end
@@ -266,7 +266,7 @@ edge_feature(fg::FeaturedGraph) = fg.ef
 
 Return the global features of `fg`.
 """
-global_feature(fg::NullGraph) = fg.gf
+global_feature(fg::FeaturedGraph) = fg.gf
 
 # function Base.getproperty(fg::FeaturedGraph, sym::Symbol)
 #     if sym === :nf
diff --git a/src/graph_conversions.jl b/src/graph_conversions.jl
index 2d3d14183..15ab1c8f6 100644
--- a/src/graph_conversions.jl
+++ b/src/graph_conversions.jl
@@ -1,38 +1,24 @@
 ### CONVERT_TO_COO REPRESENTATION ########
 
-function to_coo(graph::COO_T; num_nodes=nothing, dir=:out)
-    s, t = graph   
+function to_coo(eindex::COO_T; dir=:out, num_nodes=nothing)
+    s, t = eindex   
     num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
     @assert length(s) == length(t)
     @assert min(minimum(s), minimum(t)) >= 1 
     @assert max(maximum(s), maximum(t)) <= num_nodes 
 
     num_edges = length(s)
-    return graph, num_nodes, num_edges
+    return eindex, num_nodes, num_edges
 end
 
 function to_coo(adj_mat::ADJMAT_T; dir=:out, num_nodes=nothing)
-    @assert dir ∈ [:out, :in]
-    num_nodes = size(adj_mat, 1)
-    @assert num_nodes == size(adj_mat, 2)
-    @assert all(x -> (x == 1) || (x == 0), adj_mat)
-    num_edges = round(Int, sum(adj_mat))
-    s = zeros(Int, num_edges)
-    t = zeros(Int, num_edges)
-    e = 0
-    for j in 1:num_nodes
-        for i in 1:num_nodes
-            if adj_mat[i, j] == 1
-                e += 1
-                s[e] = i
-                t[e] = j
-            end
-        end
-    end
-    @assert e == num_edges
+    nz = findall(!=(0), adj_mat) # vec of cartesian indexes
+    s, t = ntuple(i -> map(t->t[i], nz), 2)
     if dir == :in
         s, t = t, s
     end
+    num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
+    num_edges = length(s)
     return (s, t), num_nodes, num_edges
 end
 
@@ -60,7 +46,11 @@ end
 
 ### CONVERT TO ADJACENCY MATRIX ################
 
-function to_adjmat(adj_mat::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num_nodes=nothing)
+### DENSE ####################
+
+to_dense(adj_mat::AbstractSparseMatrix, x...; kws...) = to_dense(collect(adj_mat), x...; kws...)
+
+function to_dense(adj_mat::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
     num_nodes = size(adj_mat, 1)
     @assert num_nodes == size(adj_mat, 2)
@@ -75,7 +65,7 @@ function to_adjmat(adj_mat::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num
     return adj_mat, num_nodes, num_edges
 end
 
-function to_adjmat(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=nothing)
+function to_dense(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
     num_nodes = length(adj_list)
     num_edges = sum(length.(adj_list))
@@ -93,7 +83,7 @@ function to_adjmat(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=not
     A, num_nodes, num_edges
 end
 
-function to_adjmat(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
+function to_dense(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
     # `dir` will be ignored since the input `eindex` is alwasys in source target format.
     # The output will always be a adjmat in :out format (e.g. A[i,j] denotes from i to j)
     s, t = eindex
@@ -103,8 +93,41 @@ function to_adjmat(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
     return adj_mat, n, length(s)
 end
 
+### SPARSE #############
+
+function to_sparse(adj_mat::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num_nodes=nothing)
+    @assert dir ∈ [:out, :in]
+    num_nodes = size(adj_mat, 1)
+    @assert num_nodes == size(adj_mat, 2)
+    # @assert all(x -> (x == 1) || (x == 0), adj_mat)
+    num_edges = round(Int, sum(adj_mat))
+    if dir == :in
+        adj_mat = adj_mat'
+    end
+    if T != eltype(adj_mat)
+        adj_mat = T.(adj_mat)
+    end
+    return sparse(adj_mat), num_nodes, num_edges
+end
+
+function to_sparse(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=nothing)
+    eindex, num_nodes, num_edges = to_coo(adj_list; dir, num_nodes)
+    to_sparse(eindex; dir, num_nodes)
+end
+
+function to_sparse(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
+    s, t = eindex    
+    val = fill!(similar(s, T), 1)
+    A = sparse(s, t, val)
+    num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
+    num_edges = length(s)
+    A, num_nodes, num_edges
+end
+
 @non_differentiable to_coo(x...)
-@non_differentiable to_adjmat(x...)
+@non_differentiable to_dense(x...)
+@non_differentiable to_sparse(x...)
+
 
 ## TODO
 # to_sparse
diff --git a/test/cuda/featured_graph.jl b/test/cuda/featured_graph.jl
index 8e2a3fb7e..6e252c3af 100644
--- a/test/cuda/featured_graph.jl
+++ b/test/cuda/featured_graph.jl
@@ -1,10 +1,10 @@
-@testset "featured graph" begin
+@testset "cuda/featured graph" begin
     s = [1,1,2,3,4,5,5,5]
     t = [2,5,3,2,1,4,3,1]
     s, t = [s; t], [t; s]  #symmetrize
     fg = FeaturedGraph(s, t, graph_type=GRAPH_T) 
     fg_gpu = fg |> gpu
-        
+
     @testset "functor" begin
         s_cpu, t_cpu = edge_index(fg)
         s_gpu, t_gpu = edge_index(fg_gpu)
diff --git a/test/runtests.jl b/test/runtests.jl
index ab134024d..fdad649c8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -35,7 +35,8 @@ else
     @warn "CUDA unavailable, not testing GPU support"
 end
 
-@testset "GeometricFlux: graph format $graph_type" for graph_type in (:coo, :adjmat)
+# Testing all graph types. :sparse is a bit broken at the moment
+@testset "GeometricFlux: graph format $graph_type" for graph_type in (:coo, :dense) # :sparse
     global GRAPH_T = graph_type
     for t in tests
         include("$(t).jl")

From 008be05f1d042285ecfa0eee61002f8135a5621d Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Tue, 3 Aug 2021 12:08:33 +0200
Subject: [PATCH 09/15] cpu sparse tests passing

---
 Project.toml         |  4 ++--
 src/GeometricFlux.jl |  4 +++-
 src/featuredgraph.jl | 29 ++++++++++++++++-------------
 3 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/Project.toml b/Project.toml
index 8fed0df26..d2e01ae6e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,12 +7,11 @@ version = "0.7.6"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphMLDatasets = "21828b05-d3b3-40ad-870e-a4bc2f52d5e8"
-JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
+KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
@@ -29,6 +28,7 @@ DataStructures = "0.18"
 FillArrays = "0.11, 0.12"
 Flux = "0.12"
 GraphMLDatasets = "0.1"
+KrylovKit = "0.5"
 LightGraphs = "1.3"
 NNlib = "0.7"
 NNlibCUDA = "0.1"
diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
index 12b37586e..eee35992f 100644
--- a/src/GeometricFlux.jl
+++ b/src/GeometricFlux.jl
@@ -1,9 +1,11 @@
 module GeometricFlux
 
-using LinearAlgebra: similar
+using NNlib: similar
+using LinearAlgebra: similar, fill!
 using Statistics: mean
 using LinearAlgebra
 using SparseArrays
+import KrylovKit
 using CUDA
 using FillArrays: Fill
 using Flux
diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
index 6aef6fdb0..b9c90559c 100644
--- a/src/featuredgraph.jl
+++ b/src/featuredgraph.jl
@@ -210,9 +210,8 @@ function adjacency_list(fg::FeaturedGraph; dir=:out)
     return [fneighs(fg, i) for i in 1:fg.num_nodes]
 end
 
-# TODO return sparse matrix (when support is good enough)
 function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:COO_T}, T::DataType=Int; dir=:out)
-    A, n, m = to_dense(graph(fg), T, num_nodes=fg.num_nodes)
+    A, n, m = to_sparse(graph(fg), T, num_nodes=fg.num_nodes)
     return dir == :out ? A : A'
 end
 
@@ -294,7 +293,7 @@ Normalized Laplacian matrix of graph `g`.
 # Arguments
 
 - `fg`: A `FeaturedGraph`.
-- `T`: result element type of degree vector; default `Float32`.
+- `T`: result element type.
 - `selfloop`: adding self loop while calculating the matrix.
 - `dir`: the edge directionality considered (:out, :in, :both).
 """
@@ -307,27 +306,31 @@ function normalized_laplacian(fg::FeaturedGraph, T::DataType=Float32; selfloop::
 end
 
 @doc raw"""
-    scaled_laplacian(g[, T]; dir=:out)
+    scaled_laplacian(fg, T=Float32; dir=:out)
 
 Scaled Laplacian matrix of graph `g`,
 defined as ``\hat{L} = \frac{2}{\lambda_{max}} L - I`` where ``L`` is the normalized Laplacian matrix.
 
 # Arguments
 
-- `g`: should be a adjacency matrix, `FeaturedGraph`, `SimpleGraph`, `SimpleDiGraph` (from LightGraphs) or `SimpleWeightedGraph`, `SimpleWeightedDiGraph` (from SimpleWeightedGraphs).
-- `T`: result element type of degree vector; default is the element type of `g` (optional).
+- `fg`: A `FeaturedGraph`.
+- `T`: result element type.
 - `dir`: the edge directionality considered (:out, :in, :both).
 """
 function scaled_laplacian(fg::FeaturedGraph, T::DataType=Float32; dir=:out)
-    A = adjacency_matrix(fg, T; dir=dir)
-    @assert issymmetric(A) "scaled_laplacian only works with symmetric matrices"
-    E = eigen(Symmetric(A)).values
-    degs = vec(sum(A; dims=2))
-    inv_sqrtD = Diagonal(inv.(sqrt.(degs)))
-    Lnorm = I - inv_sqrtD * A * inv_sqrtD
-    return  2 / maximum(E) * Lnorm - I
+    L = normalized_laplacian(fg, T)
+    @assert issymmetric(L) "scaled_laplacian only works with symmetric matrices"
+    λmax = _eigmax(L)
+    return  2 / λmax * L - I
 end
 
+# _eigmax(A) = eigmax(Symmetric(A)) # Doesn't work on sparse arrays
+_eigmax(A) = KrylovKit.eigsolve(Symmetric(A), 1, :LR)[1][1] # also eigs(A, x0, nev, mode) available 
+
+# Eigenvalues for cuarray don't seem to be well supported. 
+# https://github.com/JuliaGPU/CUDA.jl/issues/154
+# https://discourse.julialang.org/t/cuda-eigenvalues-of-a-sparse-matrix/46851/5
+
 """
     add_self_loops(fg::FeaturedGraph)
 

From 9be2a2e4f625f359e93caf1bf215d0740cce402f Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Tue, 3 Aug 2021 13:07:53 +0200
Subject: [PATCH 10/15] cleanup

---
 Project.toml             |  3 ++-
 src/graph_conversions.jl |  5 -----
 test/runtests.jl         | 15 +++++++++++----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/Project.toml b/Project.toml
index d2e01ae6e..ab6c2f69f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,13 +4,14 @@ authors = ["Yueh-Hua Tu <a504082002@gmail.com>"]
 version = "0.7.6"
 
 [deps]
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphMLDatasets = "21828b05-d3b3-40ad-870e-a4bc2f52d5e8"
+JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
diff --git a/src/graph_conversions.jl b/src/graph_conversions.jl
index 15ab1c8f6..2672ebbaf 100644
--- a/src/graph_conversions.jl
+++ b/src/graph_conversions.jl
@@ -127,8 +127,3 @@ end
 @non_differentiable to_coo(x...)
 @non_differentiable to_dense(x...)
 @non_differentiable to_sparse(x...)
-
-
-## TODO
-# to_sparse
-# to_dense
diff --git a/test/runtests.jl b/test/runtests.jl
index fdad649c8..83885422e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,8 @@
 using GeometricFlux
 using GeometricFlux.Datasets
 using Flux
+using CUDA
+using Flux: gpu
 using Flux: @functor
 using FillArrays
 using LinearAlgebra
@@ -27,18 +29,23 @@ tests = [
 ]
 
 if Flux.use_cuda[]
-    using CUDA
-    using Flux: gpu
-    using NNlibCUDA
     append!(tests, cuda_tests)
 else
     @warn "CUDA unavailable, not testing GPU support"
 end
 
 # Testing all graph types. :sparse is a bit broken at the moment
-@testset "GeometricFlux: graph format $graph_type" for graph_type in (:coo, :dense) # :sparse
+@testset "GeometricFlux: graph format $graph_type" for graph_type in (:coo, :dense, :sparse)
     global GRAPH_T = graph_type
     for t in tests
         include("$(t).jl")
     end
+
+    if Flux.use_cuda[] && GRAPH_T != :sparse
+        for t in cuda_tests
+            include("$(t).jl")
+        end
+    else
+        @warn "CUDA unavailable, not testing GPU support"
+    end
 end

From 19c6cc759632fa13949fa5df4b77a3ce885c5dd4 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello <carlo.lucibello@gmail.com>
Date: Tue, 3 Aug 2021 13:17:19 +0200
Subject: [PATCH 11/15] update docstring

remove Zygote
---
 Project.toml         |  7 ++-----
 src/GeometricFlux.jl |  1 -
 src/featuredgraph.jl | 12 +++++++++---
 src/utils.jl         | 10 +++++-----
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/Project.toml b/Project.toml
index ab6c2f69f..e5da43e8e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,12 +6,10 @@ version = "0.7.6"
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphMLDatasets = "21828b05-d3b3-40ad-870e-a4bc2f52d5e8"
-JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
 LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -21,7 +19,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [compat]
 CUDA = "3.3"
@@ -34,12 +31,12 @@ LightGraphs = "1.3"
 NNlib = "0.7"
 NNlibCUDA = "0.1"
 Reexport = "1.1"
-Zygote = "0.6"
 julia = "1.6"
 
 [extras]
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["SparseArrays", "Test"]
+test = ["SparseArrays", "Test", "Zygote"]
diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
index eee35992f..850543540 100644
--- a/src/GeometricFlux.jl
+++ b/src/GeometricFlux.jl
@@ -11,7 +11,6 @@ using FillArrays: Fill
 using Flux
 using Flux: glorot_uniform, leakyrelu, GRUCell, @functor
 using NNlib, NNlibCUDA
-using Zygote
 using ChainRulesCore
 import LightGraphs
 using LightGraphs: AbstractGraph, outneighbors, inneighbors, is_directed, ne, nv, 
diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
index b9c90559c..6a5772d9e 100644
--- a/src/featuredgraph.jl
+++ b/src/featuredgraph.jl
@@ -44,9 +44,10 @@ A FeaturedGraph is a LightGraphs' `AbstractGraph`, therefore any
 - `graph_type`: A keyword argument that specifies 
                 the underlying representation used by the FeaturedGraph. 
                 Currently supported values are 
-    - `:coo`
-    - `:sparse`
-    - `:dense`  
+    - `:coo`. Graph represented as a tuple `(source, target)`, such that the `k`-th edge 
+              connects the node `source[k]` to node `target[k]`
+    - `:sparse`. A sparse adjacency matrix representation.
+    - `:dense`. A dense adjacency matrix representation.  
     Default `:coo`.
 - `dir`. The assumed edge direction when given adjacency matrix or adjacency list input data `g`. 
         Possible values are `:out` and `:in`. Defaul `:out`.
@@ -64,6 +65,10 @@ using Flux, GeometricFlux
 g = [[2,3], [1,4,5], [1], [2,5], [2,4]]
 fg = FeaturedGraph(g)
 
+# Number of nodes and edges
+fg.num_nodes  # 5
+fg.num_edges  # 10 
+
 # Same graph in COO representation
 s = [1,1,2,2,2,3,4,4,5,5]
 t = [2,3,1,4,5,3,2,5,2,4]
@@ -80,6 +85,7 @@ fg = FeaturedGraph(fg, nf=rand(100, 5))
 fg = fg |> gpu
 
 # Collect edges' source and target nodes.
+# Both source and target are vectors of length num_edges
 source, target = edge_index(fg)
 ```
 
diff --git a/src/utils.jl b/src/utils.jl
index 67ab2d7f2..57d3cc3d1 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -6,9 +6,7 @@ value is accumulated numbers of edge (current vertex not included).
 """
 accumulated_edges(adj::AbstractVector{<:AbstractVector{<:Integer}}) = [0, cumsum(map(length, adj))...]
 
-Zygote.@nograd accumulated_edges
-
-Zygote.@nograd function generate_cluster(M::AbstractArray{T,N}, accu_edge) where {T,N}
+function generate_cluster(M::AbstractArray{T,N}, accu_edge) where {T,N}
     num_V = length(accu_edge) - 1
     num_E = accu_edge[end]
     cluster = similar(M, Int, num_E)
@@ -49,8 +47,6 @@ end
 
 edge_index_table(fg::FeaturedGraph) = edge_index_table(fg.graph)
 
-Zygote.@nograd edge_index_table
-
 function check_num_nodes(fg::FeaturedGraph, x::AbstractArray)
     @assert nv(fg) == size(x, ndims(x))    
 end
@@ -64,3 +60,7 @@ function add_self_loops(adjlist::AbstractVector{<:AbstractVector})
     end
     return anew
 end
+
+@non_differentiable accumulated_edges(x...)
+@non_differentiable generate_cluster(x...)
+@non_differentiable edge_index_table(x...)

From 04766a3cc11d0bf26e0a67d7ba3d888f4fb22fd3 Mon Sep 17 00:00:00 2001
From: CarloLucibello <carlo.lucibello@gmail.com>
Date: Fri, 13 Aug 2021 09:27:36 +0200
Subject: [PATCH 12/15] support values in coo

---
 src/GeometricFlux.jl     |   1 +
 src/featuredgraph.jl     |  67 ++++++++++++++++++------
 src/graph_conversions.jl | 109 +++++++++++++++++++++++++++++----------
 src/utils.jl             |   8 +++
 test/featured_graph.jl   |  43 +++++++++++----
 test/runtests.jl         |  15 +++---
 6 files changed, 184 insertions(+), 59 deletions(-)

diff --git a/src/GeometricFlux.jl b/src/GeometricFlux.jl
index 850543540..ab74763fa 100644
--- a/src/GeometricFlux.jl
+++ b/src/GeometricFlux.jl
@@ -22,6 +22,7 @@ export
     graph, edge_index,
     node_feature, edge_feature, global_feature,
     adjacency_list, normalized_laplacian, scaled_laplacian,
+    add_self_loops,
 
     # from LightGraphs
     adjacency_matrix, 
diff --git a/src/featuredgraph.jl b/src/featuredgraph.jl
index 6a5772d9e..2abc3a85c 100644
--- a/src/featuredgraph.jl
+++ b/src/featuredgraph.jl
@@ -14,7 +14,7 @@ Null object for `FeaturedGraph`.
 """
 struct NullGraph <: AbstractFeaturedGraph end
 
-const COO_T = Tuple{T, T} where T <: AbstractVector
+const COO_T = Tuple{T, T, V} where {T <: AbstractVector,V}
 const ADJLIST_T = AbstractVector{T} where T <: AbstractVector
 const ADJMAT_T = AbstractMatrix
 const SPARSE_T = AbstractSparseMatrix # subset of ADJMAT_T
@@ -27,12 +27,13 @@ A type representing a graph structure and storing also arrays
 that contain features associated to nodes, edges, and the whole graph. 
     
 A `FeaturedGraph` can be constructed out of different objects `g` representing
-the connections inside the graph, while the internal representation 
+the connections inside the graph, while the internal representation type
 is governed by `graph_type`. 
-When constructed from another featured graph `fg`, the internal representationis 
+When constructed from another featured graph `fg`, the internal graph representation
 is preserved and shared. 
 
-A FeaturedGraph is a LightGraphs' `AbstractGraph`, therefore any 
+A `FeaturedGraph` is a LightGraphs' `AbstractGraph`, therefore any functionality
+from the LightGraphs' graph library can be used on it.
 
 # Arguments 
 
@@ -45,7 +46,8 @@ A FeaturedGraph is a LightGraphs' `AbstractGraph`, therefore any
                 the underlying representation used by the FeaturedGraph. 
                 Currently supported values are 
     - `:coo`. Graph represented as a tuple `(source, target)`, such that the `k`-th edge 
-              connects the node `source[k]` to node `target[k]`
+              connects the node `source[k]` to node `target[k]`.
+              Optionally, also edge weights can be given: `(source, target, weights)`.
     - `:sparse`. A sparse adjacency matrix representation.
     - `:dense`. A dense adjacency matrix representation.  
     Default `:coo`.
@@ -139,8 +141,17 @@ function FeaturedGraph(data;
     FeaturedGraph(g, num_nodes, num_edges, nf, ef, gf)
 end
 
-FeaturedGraph(s::AbstractVector, t::AbstractVector; kws...) = FeaturedGraph((s,t); kws...)
-FeaturedGraph(g::AbstractGraph; kws...) = FeaturedGraph(adjacency_matrix(g, dir=:out); kws...)
+# COO convenience constructors
+FeaturedGraph(s::AbstractVector, t::AbstractVector, v = nothing; kws...) = FeaturedGraph((s, t, v); kws...)
+FeaturedGraph((s, t)::NTuple{2}; kws...) = FeaturedGraph((s, t, nothing); kws...)
+
+# FeaturedGraph(g::AbstractGraph; kws...) = FeaturedGraph(adjacency_matrix(g, dir=:out); kws...)
+
+function FeaturedGraph(g::AbstractGraph; kws...)
+    s = LightGraphs.src.(LightGraphs.edges(g))
+    t = LightGraphs.dst.(LightGraphs.edges(g)) 
+    FeaturedGraph((s, t); kws...)
+end
 
 function FeaturedGraph(fg::FeaturedGraph; 
                 nf=node_feature(fg), ef=edge_feature(fg), gf=global_feature(fg))
@@ -160,14 +171,17 @@ the source and target nodes for each edges in `fg`.
 s, t = edge_index(fg)
 ```
 """
-edge_index(fg::FeaturedGraph{<:COO_T}) = graph(fg)
+edge_index(fg::FeaturedGraph{<:COO_T}) = graph(fg)[1:2]
 
-edge_index(fg::FeaturedGraph{<:ADJMAT_T}) = to_coo(graph(fg))[1]
+edge_index(fg::FeaturedGraph{<:ADJMAT_T}) = to_coo(graph(fg))[1][1:2]
+
+edge_weight(fg::FeaturedGraph{<:COO_T}) = graph(fg)[3]
 
 """
     graph(fg::FeaturedGraph)
 
-Re
+Return the underlying implementation of the graph structure of `fg`,
+either an adjacency matrix or an edge list in the COO format.
 """
 graph(fg::FeaturedGraph) = fg.graph
 
@@ -184,7 +198,7 @@ LightGraphs.has_edge(fg::FeaturedGraph{<:ADJMAT_T}, i::Integer, j::Integer) = gr
 
 LightGraphs.nv(fg::FeaturedGraph) = fg.num_nodes
 LightGraphs.ne(fg::FeaturedGraph) = fg.num_edges
-LightGraphs.has_vertex(fg::FeaturedGraph, i::Int) = i in 1:fg.num_nodes
+LightGraphs.has_vertex(fg::FeaturedGraph, i::Int) = 1 <= i <= fg.num_nodes
 LightGraphs.vertices(fg::FeaturedGraph) = 1:fg.num_nodes
 
 function LightGraphs.outneighbors(fg::FeaturedGraph{<:COO_T}, i::Integer)
@@ -218,6 +232,7 @@ end
 
 function LightGraphs.adjacency_matrix(fg::FeaturedGraph{<:COO_T}, T::DataType=Int; dir=:out)
     A, n, m = to_sparse(graph(fg), T, num_nodes=fg.num_nodes)
+    @assert size(A) == (n, n)
     return dir == :out ? A : A'
 end
 
@@ -305,7 +320,13 @@ Normalized Laplacian matrix of graph `g`.
 """
 function normalized_laplacian(fg::FeaturedGraph, T::DataType=Float32; selfloop::Bool=false, dir::Symbol=:out)
     A = adjacency_matrix(fg, T; dir=dir)
-    selfloop && (A += I)
+    sz = size(A)
+    @assert sz[1] == sz[2]
+    if selfloop
+        A += I - Diagonal(A)
+    else
+        A -= Diagonal(A) 
+    end
     degs = vec(sum(A; dims=2))
     inv_sqrtD = Diagonal(inv.(sqrt.(degs)))
     return I - inv_sqrtD * A * inv_sqrtD
@@ -346,6 +367,7 @@ but also adding edges connecting the nodes to themselves.
 function add_self_loops(fg::FeaturedGraph{<:COO_T})
     s, t = edge_index(fg)
     @assert edge_feature(fg) === nothing
+    @assert edge_weight(fg) === nothing
     mask_old_loops = s .!= t
     s = s[mask_old_loops]
     t = t[mask_old_loops]
@@ -354,19 +376,32 @@ function add_self_loops(fg::FeaturedGraph{<:COO_T})
     s = [s; nodes]
     t = [t; nodes]
 
-    FeaturedGraph((s, t), fg.num_nodes, fg.num_edges,
+    FeaturedGraph((s, t, nothing), fg.num_nodes, length(s),
+        node_feature(fg), edge_feature(fg), global_feature(fg))
+end
+
+function add_self_loops(fg::FeaturedGraph{<:ADJMAT_T})
+    A = graph(fg)
+    @assert edge_feature(fg) === nothing
+    nold = sum(Diagonal(A)) |> Int
+    A = A - Diagonal(A) + I
+    num_edges =  fg.num_edges - nold + fg.num_nodes
+    FeaturedGraph(A, fg.num_nodes, num_edges,
         node_feature(fg), edge_feature(fg), global_feature(fg))
 end
 
 
 function remove_self_loops(fg::FeaturedGraph{<:COO_T})
     s, t = edge_index(fg)
+    # TODO remove these constraints
     @assert edge_feature(fg) === nothing
+    @assert edge_weight(fg) === nothing
+    
     mask_old_loops = s .!= t
     s = s[mask_old_loops]
     t = t[mask_old_loops]
 
-    FeaturedGraph((s, t), fg.num_nodes, fg.num_edges,
+    FeaturedGraph((s, t, nothing), fg.num_nodes, length(s),
         node_feature(fg), edge_feature(fg), global_feature(fg))
 end
 
@@ -375,8 +410,8 @@ end
 @non_differentiable adjacency_matrix(x...)
 @non_differentiable adjacency_list(x...)
 @non_differentiable degree(x...)
-@non_differentiable add_self_loops(x...)
-@non_differentiable remove_self_loops(x...)
+@non_differentiable add_self_loops(x...)     # TODO this is wrong, since fg carries feature arrays, needs rrule
+@non_differentiable remove_self_loops(x...)  # TODO this is wrong, since fg carries feature arrays, needs rrule
 
 # # delete when https://github.com/JuliaDiff/ChainRules.jl/pull/472 is merged
 # function ChainRulesCore.rrule(::typeof(copy), x)
diff --git a/src/graph_conversions.jl b/src/graph_conversions.jl
index 2672ebbaf..750ce7c8b 100644
--- a/src/graph_conversions.jl
+++ b/src/graph_conversions.jl
@@ -1,14 +1,15 @@
 ### CONVERT_TO_COO REPRESENTATION ########
 
-function to_coo(eindex::COO_T; dir=:out, num_nodes=nothing)
-    s, t = eindex   
+function to_coo(coo::COO_T; dir=:out, num_nodes=nothing)
+    s, t, val = coo   
     num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
+    @assert isnothing(val) || length(val) == length(s)
     @assert length(s) == length(t)
     @assert min(minimum(s), minimum(t)) >= 1 
     @assert max(maximum(s), maximum(t)) <= num_nodes 
 
     num_edges = length(s)
-    return eindex, num_nodes, num_edges
+    return coo, num_nodes, num_edges
 end
 
 function to_coo(adj_mat::ADJMAT_T; dir=:out, num_nodes=nothing)
@@ -19,7 +20,7 @@ function to_coo(adj_mat::ADJMAT_T; dir=:out, num_nodes=nothing)
     end
     num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
     num_edges = length(s)
-    return (s, t), num_nodes, num_edges
+    return (s, t, nothing), num_nodes, num_edges
 end
 
 function to_coo(adj_list::ADJLIST_T; dir=:out, num_nodes=nothing)
@@ -41,7 +42,7 @@ function to_coo(adj_list::ADJLIST_T; dir=:out, num_nodes=nothing)
     if dir == :in
         s, t = t, s
     end
-    (s, t), num_nodes, num_edges
+    (s, t, nothing), num_nodes, num_edges
 end
 
 ### CONVERT TO ADJACENCY MATRIX ################
@@ -83,43 +84,99 @@ function to_dense(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=noth
     A, num_nodes, num_edges
 end
 
-function to_dense(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
-    # `dir` will be ignored since the input `eindex` is alwasys in source target format.
+function to_dense(coo::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
+    # `dir` will be ignored since the input `coo` is always in source -> target format.
     # The output will always be a adjmat in :out format (e.g. A[i,j] denotes from i to j)
-    s, t = eindex
+    s, t, val = coo
     n = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes
-    adj_mat = fill!(similar(s, T, (n, n)), 0)
-    adj_mat[s .+ n .* (t .- 1)] .= 1 # exploiting linear indexing
-    return adj_mat, n, length(s)
+    A = fill!(similar(s, T, (n, n)), 0)
+    if isnothing(val)
+        A[s .+ n .* (t .- 1)] .= 1 # exploiting linear indexing
+    else    
+        A[s .+ n .* (t .- 1)] .= val # exploiting linear indexing
+    end
+    return A, n, length(s)
 end
 
 ### SPARSE #############
 
-function to_sparse(adj_mat::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num_nodes=nothing)
+##########################################
+# Remove when https://github.com/JuliaGPU/CUDA.jl/pull/1093 is merged and new version tagged
+
+using CUDA.CUSPARSE: CuSparseMatrixCSR, CuSparseMatrixCSC, CuSparseMatrixCOO, CuSparseMatrixBSR
+
+CUDA.CUSPARSE.CuSparseMatrixCSC(coo::CuSparseMatrixCOO) = CuSparseMatrixCSC(CuSparseMatrixCSR(coo)) # no direct conversion
+CUDA.CUSPARSE.CuSparseMatrixCOO(csc::CuSparseMatrixCSC) = CuSparseMatrixCOO(CuSparseMatrixCSR(csc)) # no direct conversion
+CUDA.CUSPARSE.CuSparseMatrixBSR(coo::CuSparseMatrixCOO, blockdim) = CuSparseMatrixBSR(CuSparseMatrixCSR(coo), blockdim) # no direct conversion
+CUDA.CUSPARSE.CuSparseMatrixCOO(bsr::CuSparseMatrixBSR) = CuSparseMatrixCOO(CuSparseMatrixCSR(bsr)) # no direct conversion
+
+"""
+    sparse(x::DenseCuMatrix; fmt=:csc)
+    sparse(I::CuVector, J::CuVector, V::CuVector, [m, n]; fmt=:csc)
+
+Return a sparse cuda matrix, with type determined by `fmt`.
+Possible formats are :csc, :csr, :bsr, and :coo.
+"""
+function SparseArrays.sparse(x::DenseCuMatrix; fmt=:csc)
+    if fmt == :csc
+        return CuSparseMatrixCSC(x)
+    elseif fmt == :csr 
+        return CuSparseMatrixCSR(x)
+    elseif fmt == :bsr
+        return CuSparseMatrixBSR(x)
+    elseif fmt == :coo
+        return CuSparseMatrixCOO(x)
+    else
+        error("Format :$fmt not available, use :csc, :csr, :bsr or :coo.")
+    end
+end
+
+SparseArrays.sparse(I::CuVector, J::CuVector, V::CuVector; kws...) = 
+    sparse(I, J, V, maximum(I), maximum(J); kws...)
+
+SparseArrays.sparse(I::CuVector, J::CuVector, V::CuVector, m, n; kws...) = 
+    sparse(Cint.(I), Cint.(J), V, m, n; kws...)
+
+function SparseArrays.sparse(I::CuVector{Cint}, J::CuVector{Cint}, V::CuVector{Tv}, m, n; 
+            fmt=:csc) where Tv
+    spcoo = CuSparseMatrixCOO{Tv}(I, J, V, (m, n))
+    if fmt == :csc
+        return CuSparseMatrixCSC(spcoo)
+    elseif fmt == :csr 
+        return CuSparseMatrixCSR(spcoo)
+    elseif fmt == :coo
+        return spcoo
+    else
+        error("Format :$fmt not available, use :csc, :csr, or :coo.")
+    end
+end
+#############################################
+
+function to_sparse(A::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
-    num_nodes = size(adj_mat, 1)
-    @assert num_nodes == size(adj_mat, 2)
-    # @assert all(x -> (x == 1) || (x == 0), adj_mat)
-    num_edges = round(Int, sum(adj_mat))
+    num_nodes = size(A, 1)
+    @assert num_nodes == size(A, 2)
+    num_edges = round(Int, sum(A))
     if dir == :in
-        adj_mat = adj_mat'
+        A = A'
     end
-    if T != eltype(adj_mat)
-        adj_mat = T.(adj_mat)
+    if T != eltype(A)
+        A = T.(A)
     end
-    return sparse(adj_mat), num_nodes, num_edges
+    return sparse(A), num_nodes, num_edges
 end
 
+
 function to_sparse(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=nothing)
-    eindex, num_nodes, num_edges = to_coo(adj_list; dir, num_nodes)
-    to_sparse(eindex; dir, num_nodes)
+    coo, num_nodes, num_edges = to_coo(adj_list; dir, num_nodes)
+    to_sparse(coo, T; dir, num_nodes)
 end
 
-function to_sparse(eindex::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
-    s, t = eindex    
-    val = fill!(similar(s, T), 1)
-    A = sparse(s, t, val)
+function to_sparse(coo::COO_T, T::DataType=Int; dir=:out, num_nodes=nothing)
+    s, t, eweight  = coo
+    eweight = isnothing(eweight) ? fill!(similar(s, T), 1) : eweight
     num_nodes = isnothing(num_nodes) ? max(maximum(s), maximum(t)) : num_nodes 
+    A = sparse(s, t, eweight, num_nodes, num_nodes)
     num_edges = length(s)
     A, num_nodes, num_edges
 end
diff --git a/src/utils.jl b/src/utils.jl
index 57d3cc3d1..f0f307443 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -61,6 +61,14 @@ function add_self_loops(adjlist::AbstractVector{<:AbstractVector})
     return anew
 end
 
+sort_edge_index(eindex::Tuple) = sort_edge_index(eindex...)
+
+function sort_edge_index(u, v)
+    uv = collect(zip(u, v))
+    p = sortperm(uv) # isless lexicographically defined for tuples
+    return u[p], v[p]
+end
+
 @non_differentiable accumulated_edges(x...)
 @non_differentiable generate_cluster(x...)
 @non_differentiable edge_index_table(x...)
diff --git a/test/featured_graph.jl b/test/featured_graph.jl
index 29079e3a3..b4ba80a0f 100644
--- a/test/featured_graph.jl
+++ b/test/featured_graph.jl
@@ -1,7 +1,7 @@
 @testset "FeaturedGraph" begin
     @testset "symmetric graph" begin
-        u = [1, 2, 3, 4, 2, 3, 4, 1]
-        v = [2, 3, 4, 1, 1, 2, 3, 4]
+        s = [1, 1, 2, 2, 3, 3, 4, 4]
+        t = [2, 4, 1, 3, 2, 4, 1, 3]
         adj_mat =  [0  1  0  1
                     1  0  1  0
                     0  1  0  1
@@ -10,14 +10,17 @@
         adj_list_in =  [[2,4], [1,3], [2,4], [1,3]]
 
         # core functionality
-        fg = FeaturedGraph(u, v; graph_type=GRAPH_T)
+        fg = FeaturedGraph(s, t; graph_type=GRAPH_T)
         @test fg.num_edges == 8
         @test fg.num_nodes == 4
-        @test collect(edges(fg)) |> sort == collect(zip(u, v)) |> sort
+        @test collect(edges(fg)) |> sort == collect(zip(s, t)) |> sort
         @test sort(outneighbors(fg, 1)) == [2, 4] 
         @test sort(inneighbors(fg, 1)) == [2, 4] 
         @test is_directed(fg) == true
-
+        s1, t1 = sort_edge_index(edge_index(fg))
+        @test s1 == s
+        @test t1 == t
+        
         # adjacency
         @test adjacency_matrix(fg) == adj_mat
         @test adjacency_matrix(fg; dir=:in) == adj_mat
@@ -39,8 +42,8 @@
     end
 
     @testset "asymmetric graph" begin
-        u = [1, 2, 3, 4]
-        v = [2, 3, 4, 1]
+        s = [1, 2, 3, 4]
+        t = [2, 3, 4, 1]
         adj_mat_out =  [0  1  0  0
                         0  0  1  0
                         0  0  0  1
@@ -55,13 +58,16 @@
         adj_list_in =  [[4], [1], [2], [3]]
 
         # core functionality
-        fg = FeaturedGraph(u, v; graph_type=GRAPH_T)
+        fg = FeaturedGraph(s, t; graph_type=GRAPH_T)
         @test fg.num_edges == 4
         @test fg.num_nodes == 4
-        @test collect(edges(fg)) |> sort == collect(zip(u, v)) |> sort
+        @test collect(edges(fg)) |> sort == collect(zip(s, t)) |> sort
         @test sort(outneighbors(fg, 1)) == [2] 
         @test sort(inneighbors(fg, 1)) == [4] 
         @test is_directed(fg) == true
+        s1, t1 = sort_edge_index(edge_index(fg))
+        @test s1 == s
+        @test t1 == t
 
         # adjacency
         @test adjacency_matrix(fg) ==  adj_mat_out
@@ -78,4 +84,21 @@
         end
     end
 
-end
\ No newline at end of file
+    @testset "add self-loops" begin
+        A = [1  1  0  0
+             0  0  1  0
+             0  0  0  1
+             1  0  0  0]
+        A2 =   [1  1  0  0
+                0  1  1  0
+                0  0  1  1
+                1  0  0  1]
+
+        fg = FeaturedGraph(A; graph_type=GRAPH_T)
+        fg2 = add_self_loops(fg)
+        @test adjacency_matrix(fg) == A
+        @test fg.num_edges == sum(A)
+        @test adjacency_matrix(fg2) == A2
+        @test fg2.num_edges == sum(A2)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 83885422e..f03818283 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,6 @@
 using GeometricFlux
 using GeometricFlux.Datasets
+using GeometricFlux: sort_edge_index
 using Flux
 using CUDA
 using Flux: gpu
@@ -19,13 +20,13 @@ cuda_tests = [
 ]
 
 tests = [
-    "featured_graph",
-    "layers/gn",
-    "layers/msgpass",
-    "layers/conv",
-    "layers/pool",
-    "layers/misc",
-    "models",
+    # "featured_graph",
+    # "layers/gn",
+    # "layers/msgpass",
+    # "layers/conv",
+    # "layers/pool",
+    # "layers/misc",
+    # "models",
 ]
 
 if Flux.use_cuda[]

From 2997024024610d8bec792d15635640c63e56edb4 Mon Sep 17 00:00:00 2001
From: CarloLucibello <carlo.lucibello@gmail.com>
Date: Fri, 13 Aug 2021 10:05:40 +0200
Subject: [PATCH 13/15] fixes

---
 src/graph_conversions.jl    | 26 +++++++++++++-------------
 test/cuda/featured_graph.jl | 30 ++++++++++++++++++++++++------
 test/runtests.jl            | 21 ++++++++-------------
 3 files changed, 45 insertions(+), 32 deletions(-)

diff --git a/src/graph_conversions.jl b/src/graph_conversions.jl
index 750ce7c8b..1b1910291 100644
--- a/src/graph_conversions.jl
+++ b/src/graph_conversions.jl
@@ -12,8 +12,8 @@ function to_coo(coo::COO_T; dir=:out, num_nodes=nothing)
     return coo, num_nodes, num_edges
 end
 
-function to_coo(adj_mat::ADJMAT_T; dir=:out, num_nodes=nothing)
-    nz = findall(!=(0), adj_mat) # vec of cartesian indexes
+function to_coo(A::ADJMAT_T; dir=:out, num_nodes=nothing)
+    nz = findall(!=(0), A) # vec of cartesian indexes
     s, t = ntuple(i -> map(t->t[i], nz), 2)
     if dir == :in
         s, t = t, s
@@ -49,21 +49,21 @@ end
 
 ### DENSE ####################
 
-to_dense(adj_mat::AbstractSparseMatrix, x...; kws...) = to_dense(collect(adj_mat), x...; kws...)
+to_dense(A::AbstractSparseMatrix, x...; kws...) = to_dense(collect(A), x...; kws...)
 
-function to_dense(adj_mat::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num_nodes=nothing)
+function to_dense(A::ADJMAT_T, T::DataType=eltype(A); dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
-    num_nodes = size(adj_mat, 1)
-    @assert num_nodes == size(adj_mat, 2)
-    # @assert all(x -> (x == 1) || (x == 0), adj_mat)
-    num_edges = round(Int, sum(adj_mat))
+    num_nodes = size(A, 1)
+    @assert num_nodes == size(A, 2)
+    # @assert all(x -> (x == 1) || (x == 0), A)
+    num_edges = round(Int, sum(A))
     if dir == :in
-        adj_mat = adj_mat'
+        A = A'
     end
-    if T != eltype(adj_mat)
-        adj_mat = T.(adj_mat)
+    if T != eltype(A)
+        A = T.(A)
     end
-    return adj_mat, num_nodes, num_edges
+    return A, num_nodes, num_edges
 end
 
 function to_dense(adj_list::ADJLIST_T, T::DataType=Int; dir=:out, num_nodes=nothing)
@@ -152,7 +152,7 @@ function SparseArrays.sparse(I::CuVector{Cint}, J::CuVector{Cint}, V::CuVector{T
 end
 #############################################
 
-function to_sparse(A::ADJMAT_T, T::DataType=eltype(adj_mat); dir=:out, num_nodes=nothing)
+function to_sparse(A::ADJMAT_T, T::DataType=eltype(A); dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
     num_nodes = size(A, 1)
     @assert num_nodes == size(A, 2)
diff --git a/test/cuda/featured_graph.jl b/test/cuda/featured_graph.jl
index 6e252c3af..5cb0dae61 100644
--- a/test/cuda/featured_graph.jl
+++ b/test/cuda/featured_graph.jl
@@ -15,15 +15,33 @@
     end
 
     @testset "adjacency_matrix" begin
-        mat = adjacency_matrix(fg)
-        mat_gpu = adjacency_matrix(fg_gpu)
-        @test mat_gpu isa CuMatrix{Int}
+        function test_adj()
+            mat = adjacency_matrix(fg)
+            mat_gpu = adjacency_matrix(fg_gpu)
+            @test mat_gpu isa CuMatrix{Int}
+            true
+        end
+
+        if GRAPH_T == :coo
+            # See https://github.com/JuliaGPU/CUDA.jl/pull/1093
+            @test_broken test_adj()
+        else
+            test_adj()
+        end
     end
 
     @testset "normalized_laplacian" begin
-        mat = normalized_laplacian(fg)
-        mat_gpu = normalized_laplacian(fg_gpu)
-        @test mat_gpu isa CuMatrix{Float32}
+        function test_normlapl()
+            mat = normalized_laplacian(fg)
+            mat_gpu = normalized_laplacian(fg_gpu)
+            @test mat_gpu isa CuMatrix{Float32}
+            true
+        end
+        if GRAPH_T == :coo
+            @test_broken test_normlapl()
+        else
+            test_normlapl()
+        end
     end
 
     @testset "scaled_laplacian" begin
diff --git a/test/runtests.jl b/test/runtests.jl
index f03818283..e777a34ba 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,6 +12,7 @@ using LightGraphs
 using Statistics: mean
 using Zygote
 using Test
+CUDA.allowscalar(false)
 
 cuda_tests = [
     "cuda/featured_graph",
@@ -20,21 +21,15 @@ cuda_tests = [
 ]
 
 tests = [
-    # "featured_graph",
-    # "layers/gn",
-    # "layers/msgpass",
-    # "layers/conv",
-    # "layers/pool",
-    # "layers/misc",
-    # "models",
+    "featured_graph",
+    "layers/gn",
+    "layers/msgpass",
+    "layers/conv",
+    "layers/pool",
+    "layers/misc",
+    "models",
 ]
 
-if Flux.use_cuda[]
-    append!(tests, cuda_tests)
-else
-    @warn "CUDA unavailable, not testing GPU support"
-end
-
 # Testing all graph types. :sparse is a bit broken at the moment
 @testset "GeometricFlux: graph format $graph_type" for graph_type in (:coo, :dense, :sparse)
     global GRAPH_T = graph_type

From e272139747215e54a5d27d377ec2873ab1bd749e Mon Sep 17 00:00:00 2001
From: CarloLucibello <carlo.lucibello@gmail.com>
Date: Fri, 13 Aug 2021 10:20:36 +0200
Subject: [PATCH 14/15] merge

---
 src/graph_conversions.jl | 52 ----------------------------------------
 src/layers/conv.jl       |  5 ++--
 2 files changed, 2 insertions(+), 55 deletions(-)

diff --git a/src/graph_conversions.jl b/src/graph_conversions.jl
index 1b1910291..007f990ea 100644
--- a/src/graph_conversions.jl
+++ b/src/graph_conversions.jl
@@ -100,58 +100,6 @@ end
 
 ### SPARSE #############
 
-##########################################
-# Remove when https://github.com/JuliaGPU/CUDA.jl/pull/1093 is merged and new version tagged
-
-using CUDA.CUSPARSE: CuSparseMatrixCSR, CuSparseMatrixCSC, CuSparseMatrixCOO, CuSparseMatrixBSR
-
-CUDA.CUSPARSE.CuSparseMatrixCSC(coo::CuSparseMatrixCOO) = CuSparseMatrixCSC(CuSparseMatrixCSR(coo)) # no direct conversion
-CUDA.CUSPARSE.CuSparseMatrixCOO(csc::CuSparseMatrixCSC) = CuSparseMatrixCOO(CuSparseMatrixCSR(csc)) # no direct conversion
-CUDA.CUSPARSE.CuSparseMatrixBSR(coo::CuSparseMatrixCOO, blockdim) = CuSparseMatrixBSR(CuSparseMatrixCSR(coo), blockdim) # no direct conversion
-CUDA.CUSPARSE.CuSparseMatrixCOO(bsr::CuSparseMatrixBSR) = CuSparseMatrixCOO(CuSparseMatrixCSR(bsr)) # no direct conversion
-
-"""
-    sparse(x::DenseCuMatrix; fmt=:csc)
-    sparse(I::CuVector, J::CuVector, V::CuVector, [m, n]; fmt=:csc)
-
-Return a sparse cuda matrix, with type determined by `fmt`.
-Possible formats are :csc, :csr, :bsr, and :coo.
-"""
-function SparseArrays.sparse(x::DenseCuMatrix; fmt=:csc)
-    if fmt == :csc
-        return CuSparseMatrixCSC(x)
-    elseif fmt == :csr 
-        return CuSparseMatrixCSR(x)
-    elseif fmt == :bsr
-        return CuSparseMatrixBSR(x)
-    elseif fmt == :coo
-        return CuSparseMatrixCOO(x)
-    else
-        error("Format :$fmt not available, use :csc, :csr, :bsr or :coo.")
-    end
-end
-
-SparseArrays.sparse(I::CuVector, J::CuVector, V::CuVector; kws...) = 
-    sparse(I, J, V, maximum(I), maximum(J); kws...)
-
-SparseArrays.sparse(I::CuVector, J::CuVector, V::CuVector, m, n; kws...) = 
-    sparse(Cint.(I), Cint.(J), V, m, n; kws...)
-
-function SparseArrays.sparse(I::CuVector{Cint}, J::CuVector{Cint}, V::CuVector{Tv}, m, n; 
-            fmt=:csc) where Tv
-    spcoo = CuSparseMatrixCOO{Tv}(I, J, V, (m, n))
-    if fmt == :csc
-        return CuSparseMatrixCSC(spcoo)
-    elseif fmt == :csr 
-        return CuSparseMatrixCSR(spcoo)
-    elseif fmt == :coo
-        return spcoo
-    else
-        error("Format :$fmt not available, use :csc, :csr, or :coo.")
-    end
-end
-#############################################
-
 function to_sparse(A::ADJMAT_T, T::DataType=eltype(A); dir=:out, num_nodes=nothing)
     @assert dir ∈ [:out, :in]
     num_nodes = size(A, 1)
diff --git a/src/layers/conv.jl b/src/layers/conv.jl
index 01958b386..dc6633b48 100644
--- a/src/layers/conv.jl
+++ b/src/layers/conv.jl
@@ -426,9 +426,8 @@ update(g::GINConv, m::AbstractVector, x) = g.nn((1 + g.eps) * x + m)
 @functor GINConv
 
 function (g::GINConv)(fg::FeaturedGraph, X::AbstractMatrix)
-    gf = graph(fg)
-    GraphSignals.check_num_node(gf, X)
-    _, X = propagate(g, adjacency_list(gf), Fill(0.f0, 0, ne(gf)), X, +)
+    check_num_nodes(fg, X)
+    _, X = propagate(g, adjacency_list(fg), Fill(0.f0, 0, ne(fg)), X, +)
     X
 end
 

From b068283527e35e5c0549ee77ada9ee1db0c1b47d Mon Sep 17 00:00:00 2001
From: CarloLucibello <carlo.lucibello@gmail.com>
Date: Fri, 13 Aug 2021 10:39:21 +0200
Subject: [PATCH 15/15] fix tests

---
 test/layers/conv.jl | 8 ++++----
 test/runtests.jl    | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/layers/conv.jl b/test/layers/conv.jl
index 23c173bf9..27a9f7da1 100644
--- a/test/layers/conv.jl
+++ b/test/layers/conv.jl
@@ -356,7 +356,7 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex, graph_type=GRAPH_T)
             gc = GINConv(FeaturedGraph(adj), nn, eps=eps)
             @test size(gc.nn.layers[1].weight) == (out_channel, in_channel)
             @test size(gc.nn.layers[1].bias) == (out_channel, )
-            @test graph(gc.fg) === adj
+            @test adjacency_matrix(gc.fg) == adj
 
             Y = gc(FeaturedGraph(adj, nf=X))
             @test size(node_feature(Y)) == (out_channel, N)
@@ -365,9 +365,9 @@ fg_single_vertex = FeaturedGraph(adj_single_vertex, graph_type=GRAPH_T)
             Y = gc(FeaturedGraph(adj, nf=Xt))
             @test size(node_feature(Y)) == (out_channel, N)
 
-            g = Zygote.gradient(x -> sum(node_feature(gc(x))), 
-                                FeaturedGraph(adj, nf=X))[1]
-            @test size(g.x.nf) == size(X)
+            fg = FeaturedGraph(adj, nf=X) 
+            g = Zygote.gradient(fg -> sum(node_feature(gc(fg))), fg)[1]
+            @test size(g.nf) == size(X)
 
             g = Zygote.gradient(model -> sum(node_feature(model(FeaturedGraph(adj, nf=X)))), 
                                 gc)[1]
diff --git a/test/runtests.jl b/test/runtests.jl
index e777a34ba..e99a7ee56 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -30,6 +30,8 @@ tests = [
     "models",
 ]
 
+!Flux.use_cuda[] && @warn("CUDA unavailable, not testing GPU support")
+
 # Testing all graph types. :sparse is a bit broken at the moment
 @testset "GeometricFlux: graph format $graph_type" for graph_type in (:coo, :dense, :sparse)
     global GRAPH_T = graph_type
@@ -41,7 +43,5 @@ tests = [
         for t in cuda_tests
             include("$(t).jl")
         end
-    else
-        @warn "CUDA unavailable, not testing GPU support"
     end
 end