From 4c96da7b30ed3370d51c88a07d6bd0bec3b28350 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Mon, 25 Oct 2021 10:55:23 +0200
Subject: [PATCH 1/4] add link prediction example

---
 examples/link_prediction_cora.jl | 131 +++++++++++++++++++++++++++++++
 src/msgpass.jl                   |   3 +-
 2 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 examples/link_prediction_cora.jl

diff --git a/examples/link_prediction_cora.jl b/examples/link_prediction_cora.jl
new file mode 100644
index 000000000..e2212463a
--- /dev/null
+++ b/examples/link_prediction_cora.jl
@@ -0,0 +1,131 @@
+# An example of link prediction using negative and positive samples.
+# Ported from https://docs.dgl.ai/tutorials/blitz/4_link_predict.html#sphx-glr-tutorials-blitz-4-link-predict-py
+
+using Flux
+using Flux: onecold, onehotbatch
+using Flux.Losses: logitbinarycrossentropy
+using GraphNeuralNetworks
+using GraphNeuralNetworks: ones_like, zeros_like
+using MLDatasets: Cora
+using Statistics, Random, LinearAlgebra
+using CUDA
+using MLJBase: AreaUnderCurve
+CUDA.allowscalar(false)
+
+"""
+Transform a vector of Cartesian indices into a tuple of integer vectors.
+"""
+ci2t(ci::AbstractVector{<:CartesianIndex}, dims) = ntuple(i -> map(x -> x[i], ci), dims)
+
+# arguments for the `train` function
+Base.@kwdef mutable struct Args
+    η = 1f-3             # learning rate
+    epochs = 200         # number of epochs
+    seed = 17            # set seed > 0 for reproducibility
+    usecuda = false      # if true use cuda (if available)
+    nhidden = 128        # dimension of hidden features
+    infotime = 10        # report every `infotime` epochs
+end
+
+struct DotPredictor end
+
+function (::DotPredictor)(g, x)
+    z = apply_edges((xi, xj, e) -> sum(xi .* xj, dims=1), g, xi=x, xj=x)
+    return vec(z)
+end
+
+function train(; kws...)
+    # collect keyword arguments into an Args struct
+    args = Args(; kws...)
+
+    args.seed > 0 && Random.seed!(args.seed)
+
+    if args.usecuda && CUDA.functional()
+        device = gpu
+        args.seed > 0 && CUDA.seed!(args.seed)
+        @info "Training on GPU"
+    else
+        device = cpu
+        @info "Training on CPU"
+    end
+
+    ### LOAD DATA
+    data = Cora.dataset()
+    g = GNNGraph(data.adjacency_list) |> device
+    X = data.node_features |> device
+
+    #### SPLIT INTO NEGATIVE AND POSITIVE SAMPLES
+    # Split edge set for training and testing
+    s, t = edge_index(g)
+    eids = randperm(g.num_edges)
+    test_size = round(Int, g.num_edges * 0.1)
+    train_size = g.num_edges - test_size
+    test_pos_s, test_pos_t = s[eids[1:test_size]], t[eids[1:test_size]]
+    train_pos_s, train_pos_t = s[eids[test_size+1:end]], t[eids[test_size+1:end]]
+
+    # Find all negative edges and split them for training and testing
+    adj = adjacency_matrix(g)
+    adj_neg = 1 .- adj - I
+    neg_s, neg_t = ci2t(findall(adj_neg .> 0), 2)
+
+    neg_eids = randperm(length(neg_s))[1:g.num_edges]
+    test_neg_s, test_neg_t = neg_s[neg_eids[1:test_size]], neg_t[neg_eids[1:test_size]]
+    train_neg_s, train_neg_t = neg_s[neg_eids[test_size+1:end]], neg_t[neg_eids[test_size+1:end]]
+    # train_neg_s, train_neg_t = neg_s[neg_eids[train_size+1:end]], neg_t[neg_eids[train_size+1:end]]
+
+    train_pos_g = GNNGraph((train_pos_s, train_pos_t), num_nodes=g.num_nodes)
+    train_neg_g = GNNGraph((train_neg_s, train_neg_t), num_nodes=g.num_nodes)
+
+    test_pos_g = GNNGraph((test_pos_s, test_pos_t), num_nodes=g.num_nodes)
+    test_neg_g = GNNGraph((test_neg_s, test_neg_t), num_nodes=g.num_nodes)
+
+    @show train_pos_g test_pos_g train_neg_g test_neg_g
+
+    ### DEFINE MODEL
+    nin, nhidden = size(X,1), args.nhidden
+
+    model = GNNChain(GCNConv(nin => nhidden, relu),
+                     GCNConv(nhidden => nhidden)) |> device
+
+    pred = DotPredictor()
+
+    ps = Flux.params(model)
+    opt = ADAM(args.η)
+
+    ### LOSS FUNCTION
+
+    function loss(pos_g, neg_g)
+        h = model(train_pos_g, X)
+        pos_score = pred(pos_g, h)
+        neg_score = pred(neg_g, h)
+        scores = [pos_score; neg_score]
+        labels = [ones_like(pos_score); zeros_like(neg_score)]
+        return logitbinarycrossentropy(scores, labels)
+    end
+
+    function accuracy(pos_g, neg_g)
+        h = model(train_pos_g, X)
+        pos_score = pred(pos_g, h)
+        neg_score = pred(neg_g, h)
+        scores = [pos_score; neg_score]
+        labels = [ones_like(pos_score); zeros_like(neg_score)]
+        return mean((scores .> 0) .== (labels .> 0.5))
+    end
+
+    ### LOGGING FUNCTION
+    function report(epoch)
+        train_loss = loss(train_pos_g, train_neg_g)
+        test_loss = loss(test_pos_g, test_neg_g)
+        println("Epoch: $epoch Train: $(train_loss) Test: $(test_loss)")
+    end
+
+    ### TRAINING
+    report(0)
+    for epoch in 1:args.epochs
+        gs = Flux.gradient(() -> loss(train_pos_g, train_neg_g), ps)
+        Flux.Optimise.update!(opt, ps, gs)
+        epoch % args.infotime == 0 && report(epoch)
+    end
+end
+
+# train()
\ No newline at end of file
diff --git a/src/msgpass.jl b/src/msgpass.jl
index 1611ebe56..78a9333bf 100644
--- a/src/msgpass.jl
+++ b/src/msgpass.jl
@@ -77,7 +77,8 @@ end
 ## APPLY EDGES
 
 """
-    apply_edges(f, xi, xj, e)
+    apply_edges(f, g, xi, xj, e)
+    apply_edges(f, g; [xi, xj, e])
 
 Returns the message from node `j` to node `i`.
 In the message-passing scheme, the incoming messages
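
A minimal usage sketch of the keyword form of `apply_edges` documented in the msgpass.jl hunk above (not part of the patch; the toy graph and feature sizes are made up for illustration):

    using GraphNeuralNetworks

    g = GNNGraph([1, 2, 3], [2, 3, 1])   # a directed triangle on 3 nodes
    x = rand(Float32, 4, 3)              # 4 features per node

    # for every edge (j -> i), take the dot product of the endpoint features,
    # which is exactly what the DotPredictor added by this patch computes before `vec`
    scores = apply_edges((xi, xj, e) -> sum(xi .* xj, dims=1), g, xi=x, xj=x)
    @assert size(scores) == (1, g.num_edges)
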
From 53f79cd8cfdb829a705aa510c949bc080a9b003e Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Fri, 29 Oct 2021 09:24:33 +0200
Subject: [PATCH 2/4] link

---
 examples/Project.toml            | 3 +--
 examples/link_prediction_cora.jl | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/Project.toml b/examples/Project.toml
index 3d950f665..b4a89fa64 100644
--- a/examples/Project.toml
+++ b/examples/Project.toml
@@ -1,10 +1,9 @@
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-DiffEqFlux = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
-DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
+MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"

diff --git a/examples/link_prediction_cora.jl b/examples/link_prediction_cora.jl
index e2212463a..d6477fae0 100644
--- a/examples/link_prediction_cora.jl
+++ b/examples/link_prediction_cora.jl
@@ -23,7 +23,7 @@ Base.@kwdef mutable struct Args
     epochs = 200         # number of epochs
     seed = 17            # set seed > 0 for reproducibility
     usecuda = false      # if true use cuda (if available)
-    nhidden = 128        # dimension of hidden features
+    nhidden = 64         # dimension of hidden features
     infotime = 10        # report every `infotime` epochs
 end
 
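The next patch reworks the positive/negative edge split, so it helps to isolate the splitting logic itself. A self-contained sketch with made-up sizes (plain Julia, independent of the package):

    using Random

    num_edges = 100
    eids = randperm(num_edges)              # shuffled edge ids
    test_size = round(Int, 0.1 * num_edges)
    test_ids  = eids[1:test_size]           # 10% of edges held out as test positives
    train_ids = eids[test_size+1:end]       # remaining 90% used for training
    @assert length(test_ids) + length(train_ids) == num_edges
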
From 81fb4d16a29e097027c23ca14638b957ecfefd0b Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Mon, 1 Nov 2021 20:05:22 +0100
Subject: [PATCH 3/4] implement negative sampling

---
 examples/Project.toml                         |  3 +-
 ...tion_cora.jl => link_prediction_pubmed.jl} | 79 ++++++++-----------
 2 files changed, 37 insertions(+), 45 deletions(-)
 rename examples/{link_prediction_cora.jl => link_prediction_pubmed.jl} (54%)

diff --git a/examples/Project.toml b/examples/Project.toml
index b4a89fa64..3d950f665 100644
--- a/examples/Project.toml
+++ b/examples/Project.toml
@@ -1,9 +1,10 @@
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+DiffEqFlux = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
+DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
-MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"

diff --git a/examples/link_prediction_cora.jl b/examples/link_prediction_pubmed.jl
similarity index 54%
rename from examples/link_prediction_cora.jl
rename to examples/link_prediction_pubmed.jl
index d6477fae0..f5b043f52 100644
--- a/examples/link_prediction_cora.jl
+++ b/examples/link_prediction_pubmed.jl
@@ -2,21 +2,18 @@
 # Ported from https://docs.dgl.ai/tutorials/blitz/4_link_predict.html#sphx-glr-tutorials-blitz-4-link-predict-py
 
 using Flux
+# Link prediction task
+# https://arxiv.org/pdf/2102.12557.pdf
+
 using Flux: onecold, onehotbatch
 using Flux.Losses: logitbinarycrossentropy
 using GraphNeuralNetworks
-using GraphNeuralNetworks: ones_like, zeros_like
-using MLDatasets: Cora
+using MLDatasets: PubMed, Cora
 using Statistics, Random, LinearAlgebra
 using CUDA
-using MLJBase: AreaUnderCurve
+# using MLJBase: AreaUnderCurve
 CUDA.allowscalar(false)
 
-"""
-Transform a vector of Cartesian indices into a tuple of integer vectors.
-"""
-ci2t(ci::AbstractVector{<:CartesianIndex}, dims) = ntuple(i -> map(x -> x[i], ci), dims)
-
 # arguments for the `train` function
 Base.@kwdef mutable struct Args
     η = 1f-3             # learning rate
@@ -34,6 +31,8 @@ function (::DotPredictor)(g, x)
     return vec(z)
 end
 
+using ChainRulesCore
+
 function train(; kws...)
     # collect keyword arguments into an Args struct
     args = Args(; kws...)
@@ -54,67 +53,59 @@ function train(; kws...)
     g = GNNGraph(data.adjacency_list) |> device
     X = data.node_features |> device
 
+
     #### SPLIT INTO NEGATIVE AND POSITIVE SAMPLES
-    # Split edge set for training and testing
     s, t = edge_index(g)
     eids = randperm(g.num_edges)
     test_size = round(Int, g.num_edges * 0.1)
-    train_size = g.num_edges - test_size
+
     test_pos_s, test_pos_t = s[eids[1:test_size]], t[eids[1:test_size]]
-    train_pos_s, train_pos_t = s[eids[test_size+1:end]], t[eids[test_size+1:end]]
-
-    # Find all negative edges and split them for training and testing
-    adj = adjacency_matrix(g)
-    adj_neg = 1 .- adj - I
-    neg_s, neg_t = ci2t(findall(adj_neg .> 0), 2)
-
-    neg_eids = randperm(length(neg_s))[1:g.num_edges]
-    test_neg_s, test_neg_t = neg_s[neg_eids[1:test_size]], neg_t[neg_eids[1:test_size]]
-    train_neg_s, train_neg_t = neg_s[neg_eids[test_size+1:end]], neg_t[neg_eids[test_size+1:end]]
-    # train_neg_s, train_neg_t = neg_s[neg_eids[train_size+1:end]], neg_t[neg_eids[train_size+1:end]]
+    test_pos_g = GNNGraph(test_pos_s, test_pos_t, num_nodes=g.num_nodes)
 
-    train_pos_g = GNNGraph((train_pos_s, train_pos_t), num_nodes=g.num_nodes)
-    train_neg_g = GNNGraph((train_neg_s, train_neg_t), num_nodes=g.num_nodes)
+    train_pos_s, train_pos_t = s[eids[test_size+1:end]], t[eids[test_size+1:end]]
+    train_pos_g = GNNGraph(train_pos_s, train_pos_t, num_nodes=g.num_nodes)
 
-    test_pos_g = GNNGraph((test_pos_s, test_pos_t), num_nodes=g.num_nodes)
-    test_neg_g = GNNGraph((test_neg_s, test_neg_t), num_nodes=g.num_nodes)
+    test_neg_g = negative_sample(g, num_neg_edges=test_size)
 
-    @show train_pos_g test_pos_g train_neg_g test_neg_g
-
-    ### DEFINE MODEL
+    ### DEFINE MODEL #########
     nin, nhidden = size(X,1), args.nhidden
 
-    model = GNNChain(GCNConv(nin => nhidden, relu),
-                     GCNConv(nhidden => nhidden)) |> device
+    model = WithGraph(GNNChain(GCNConv(nin => nhidden, relu),
+                               GCNConv(nhidden => nhidden)),
+                      train_pos_g) |> device
 
     pred = DotPredictor()
 
     ps = Flux.params(model)
     opt = ADAM(args.η)
 
-    ### LOSS FUNCTION
+    ### LOSS FUNCTION ############
 
-    function loss(pos_g, neg_g)
-        h = model(train_pos_g, X)
+    function loss(pos_g, neg_g = nothing)
+        h = model(X)
+        if neg_g === nothing
+            # we sample a negative graph at each training step
+            neg_g = negative_sample(pos_g)
+        end
        pos_score = pred(pos_g, h)
         neg_score = pred(neg_g, h)
         scores = [pos_score; neg_score]
-        labels = [ones_like(pos_score); zeros_like(neg_score)]
+        labels = [fill!(similar(pos_score), 1); fill!(similar(neg_score), 0)]
         return logitbinarycrossentropy(scores, labels)
     end
 
-    function accuracy(pos_g, neg_g)
-        h = model(train_pos_g, X)
-        pos_score = pred(pos_g, h)
-        neg_score = pred(neg_g, h)
-        scores = [pos_score; neg_score]
-        labels = [ones_like(pos_score); zeros_like(neg_score)]
-        return mean((scores .> 0) .== (labels .> 0.5))
-    end
+    # function accuracy(pos_g, neg_g)
+    #     h = model(train_pos_g, X)
+    #     pos_score = pred(pos_g, h)
+    #     neg_score = pred(neg_g, h)
+    #     scores = [pos_score; neg_score]
+    #     labels = [fill!(similar(pos_score), 1); fill!(similar(neg_score), 0)]
+    #     return mean((scores .> 0) .== (labels .> 0.5))
+    # end
 
     ### LOGGING FUNCTION
     function report(epoch)
-        train_loss = loss(train_pos_g, train_neg_g)
+        train_loss = loss(train_pos_g)
         test_loss = loss(test_pos_g, test_neg_g)
         println("Epoch: $epoch Train: $(train_loss) Test: $(test_loss)")
     end
@@ -122,7 +113,7 @@ function train(; kws...)
     ### TRAINING
     report(0)
     for epoch in 1:args.epochs
-        gs = Flux.gradient(() -> loss(train_pos_g, train_neg_g), ps)
+        gs = Flux.gradient(() -> loss(train_pos_g), ps)
         Flux.Optimise.update!(opt, ps, gs)
         epoch % args.infotime == 0 && report(epoch)
     end
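
Patch 3 wraps the model in `WithGraph`, which stores the training graph once so that the wrapped chain is called on the features alone (`h = model(X)`). A small sketch of that behavior, inferred from its use in the patch (the graph and sizes are illustrative):

    using Flux, GraphNeuralNetworks

    g = rand_graph(10, 40)     # 10 nodes, 40 directed edges
    x = rand(Float32, 3, 10)

    m = WithGraph(GNNChain(GCNConv(3 => 4, relu)), g)
    y = m(x)                   # same as calling the inner chain as chain(g, x)
    @assert size(y) == (4, 10)
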
From 13468e38e7da313f226eb5eb4e602502a7cff37e Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Mon, 1 Nov 2021 20:05:45 +0100
Subject: [PATCH 4/4] implement negative sampling

---
 src/GNNGraphs/GNNGraphs.jl |  3 ++-
 src/GNNGraphs/transform.jl | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/GNNGraphs/GNNGraphs.jl b/src/GNNGraphs/GNNGraphs.jl
index 51e8891c6..d30af6c18 100644
--- a/src/GNNGraphs/GNNGraphs.jl
+++ b/src/GNNGraphs/GNNGraphs.jl
@@ -23,7 +23,8 @@ export edge_index,
        adjacency_list, normalized_laplacian, scaled_laplacian, graph_indicator
 
 include("transform.jl")
-export add_nodes, add_edges, add_self_loops, remove_self_loops, getgraph
+export add_nodes, add_edges, add_self_loops, remove_self_loops, getgraph,
+       negative_sample
 
 include("generate.jl")
 export rand_graph

diff --git a/src/GNNGraphs/transform.jl b/src/GNNGraphs/transform.jl
index 825b22eed..ff86a0720 100644
--- a/src/GNNGraphs/transform.jl
+++ b/src/GNNGraphs/transform.jl
@@ -324,5 +324,26 @@ function getgraph(g::GNNGraph, i::AbstractVector{Int}; nmap=false)
     end
 end
 
+
+"""
+    negative_sample(g::GNNGraph; num_neg_edges=g.num_edges)
+
+Return a graph containing random negative edges (i.e. non-edges) from graph `g`.
+"""
+function negative_sample(g::GNNGraph; num_neg_edges=g.num_edges)
+    adj = adjacency_matrix(g)
+    adj_neg = 1 .- adj - I
+    neg_s, neg_t = ci2t(findall(adj_neg .> 0), 2)
+    neg_eids = randperm(length(neg_s))[1:num_neg_edges]
+    neg_s, neg_t = neg_s[neg_eids], neg_t[neg_eids]
+    return GNNGraph(neg_s, neg_t, num_nodes=g.num_nodes)
+end
+
+# """
+# Transform a vector of Cartesian indices into a tuple of integer vectors.
+# """
+ci2t(ci::AbstractVector{<:CartesianIndex}, dims) = ntuple(i -> map(x -> x[i], ci), dims)
+
+@non_differentiable negative_sample(x...)
 @non_differentiable add_self_loops(x...) # TODO this is wrong, since g carries feature arrays, needs rrule
 @non_differentiable remove_self_loops(x...) # TODO this is wrong, since g carries feature arrays, needs rrule
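
Finally, a quick way to exercise the `negative_sample` function introduced in the last patch (toy graph; the assertions merely restate the docstring's contract and assume the graph has at least `num_neg_edges` non-edges):

    using GraphNeuralNetworks

    g = rand_graph(10, 30)       # 10 nodes, 30 directed edges
    neg_g = negative_sample(g, num_neg_edges=10)

    @assert neg_g.num_nodes == g.num_nodes   # same node set as g
    @assert neg_g.num_edges == 10            # requested number of sampled non-edges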