From 4c96da7b30ed3370d51c88a07d6bd0bec3b28350 Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Mon, 25 Oct 2021 10:55:23 +0200
Subject: [PATCH 1/4] add link prediction example

---
 examples/link_prediction_cora.jl | 131 +++++++++++++++++++++++++++++++
 src/msgpass.jl                   |   3 +-
 2 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 examples/link_prediction_cora.jl

diff --git a/examples/link_prediction_cora.jl b/examples/link_prediction_cora.jl
new file mode 100644
index 000000000..e2212463a
--- /dev/null
+++ b/examples/link_prediction_cora.jl
@@ -0,0 +1,131 @@
+# An example of link prediction using negative and positive samples.
+# Ported from https://docs.dgl.ai/tutorials/blitz/4_link_predict.html#sphx-glr-tutorials-blitz-4-link-predict-py
+
+using Flux
+using Flux: onecold, onehotbatch
+using Flux.Losses: logitbinarycrossentropy
+using GraphNeuralNetworks
+using GraphNeuralNetworks: ones_like, zeros_like
+using MLDatasets: Cora
+using Statistics, Random, LinearAlgebra
+using CUDA
+using MLJBase: AreaUnderCurve
+CUDA.allowscalar(false)
+
+"""
+Transform a vector of Cartesian indices into a tuple of integer vectors.
+"""
+ci2t(ci::AbstractVector{<:CartesianIndex}, dims) = ntuple(i -> map(x -> x[i], ci), dims)
+
+# arguments for the `train` function
+Base.@kwdef mutable struct Args
+    η = 1f-3             # learning rate
+    epochs = 200         # number of epochs
+    seed = 17            # set seed > 0 for reproducibility
+    usecuda = false      # if true use cuda (if available)
+    nhidden = 128        # dimension of hidden features
+    infotime = 10        # report every `infotime` epochs
+end
+
+struct DotPredictor end
+
+function (::DotPredictor)(g, x)
+    z = apply_edges((xi, xj, e) -> sum(xi .* xj, dims=1), g, xi=x, xj=x)
+    return vec(z)
+end
+
+function train(; kws...)
+    # collect keyword arguments into an Args struct
+    args = Args(; kws...)
+
+    args.seed > 0 && Random.seed!(args.seed)
+
+    if args.usecuda && CUDA.functional()
+        device = gpu
+        args.seed > 0 && CUDA.seed!(args.seed)
+        @info "Training on GPU"
+    else
+        device = cpu
+        @info "Training on CPU"
+    end
+
+    ### LOAD DATA
+    data = Cora.dataset()
+    g = GNNGraph(data.adjacency_list) |> device
+    X = data.node_features |> device
+
+    #### SPLIT INTO NEGATIVE AND POSITIVE SAMPLES
+    # Split edge set for training and testing
+    s, t = edge_index(g)
+    eids = randperm(g.num_edges)
+    test_size = round(Int, g.num_edges * 0.1)
+    train_size = g.num_edges - test_size
+    test_pos_s, test_pos_t = s[eids[1:test_size]], t[eids[1:test_size]]
+    train_pos_s, train_pos_t = s[eids[test_size+1:end]], t[eids[test_size+1:end]]
+
+    # Find all negative edges and split them for training and testing
+    adj = adjacency_matrix(g)
+    adj_neg = 1 .- adj - I
+    neg_s, neg_t = ci2t(findall(adj_neg .> 0), 2)
+
+    neg_eids = randperm(length(neg_s))[1:g.num_edges]
+    test_neg_s, test_neg_t = neg_s[neg_eids[1:test_size]], neg_t[neg_eids[1:test_size]]
+    train_neg_s, train_neg_t = neg_s[neg_eids[test_size+1:end]], neg_t[neg_eids[test_size+1:end]]
+    # train_neg_s, train_neg_t = neg_s[neg_eids[train_size+1:end]], neg_t[neg_eids[train_size+1:end]]
+
+    train_pos_g = GNNGraph((train_pos_s, train_pos_t), num_nodes=g.num_nodes)
+    train_neg_g = GNNGraph((train_neg_s, train_neg_t), num_nodes=g.num_nodes)
+
+    test_pos_g = GNNGraph((test_pos_s, test_pos_t), num_nodes=g.num_nodes)
+    test_neg_g = GNNGraph((test_neg_s, test_neg_t), num_nodes=g.num_nodes)
+
+    @show train_pos_g test_pos_g train_neg_g test_neg_g
+
+    ### DEFINE MODEL
+    nin, nhidden = size(X,1), args.nhidden
+
+    model = GNNChain(GCNConv(nin => nhidden, relu),
+                     GCNConv(nhidden => nhidden)) |> device
+
+    pred = DotPredictor()
+
+    ps = Flux.params(model)
+    opt = ADAM(args.η)
+
+    ### LOSS FUNCTION
+
+    function loss(pos_g, neg_g)
+        h = model(train_pos_g, X)
+        pos_score = pred(pos_g, h)
+        neg_score = pred(neg_g, h)
+        scores = [pos_score; neg_score]
+        labels = [ones_like(pos_score); zeros_like(neg_score)]
+        return logitbinarycrossentropy(scores, labels)
+    end
+
+    function accuracy(pos_g, neg_g)
+        h = model(train_pos_g, X)
+        pos_score = pred(pos_g, h)
+        neg_score = pred(neg_g, h)
+        scores = [pos_score; neg_score]
+        labels = [ones_like(pos_score); zeros_like(neg_score)]
+        return mean((scores .> 0) .== (labels .> 0.5))
+    end
+
+    ### LOGGING FUNCTION
+    function report(epoch)
+        train_loss = loss(train_pos_g, train_neg_g)
+        test_loss = loss(test_pos_g, test_neg_g)
+        println("Epoch: $epoch Train: $(train_loss) Test: $(test_loss)")
+    end
+
+    ### TRAINING
+    report(0)
+    for epoch in 1:args.epochs
+        gs = Flux.gradient(() -> loss(train_pos_g, train_neg_g), ps)
+        Flux.Optimise.update!(opt, ps, gs)
+        epoch % args.infotime == 0 && report(epoch)
+    end
+end
+
+# train()
\ No newline at end of file
diff --git a/src/msgpass.jl b/src/msgpass.jl
index 1611ebe56..78a9333bf 100644
--- a/src/msgpass.jl
+++ b/src/msgpass.jl
@@ -77,7 +77,8 @@ end
 ## APPLY EDGES
 
 """
-    apply_edges(f, xi, xj, e)
+    apply_edges(f, g, xi, xj, e)
+    apply_edges(f, g; [xi, xj, e])
 
 Returns the message from node `j` to node `i`.
 In the message-passing scheme, the incoming messages
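
A minimal usage sketch of the keyword form of `apply_edges` documented in the msgpass.jl hunk above (not part of the patch; the toy graph and feature sizes are made up for illustration):

    using GraphNeuralNetworks

    g = GNNGraph([1, 2, 3], [2, 3, 1])   # a directed triangle on 3 nodes
    x = rand(Float32, 4, 3)              # 4 features per node

    # for every edge (j -> i), take the dot product of the endpoint features,
    # which is exactly what the DotPredictor added by this patch computes before `vec`
    scores = apply_edges((xi, xj, e) -> sum(xi .* xj, dims=1), g, xi=x, xj=x)
    @assert size(scores) == (1, g.num_edges)
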
From 53f79cd8cfdb829a705aa510c949bc080a9b003e Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Fri, 29 Oct 2021 09:24:33 +0200
Subject: [PATCH 2/4] link

---
 examples/Project.toml            | 3 +--
 examples/link_prediction_cora.jl | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/Project.toml b/examples/Project.toml
index 3d950f665..b4a89fa64 100644
--- a/examples/Project.toml
+++ b/examples/Project.toml
@@ -1,10 +1,9 @@
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-DiffEqFlux = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
-DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
+MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"

diff --git a/examples/link_prediction_cora.jl b/examples/link_prediction_cora.jl
index e2212463a..d6477fae0 100644
--- a/examples/link_prediction_cora.jl
+++ b/examples/link_prediction_cora.jl
@@ -23,7 +23,7 @@ Base.@kwdef mutable struct Args
     epochs = 200         # number of epochs
     seed = 17            # set seed > 0 for reproducibility
     usecuda = false      # if true use cuda (if available)
-    nhidden = 128        # dimension of hidden features
+    nhidden = 64         # dimension of hidden features
     infotime = 10        # report every `infotime` epochs
 end
 
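The next patch reworks the positive/negative edge split, so it helps to isolate the splitting logic itself. A self-contained sketch with made-up sizes (plain Julia, independent of the package):

    using Random

    num_edges = 100
    eids = randperm(num_edges)              # shuffled edge ids
    test_size = round(Int, 0.1 * num_edges)
    test_ids  = eids[1:test_size]           # 10% of edges held out as test positives
    train_ids = eids[test_size+1:end]       # remaining 90% used for training
    @assert length(test_ids) + length(train_ids) == num_edges
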
From 81fb4d16a29e097027c23ca14638b957ecfefd0b Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Mon, 1 Nov 2021 20:05:22 +0100
Subject: [PATCH 3/4] implement negative sampling

---
 examples/Project.toml                         |  3 +-
 ...tion_cora.jl => link_prediction_pubmed.jl} | 79 ++++++++-----------
 2 files changed, 37 insertions(+), 45 deletions(-)
 rename examples/{link_prediction_cora.jl => link_prediction_pubmed.jl} (54%)

diff --git a/examples/Project.toml b/examples/Project.toml
index b4a89fa64..3d950f665 100644
--- a/examples/Project.toml
+++ b/examples/Project.toml
@@ -1,9 +1,10 @@
 [deps]
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+DiffEqFlux = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
+DifferentialEquations = "0c46a032-eb83-5123-abaf-570d42b7fbaa"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 GraphNeuralNetworks = "cffab07f-9bc2-4db1-8861-388f63bf7694"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
-MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 NNlibCUDA = "a00861dc-f156-4864-bf3c-e6376f28a68d"

diff --git a/examples/link_prediction_cora.jl b/examples/link_prediction_pubmed.jl
similarity index 54%
rename from examples/link_prediction_cora.jl
rename to examples/link_prediction_pubmed.jl
index d6477fae0..f5b043f52 100644
--- a/examples/link_prediction_cora.jl
+++ b/examples/link_prediction_pubmed.jl
@@ -2,21 +2,18 @@
 # Ported from https://docs.dgl.ai/tutorials/blitz/4_link_predict.html#sphx-glr-tutorials-blitz-4-link-predict-py
 
 using Flux
+# Link prediction task
+# https://arxiv.org/pdf/2102.12557.pdf
+
 using Flux: onecold, onehotbatch
 using Flux.Losses: logitbinarycrossentropy
 using GraphNeuralNetworks
-using GraphNeuralNetworks: ones_like, zeros_like
-using MLDatasets: Cora
+using MLDatasets: PubMed, Cora
 using Statistics, Random, LinearAlgebra
 using CUDA
-using MLJBase: AreaUnderCurve
+# using MLJBase: AreaUnderCurve
 CUDA.allowscalar(false)
 
-"""
-Transform a vector of Cartesian indices into a tuple of integer vectors.
-"""
-ci2t(ci::AbstractVector{<:CartesianIndex}, dims) = ntuple(i -> map(x -> x[i], ci), dims)
-
 # arguments for the `train` function
 Base.@kwdef mutable struct Args
     η = 1f-3             # learning rate
@@ -34,6 +31,8 @@ function (::DotPredictor)(g, x)
     return vec(z)
 end
 
+using ChainRulesCore
+
 function train(; kws...)
     # collect keyword arguments into an Args struct
     args = Args(; kws...)
@@ -54,67 +53,59 @@ function train(; kws...)
     g = GNNGraph(data.adjacency_list) |> device
     X = data.node_features |> device
 
+
     #### SPLIT INTO NEGATIVE AND POSITIVE SAMPLES
-    # Split edge set for training and testing
     s, t = edge_index(g)
     eids = randperm(g.num_edges)
     test_size = round(Int, g.num_edges * 0.1)
-    train_size = g.num_edges - test_size
+
     test_pos_s, test_pos_t = s[eids[1:test_size]], t[eids[1:test_size]]
-    train_pos_s, train_pos_t = s[eids[test_size+1:end]], t[eids[test_size+1:end]]
-
-    # Find all negative edges and split them for training and testing
-    adj = adjacency_matrix(g)
-    adj_neg = 1 .- adj - I
-    neg_s, neg_t = ci2t(findall(adj_neg .> 0), 2)
-
-    neg_eids = randperm(length(neg_s))[1:g.num_edges]
-    test_neg_s, test_neg_t = neg_s[neg_eids[1:test_size]], neg_t[neg_eids[1:test_size]]
-    train_neg_s, train_neg_t = neg_s[neg_eids[test_size+1:end]], neg_t[neg_eids[test_size+1:end]]
-    # train_neg_s, train_neg_t = neg_s[neg_eids[train_size+1:end]], neg_t[neg_eids[train_size+1:end]]
+    test_pos_g = GNNGraph(test_pos_s, test_pos_t, num_nodes=g.num_nodes)
 
-    train_pos_g = GNNGraph((train_pos_s, train_pos_t), num_nodes=g.num_nodes)
-    train_neg_g = GNNGraph((train_neg_s, train_neg_t), num_nodes=g.num_nodes)
+    train_pos_s, train_pos_t = s[eids[test_size+1:end]], t[eids[test_size+1:end]]
+    train_pos_g = GNNGraph(train_pos_s, train_pos_t, num_nodes=g.num_nodes)
 
-    test_pos_g = GNNGraph((test_pos_s, test_pos_t), num_nodes=g.num_nodes)
-    test_neg_g = GNNGraph((test_neg_s, test_neg_t), num_nodes=g.num_nodes)
+    test_neg_g = negative_sample(g, num_neg_edges=test_size)
 
-    @show train_pos_g test_pos_g train_neg_g test_neg_g
-
-    ### DEFINE MODEL
+    ### DEFINE MODEL #########
     nin, nhidden = size(X,1), args.nhidden
 
-    model = GNNChain(GCNConv(nin => nhidden, relu),
-                     GCNConv(nhidden => nhidden)) |> device
+    model = WithGraph(GNNChain(GCNConv(nin => nhidden, relu),
+                               GCNConv(nhidden => nhidden)),
+                      train_pos_g) |> device
 
     pred = DotPredictor()
 
     ps = Flux.params(model)
     opt = ADAM(args.η)
 
-    ### LOSS FUNCTION
+    ### LOSS FUNCTION ############
 
-    function loss(pos_g, neg_g)
-        h = model(train_pos_g, X)
+    function loss(pos_g, neg_g = nothing)
+        h = model(X)
+        if neg_g === nothing
+            # we sample a negative graph at each training step
+            neg_g = negative_sample(pos_g)
+        end
        pos_score = pred(pos_g, h)
         neg_score = pred(neg_g, h)
         scores = [pos_score; neg_score]
-        labels = [ones_like(pos_score); zeros_like(neg_score)]
+        labels = [fill!(similar(pos_score), 1); fill!(similar(neg_score), 0)]
         return logitbinarycrossentropy(scores, labels)
     end
 
-    function accuracy(pos_g, neg_g)
-        h = model(train_pos_g, X)
-        pos_score = pred(pos_g, h)
-        neg_score = pred(neg_g, h)
-        scores = [pos_score; neg_score]
-        labels = [ones_like(pos_score); zeros_like(neg_score)]
-        return mean((scores .> 0) .== (labels .> 0.5))
-    end
+    # function accuracy(pos_g, neg_g)
+    #     h = model(train_pos_g, X)
+    #     pos_score = pred(pos_g, h)
+    #     neg_score = pred(neg_g, h)
+    #     scores = [pos_score; neg_score]
+    #     labels = [fill!(similar(pos_score), 1); fill!(similar(neg_score), 0)]
+    #     return mean((scores .> 0) .== (labels .> 0.5))
+    # end
 
     ### LOGGING FUNCTION
     function report(epoch)
-        train_loss = loss(train_pos_g, train_neg_g)
+        train_loss = loss(train_pos_g)
         test_loss = loss(test_pos_g, test_neg_g)
         println("Epoch: $epoch Train: $(train_loss) Test: $(test_loss)")
     end
@@ -122,7 +113,7 @@ function train(; kws...)
     ### TRAINING
     report(0)
     for epoch in 1:args.epochs
-        gs = Flux.gradient(() -> loss(train_pos_g, train_neg_g), ps)
+        gs = Flux.gradient(() -> loss(train_pos_g), ps)
         Flux.Optimise.update!(opt, ps, gs)
         epoch % args.infotime == 0 && report(epoch)
     end
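
Patch 3 wraps the model in `WithGraph`, which stores the training graph once so that the wrapped chain is called on the features alone (`h = model(X)`). A small sketch of that behavior, inferred from its use in the patch (the graph and sizes are illustrative):

    using Flux, GraphNeuralNetworks

    g = rand_graph(10, 40)     # 10 nodes, 40 directed edges
    x = rand(Float32, 3, 10)

    m = WithGraph(GNNChain(GCNConv(3 => 4, relu)), g)
    y = m(x)                   # same as calling the inner chain as chain(g, x)
    @assert size(y) == (4, 10)
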
From 13468e38e7da313f226eb5eb4e602502a7cff37e Mon Sep 17 00:00:00 2001
From: Carlo Lucibello
Date: Mon, 1 Nov 2021 20:05:45 +0100
Subject: [PATCH 4/4] implement negative sampling

---
 src/GNNGraphs/GNNGraphs.jl |  3 ++-
 src/GNNGraphs/transform.jl | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/GNNGraphs/GNNGraphs.jl b/src/GNNGraphs/GNNGraphs.jl
index 51e8891c6..d30af6c18 100644
--- a/src/GNNGraphs/GNNGraphs.jl
+++ b/src/GNNGraphs/GNNGraphs.jl
@@ -23,7 +23,8 @@ export edge_index,
        adjacency_list, normalized_laplacian, scaled_laplacian, graph_indicator
 
 include("transform.jl")
-export add_nodes, add_edges, add_self_loops, remove_self_loops, getgraph
+export add_nodes, add_edges, add_self_loops, remove_self_loops, getgraph,
+       negative_sample
 
 include("generate.jl")
 export rand_graph

diff --git a/src/GNNGraphs/transform.jl b/src/GNNGraphs/transform.jl
index 825b22eed..ff86a0720 100644
--- a/src/GNNGraphs/transform.jl
+++ b/src/GNNGraphs/transform.jl
@@ -324,5 +324,26 @@ function getgraph(g::GNNGraph, i::AbstractVector{Int}; nmap=false)
     end
 end
 
+
+"""
+    negative_sample(g::GNNGraph; num_neg_edges=g.num_edges)
+
+Return a graph containing random negative edges (i.e. non-edges) from graph `g`.
+"""
+function negative_sample(g::GNNGraph; num_neg_edges=g.num_edges)
+    adj = adjacency_matrix(g)
+    adj_neg = 1 .- adj - I
+    neg_s, neg_t = ci2t(findall(adj_neg .> 0), 2)
+    neg_eids = randperm(length(neg_s))[1:num_neg_edges]
+    neg_s, neg_t = neg_s[neg_eids], neg_t[neg_eids]
+    return GNNGraph(neg_s, neg_t, num_nodes=g.num_nodes)
+end
+
+# """
+# Transform a vector of Cartesian indices into a tuple of integer vectors.
+# """
+ci2t(ci::AbstractVector{<:CartesianIndex}, dims) = ntuple(i -> map(x -> x[i], ci), dims)
+
+@non_differentiable negative_sample(x...)
 @non_differentiable add_self_loops(x...) # TODO this is wrong, since g carries feature arrays, needs rrule
 @non_differentiable remove_self_loops(x...) # TODO this is wrong, since g carries feature arrays, needs rrule
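
Finally, a quick way to exercise the `negative_sample` function introduced in the last patch (toy graph; the assertions merely restate the docstring's contract and assume the graph has at least `num_neg_edges` non-edges):

    using GraphNeuralNetworks

    g = rand_graph(10, 30)       # 10 nodes, 30 directed edges
    neg_g = negative_sample(g, num_neg_edges=10)

    @assert neg_g.num_nodes == g.num_nodes   # same node set as g
    @assert neg_g.num_edges == 10            # requested number of sampled non-edges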