Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bidirected graph support in rand_split_edge #71

Merged
merged 3 commits into from
Nov 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions examples/link_prediction_pubmed.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@ using Flux
using Flux: onecold, onehotbatch
using Flux.Losses: logitbinarycrossentropy
using GraphNeuralNetworks
using MLDatasets: PubMed, Cora
using MLDatasets: PubMed
using Statistics, Random, LinearAlgebra
using CUDA
# using MLJBase: AreaUnderCurve
CUDA.allowscalar(false)

# arguments for the `train` function
Expand All @@ -28,7 +27,7 @@ struct DotPredictor end

# Edge scorer used for link prediction: calling a `DotPredictor` instance on a
# graph `g` and node features `x` returns a flat vector of scores
# (presumably one per edge of `g` -- depends on `apply_edges`, TODO confirm).
function (::DotPredictor)(g, x)
# `x` is passed as both `xi` and `xj`; the closure multiplies the two feature
# arrays elementwise and sums over dimension 1, i.e. a dot product if features
# are stored one column per node (assumption -- TODO confirm). `e` is unused.
z = apply_edges((xi, xj, e) -> sum(xi .* xj, dims=1), g, xi=x, xj=x)
# z = apply_edges(xi_dot_xj, g, xi=x, xj=x) # Same with buit-in methods
# z = apply_edges(xi_dot_xj, g, xi=x, xj=x) # Same with built-in method
# Flatten the result of the `dims=1` reduction into a plain vector.
return vec(z)
end

Expand All @@ -47,21 +46,25 @@ function train(; kws...)
end

### LOAD DATA
data = Cora.dataset()
# data = PubMed.dataset()
data = PubMed.dataset()
g = GNNGraph(data.adjacency_list)

# Print some info
@info g
@show is_bidirected(g)
@show has_self_loops(g)
@show has_multi_edges(g)
@show mean(degree(g))
isbidir = is_bidirected(g)

# Move to device
g = g |> device
X = data.node_features |> device

#### SPLIT INTO NEGATIVE AND POSITIVE SAMPLES
train_pos_g, test_pos_g = rand_edge_split(g, 0.9)
#### TRAIN/TEST splits
# With bidirected graph, we make sure that an edge and its reverse
# are in the same split
train_pos_g, test_pos_g = rand_edge_split(g, 0.9, bidirected=isbidir)
test_neg_g = negative_sample(g, num_neg_edges=test_pos_g.num_edges, bidirected=isbidir)

### DEFINE MODEL #########
Expand Down
2 changes: 1 addition & 1 deletion src/GNNGraphs/GNNGraphs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using SparseArrays
using Functors: @functor
using CUDA
import Graphs
using Graphs: AbstractGraph, outneighbors, inneighbors, adjacency_matrix, degree
using Graphs: AbstractGraph, outneighbors, inneighbors, adjacency_matrix, degree, has_self_loops
import Flux
using Flux: batch
import NNlib
Expand Down
42 changes: 25 additions & 17 deletions src/GNNGraphs/transform.jl
Original file line number Diff line number Diff line change
Expand Up @@ -378,33 +378,41 @@ function negative_sample(g::GNNGraph;
end

"""
rand_edge_split(g::GNNGraph, frac) -> g1, g2
rand_edge_split(g::GNNGraph, frac; bidirected=is_bidirected(g)) -> g1, g2

Randomly partition the edges in `g` to form two graphs, `g1`
and `g2`. Both will have the same number of nodes as `g`.
`g1` will contain a fraction `frac` of the original edges,
while `g2` will contain the rest.
Useful for train/test splits in link prediction tasks.
"""
function rand_edge_split(g::GNNGraph, frac)
# TODO add bidirected version
s, t = edge_index(g)
eids = randperm(g.num_edges)
size1 = round(Int, g.num_edges * frac)

s1, t1 = s[eids[1:size1]], t[eids[1:size1]]
g1 = GNNGraph(s1, t1, num_nodes=g.num_nodes)

If `bidirected=true`, an edge and its reverse are guaranteed to go into the same split.
This option is supported only for bidirected graphs with no self-loops
or multi-edges.

`rand_edge_split` is typically used to create train/test splits in link prediction tasks.
"""
function rand_edge_split(g::GNNGraph, frac; bidirected=is_bidirected(g))
s, t = edge_index(g)
eids = randperm(g.num_edges)
size1 = round(Int, g.num_edges * frac)
ne = bidirected ? g.num_edges ÷ 2 : g.num_edges
eids = randperm(ne)
size1 = round(Int, ne * frac)

s1, t1 = s[eids[1:size1]], t[eids[1:size1]]
if !bidirected
s1, t1 = s[eids[1:size1]], t[eids[1:size1]]
s2, t2 = s[eids[size1+1:end]], t[eids[size1+1:end]]
else
@assert is_bidirected(g)
@assert !has_self_loops(g)
@assert !has_multi_edges(g)
mask = s .< t
s, t = s[mask], t[mask]
s1, t1 = s[eids[1:size1]], t[eids[1:size1]]
s1, t1 = [s1; t1], [t1; s1]
s2, t2 = s[eids[size1+1:end]], t[eids[size1+1:end]]
s2, t2 = [s2; t2], [t2; s2]
end
g1 = GNNGraph(s1, t1, num_nodes=g.num_nodes)

s2, t2 = s[eids[size1+1:end]], t[eids[size1+1:end]]
g2 = GNNGraph(s2, t2, num_nodes=g.num_nodes)

return g1, g2
end

Expand Down
31 changes: 31 additions & 0 deletions test/GNNGraphs/transform.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,35 @@
@test intersect(g, gneg).num_edges == 0
end
end

# Tests for `rand_edge_split`. Each scenario checks the same properties of the
# returned pair (g1, g2): expected directedness, no shared edges, edge counts
# that add up to the input's, and a small second split.
@testset "rand_edge_split" begin
# Only the :coo graph representation is exercised here.
if GRAPH_T == :coo
n, m = 100,300

# Scenario 1: bidirected input, `bidirected` keyword left at its default.
g = rand_graph(n, m, bidirected=true, graph_type=GRAPH_T)
# check bidirected=is_bidirected(g) default
g1, g2 = rand_edge_split(g, 0.9)
# Both parts must stay bidirected, i.e. no edge is separated from its reverse.
@test is_bidirected(g1)
@test is_bidirected(g2)
# The parts are disjoint and together account for every edge of `g`.
@test intersect(g1, g2).num_edges == 0
@test g1.num_edges + g2.num_edges == g.num_edges
# With frac = 0.9, g2 should hold roughly a tenth of the edges; 50 is a loose
# upper bound (assumes `g` has about `m` edges -- TODO confirm how
# `rand_graph` counts edges for bidirected graphs).
@test g2.num_edges < 50

# Scenario 2: directed input, `bidirected` keyword left at its default.
g = rand_graph(n, m, bidirected=false, graph_type=GRAPH_T)
# check bidirected=is_bidirected(g) default
g1, g2 = rand_edge_split(g, 0.9)
# NOTE(review): these two checks rely on the random splits not happening to be
# symmetric; the graph and the split are not seeded here.
@test !is_bidirected(g1)
@test !is_bidirected(g2)
@test intersect(g1, g2).num_edges == 0
@test g1.num_edges + g2.num_edges == g.num_edges
@test g2.num_edges < 50

# Scenario 3: same directed graph, with `bidirected=false` passed explicitly.
g1, g2 = rand_edge_split(g, 0.9, bidirected=false)
@test !is_bidirected(g1)
@test !is_bidirected(g2)
@test intersect(g1, g2).num_edges == 0
@test g1.num_edges + g2.num_edges == g.num_edges
@test g2.num_edges < 50
end
end
end