-
Notifications
You must be signed in to change notification settings - Fork 47
/
gnngraph.jl
347 lines (300 loc) · 12.1 KB
/
gnngraph.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#===================================
Define GNNGraph type as a subtype of Graphs.AbstractGraph.
For the core methods to be implemented by any AbstractGraph, see
https://juliagraphs.org/Graphs.jl/latest/types/#AbstractGraph-Type
https://juliagraphs.org/Graphs.jl/latest/developing/#Developing-Alternate-Graph-Types
=============================================#
"""
    GNNGraph(data; [graph_type, ndata, edata, gdata, num_nodes, graph_indicator, dir])
    GNNGraph(g::GNNGraph; [ndata, edata, gdata, graph_type])

A type representing a graph structure that also stores
feature arrays associated to nodes, edges, and the graph itself.
The feature arrays are stored in the fields `ndata`, `edata`, and `gdata`
as [`DataStore`](@ref) objects offering a convenient dictionary-like
and namedtuple-like interface. The features can be passed at construction
time or added later.

A `GNNGraph` can be constructed out of different `data` objects
expressing the connections inside the graph. The internal representation type
is determined by `graph_type`.

When constructed from another `GNNGraph`, the internal graph representation
is preserved and shared, unless a `graph_type` is given, in which case the
graph is converted to the requested representation.
The node/edge/graph features are retained
as well, unless explicitly set by the keyword arguments
`ndata`, `edata`, and `gdata`.

A `GNNGraph` can also represent multiple graphs batched together
(see [`MLUtils.batch`](@ref) or [`SparseArrays.blockdiag`](@ref)).
The field `g.graph_indicator` contains the graph membership
of each node.

`GNNGraph`s are always directed graphs, therefore each edge is defined
by a source node and a target node (see [`edge_index`](@ref)).
Self loops (edges connecting a node to itself) and multiple edges
(more than one edge between the same pair of nodes) are supported.

A `GNNGraph` is a Graphs.jl `AbstractGraph`, therefore it supports most
functionality from that library.

# Arguments

- `data`: Some data representing the graph topology. Possible types are
    - An adjacency matrix.
    - An adjacency list.
    - A tuple containing the source and target vectors (COO representation).
    - A Graphs.jl graph.
- `graph_type`: A keyword argument that specifies
                the underlying representation used by the GNNGraph.
                Currently supported values are
    - `:coo`. Graph represented as a tuple `(source, target)`, such that the `k`-th edge
              connects the node `source[k]` to node `target[k]`.
              Optionally, also edge weights can be given: `(source, target, weights)`.
    - `:sparse`. A sparse adjacency matrix representation.
    - `:dense`. A dense adjacency matrix representation.
    Defaults to `:coo`, currently the most supported type.
- `dir`: The assumed edge direction when given adjacency matrix or adjacency list input `data`.
         Possible values are `:out` and `:in`. Default `:out`.
- `num_nodes`: The number of nodes. If not specified, inferred from `data`. Default `nothing`.
- `graph_indicator`: For batched graphs, a vector containing the graph assignment of each node. Default `nothing`.
- `ndata`: Node features. An array or named tuple of arrays whose last dimension has size `num_nodes`.
- `edata`: Edge features. An array or named tuple of arrays whose last dimension has size `num_edges`.
- `gdata`: Graph features. An array or named tuple of arrays whose last dimension has size `num_graphs`.

# Examples

```julia
using GNNGraphs

# Construct from adjacency list representation
data = [[2,3], [1,4,5], [1], [2,5], [2,4]]
g = GNNGraph(data)

# Number of nodes, edges, and batched graphs
g.num_nodes  # 5
g.num_edges  # 10
g.num_graphs # 1

# Same graph in COO representation
s = [1,1,2,2,2,3,4,4,5,5]
t = [2,3,1,4,5,3,2,5,2,4]
g = GNNGraph(s, t)

# From a Graphs' graph
g = GNNGraph(erdos_renyi(100, 20))

# Add 2 node feature arrays at creation time
g = GNNGraph(g, ndata = (x=rand(100, g.num_nodes), y=rand(g.num_nodes)))

# Add 1 edge feature array, after the graph creation
g.edata.z = rand(16, g.num_edges)

# Add node features and edge features with default names `x` and `e`
g = GNNGraph(g, ndata = rand(100, g.num_nodes), edata = rand(16, g.num_edges))

g.ndata.x # or just g.x
g.edata.e # or just g.e

# Collect edges' source and target nodes.
# Both source and target are vectors of length num_edges
source, target = edge_index(g)
```

A `GNNGraph` can be sent to the GPU, for example by using Flux.jl's `gpu` function
or MLDataDevices.jl's utilities.
"""
struct GNNGraph{T <: Union{COO_T, ADJMAT_T}} <: AbstractGNNGraph{T}
# Underlying topology, in one of the representations admitted by `COO_T` / `ADJMAT_T`.
graph::T
# Cached sizes of the graph; `num_graphs` is 1 unless a `graph_indicator` was supplied.
num_nodes::Int
num_edges::Int
num_graphs::Int
# Graph membership of each node for batched graphs.
graph_indicator::Union{Nothing, AVecI} # vector of ints or nothing
# Node, edge and graph-level feature containers.
ndata::DataStore
edata::DataStore
gdata::DataStore
end
# NOTE(review): presumably Functors.jl's `@functor`, which would let utilities such as
# `gpu` traverse the fields of a `GNNGraph` — confirm against the module's imports.
@functor GNNGraph
# Main constructor: convert the raw topology `data` to the representation selected
# by `graph_type`, derive the number of graphs from `graph_indicator`, normalize the
# node/edge/graph features and assemble the `GNNGraph`.
function GNNGraph(data::D;
                  num_nodes = nothing,
                  graph_indicator = nothing,
                  graph_type = :coo,
                  dir = :out,
                  ndata = nothing,
                  edata = nothing,
                  gdata = nothing) where {D <: Union{COO_T, ADJMAT_T, ADJLIST_T}}
    @assert graph_type ∈ [:coo, :dense, :sparse] "Invalid graph_type $graph_type requested"
    @assert dir ∈ [:in, :out]

    # Each converter returns the graph together with its node and edge counts.
    graph, num_nodes, num_edges = if graph_type == :coo
        to_coo(data; num_nodes, dir)
    elseif graph_type == :dense
        to_dense(data; num_nodes, dir)
    else
        # Only `:sparse` is left, as guaranteed by the assertion above.
        to_sparse(data; num_nodes, dir)
    end

    # Without an indicator the graph is a single, non-batched graph.
    num_graphs = isnothing(graph_indicator) ? 1 : maximum(graph_indicator)

    # A single graph does not force the shape of its graph-level features,
    # which is signalled to `normalize_graphdata` with `n = -1`.
    gdata_n = num_graphs > 1 ? num_graphs : -1

    node_feats = normalize_graphdata(ndata, default_name = :x, n = num_nodes)
    edge_feats = normalize_graphdata(edata, default_name = :e, n = num_edges,
                                     duplicate_if_needed = true)
    graph_feats = normalize_graphdata(gdata, default_name = :u, n = gdata_n)

    return GNNGraph(graph,
                    num_nodes, num_edges, num_graphs,
                    graph_indicator,
                    node_feats, edge_feats, graph_feats)
end
# Zero-argument constructor: an empty graph with zero nodes; keywords are forwarded.
function GNNGraph(; kws...)
    return GNNGraph(0; kws...)
end
# Build an edgeless graph with `num_nodes` nodes. The empty source/target vectors
# use the integer type of `num_nodes` as element type.
function (::Type{<:GNNGraph})(num_nodes::T; kws...) where {T <: Integer}
    sources = Vector{T}()
    targets = Vector{T}()
    return GNNGraph(sources, targets; num_nodes = num_nodes, kws...)
end
# `zero` of a graph type is the graph constructed from zero nodes.
function Base.zero(::Type{G}) where {G <: GNNGraph}
    return G(0)
end
# COO convenience constructors
# Accept the source vector, target vector and optional third vector `v` as separate
# positional arguments and forward them packed as a single COO tuple.
function GNNGraph(s::AbstractVector, t::AbstractVector, v = nothing; kws...)
    coo = (s, t, v)
    return GNNGraph(coo; kws...)
end
# A 2-tuple `(source, target)` is completed with `nothing` in the third slot.
function GNNGraph(st::NTuple{2}; kws...)
    s, t = st
    return GNNGraph((s, t, nothing); kws...)
end
# Build a `GNNGraph` from any Graphs.jl `AbstractGraph` by collecting its edge list.
# Undirected input graphs get every edge (and the matching weight) mirrored,
# since `GNNGraph` is always directed.
function GNNGraph(g::AbstractGraph; edge_weight = nothing, kws...)
    edge_iter = Graphs.edges(g)
    s = Graphs.src.(edge_iter)
    t = Graphs.dst.(edge_iter)
    w = edge_weight

    if Graphs.is_directed(g)
        # Directed graphs are used as they are.
    else
        # add reverse edges since GNNGraph is directed
        s, t = vcat(s, t), vcat(t, s)
        if w !== nothing
            @assert length(w) == Graphs.ne(g) "edge_weight must have length equal to the number of undirected edges"
            w = vcat(w, w)
        end
    end

    num_nodes = convert(Int, Graphs.nv(g))::Int
    return GNNGraph((s, t, w); num_nodes = num_nodes, kws...)
end
# Build a new `GNNGraph` from an existing one.
#
# Keyword arguments:
# - `ndata`, `edata`, `gdata`: replacement node/edge/graph features. They default to
#   the features of `g` and are normalized against the sizes stored in `g`.
# - `graph_type`: when `nothing` (default) the underlying graph of `g` is shared with
#   the new object; when `:coo`, `:dense` or `:sparse` the graph is converted to that
#   representation.
#
# Throws an `AssertionError` when `graph_type` is neither `nothing` nor one of the
# supported symbols. This is the same check and message used by the constructor
# taking raw data; without it the error was an unrelated `UndefVarError`.
function GNNGraph(g::GNNGraph; ndata = g.ndata, edata = g.edata, gdata = g.gdata,
                  graph_type = nothing)
    if !isnothing(graph_type)
        @assert graph_type ∈ [:coo, :dense, :sparse] "Invalid graph_type $graph_type requested"
    end

    ndata = normalize_graphdata(ndata, default_name = :x, n = g.num_nodes)
    edata = normalize_graphdata(edata, default_name = :e, n = g.num_edges,
                                duplicate_if_needed = true)
    gdata = normalize_graphdata(gdata, default_name = :u, n = g.num_graphs)

    if isnothing(graph_type)
        # No conversion requested: share the existing representation.
        graph = g.graph
    else
        graph, num_nodes, num_edges = if graph_type == :coo
            to_coo(g.graph; num_nodes = g.num_nodes)
        elseif graph_type == :dense
            to_dense(g.graph; num_nodes = g.num_nodes)
        else
            to_sparse(g.graph; num_nodes = g.num_nodes)
        end
        # Internal invariant: a change of representation must not alter the sizes.
        @assert num_nodes == g.num_nodes
        @assert num_edges == g.num_edges
    end

    return GNNGraph(graph,
                    g.num_nodes, g.num_edges, g.num_graphs,
                    g.graph_indicator,
                    ndata, edata, gdata)
end
"""
    copy(g::GNNGraph; deep=false)

Return a copy of `g`.

With `deep = false` (the default) the copy is shallow: the new `GNNGraph` shares the
underlying graph, graph indicator and feature containers with `g`.
With `deep = true` every one of those fields is duplicated with `deepcopy`
(equivalent to `deepcopy(g)`).
"""
function Base.copy(g::GNNGraph; deep = false)
    # Pick once how each field is transferred to the new object.
    dup = deep ? deepcopy : identity
    return GNNGraph(dup(g.graph),
                    g.num_nodes, g.num_edges, g.num_graphs,
                    dup(g.graph_indicator),
                    dup(g.ndata), dup(g.edata), dup(g.gdata))
end
# Print a short description of a feature container to `io`: each entry is shown as
# `name: dims`. A single entry is printed bare, several entries are wrapped in
# parentheses and separated by commas. Nothing is printed for an empty container.
function print_feature(io::IO, feature)
    isempty(feature) && return nothing

    if length(keys(feature)) == 1
        name = first(keys(feature))
        arr = first(values(feature))
        print(io, "$(name): $(dims2string(size(arr)))")
        return nothing
    end

    print(io, "(")
    for (idx, (name, arr)) in enumerate(pairs(feature))
        print(io, "$name: $(dims2string(size(arr)))")
        # Close the parenthesis after the last entry, otherwise separate entries.
        print(io, idx == length(feature) ? ")" : ", ")
    end
    return nothing
end
# Print the descriptions of the three feature containers to `io`, in order and
# separated by ", ", skipping the empty ones. When all three are empty, print "no".
function print_all_features(io::IO, feat1, feat2, feat3)
    feats = (feat1, feat2, feat3)
    counts = map(length, feats)

    if all(iszero, counts)
        print(io, "no")
        return nothing
    end

    needs_separator = false
    for (feat, cnt) in zip(feats, counts)
        cnt == 0 && continue
        needs_separator && print(io, ", ")
        print_feature(io, feat)
        needs_separator = true
    end
    return nothing
end
# One-line representation: node and edge counts followed by a summary of the
# node, edge and graph features.
function Base.show(io::IO, g::GNNGraph)
    print(io, "GNNGraph(", g.num_nodes, ", ", g.num_edges, ") with ")
    print_all_features(io, g.ndata, g.edata, g.gdata)
    print(io, " data")
    return nothing
end
# Multi-line representation. In a `:compact` IO context the one-line summary is
# printed instead. Otherwise the sizes are listed, followed by one section per
# non-empty feature container with a short summary of each stored entry.
function Base.show(io::IO, ::MIME"text/plain", g::GNNGraph)
    if get(io, :compact, false)
        print(io, "GNNGraph($(g.num_nodes), $(g.num_edges)) with ")
        print_all_features(io, g.ndata, g.edata, g.gdata)
        print(io, " data")
        return nothing
    end

    print(io,
          "GNNGraph:\n num_nodes: $(g.num_nodes)\n num_edges: $(g.num_edges)")
    # The number of graphs is only reported for batched graphs.
    if g.num_graphs > 1
        print(io, "\n num_graphs: $(g.num_graphs)")
    end

    sections = (("ndata", g.ndata), ("edata", g.edata), ("gdata", g.gdata))
    for (label, store) in sections
        isempty(store) && continue
        print(io, "\n $(label):")
        for key in keys(store)
            print(io, "\n\t$key = $(shortsummary(store[key]))")
        end
    end
    return nothing
end
# MLUtils observation interface: the observations of a `GNNGraph` are its graphs.
function MLUtils.numobs(g::GNNGraph)
    return g.num_graphs
end

# Fetching observation(s) `i` is delegated to `getgraph`.
function MLUtils.getobs(g::GNNGraph, i)
    return getgraph(g, i)
end
#########################
# Two graphs are equal when they are the same object or when all their fields,
# except `graph_indicator`, compare equal.
function Base.:(==)(g1::GNNGraph, g2::GNNGraph)
    if g1 === g2
        return true
    end
    for name in fieldnames(typeof(g1))
        # `graph_indicator` is deliberately left out of the comparison.
        if name !== :graph_indicator && getfield(g1, name) != getfield(g2, name)
            return false
        end
    end
    return true
end
# Hash consistent with `==`: start from the hash of the graph type and mix in
# every field in declaration order, leaving out `graph_indicator`.
function Base.hash(g::T, h::UInt) where {T <: GNNGraph}
    acc = hash(T, h)
    for name in fieldnames(T)
        name === :graph_indicator && continue
        acc = hash(getfield(g, name), acc)
    end
    return acc
end
# Property access `g.s`. Struct fields take precedence. Any other name is looked up
# among the keys of the node, edge and graph feature containers, in that order.
# Throws an `ArgumentError` when the name is stored in more than one container or
# is not found at all.
function Base.getproperty(g::GNNGraph, s::Symbol)
    s in fieldnames(GNNGraph) && return getfield(g, s)

    ndata = getfield(g, :ndata)
    edata = getfield(g, :edata)
    gdata = getfield(g, :gdata)

    in_nodes = s in keys(ndata)
    in_edges = s in keys(edata)
    in_graph = s in keys(gdata)

    if in_nodes + in_edges + in_graph > 1
        throw(ArgumentError("Ambiguous property name $s"))
    end

    in_nodes && return ndata[s]
    in_edges && return edata[s]
    in_graph && return gdata[s]
    throw(ArgumentError("$(s) is not a field of GNNGraph"))
end